From 5128a9a453d64bfe1ed978cf9ffed27985eeef36 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Tue, 19 May 2009 04:20:40 +0000 Subject: [PATCH] HADOOP-4687 Moving src directories on branch git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/HADOOP-4687/core@776174 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/core-default.xml | 444 +++ .../hadoop/HadoopVersionAnnotation.java | 69 + .../org/apache/hadoop/conf/Configurable.java | 29 + .../org/apache/hadoop/conf/Configuration.java | 1326 +++++++ .../org/apache/hadoop/conf/Configured.java | 46 + src/java/org/apache/hadoop/conf/package.html | 23 + .../hadoop/filecache/DistributedCache.java | 879 +++++ .../org/apache/hadoop/fs/BlockLocation.java | 241 ++ .../hadoop/fs/BufferedFSInputStream.java | 96 + .../apache/hadoop/fs/ChecksumException.java | 35 + .../apache/hadoop/fs/ChecksumFileSystem.java | 547 +++ .../org/apache/hadoop/fs/ContentSummary.java | 164 + src/java/org/apache/hadoop/fs/DF.java | 193 + src/java/org/apache/hadoop/fs/DU.java | 198 + .../apache/hadoop/fs/FSDataInputStream.java | 62 + .../apache/hadoop/fs/FSDataOutputStream.java | 100 + src/java/org/apache/hadoop/fs/FSError.java | 29 + .../org/apache/hadoop/fs/FSInputChecker.java | 432 +++ .../org/apache/hadoop/fs/FSInputStream.java | 78 + .../org/apache/hadoop/fs/FSOutputSummer.java | 176 + .../org/apache/hadoop/fs/FileChecksum.java | 53 + src/java/org/apache/hadoop/fs/FileStatus.java | 252 ++ src/java/org/apache/hadoop/fs/FileSystem.java | 1648 +++++++++ src/java/org/apache/hadoop/fs/FileUtil.java | 794 ++++ .../apache/hadoop/fs/FilterFileSystem.java | 278 ++ src/java/org/apache/hadoop/fs/FsShell.java | 1925 ++++++++++ .../apache/hadoop/fs/FsShellPermissions.java | 315 ++ src/java/org/apache/hadoop/fs/FsStatus.java | 70 + .../org/apache/hadoop/fs/FsUrlConnection.java | 61 + .../apache/hadoop/fs/FsUrlStreamHandler.java | 47 + .../hadoop/fs/FsUrlStreamHandlerFactory.java | 78 + .../org/apache/hadoop/fs/GlobExpander.java | 166 + .../org/apache/hadoop/fs/HarFileSystem.java | 892 +++++ .../apache/hadoop/fs/LengthFileChecksum.java | 0 .../apache/hadoop/fs/LocalDirAllocator.java | 418 +++ .../org/apache/hadoop/fs/LocalFileSystem.java | 115 + .../hadoop/fs/MD5MD5CRC32FileChecksum.java | 113 + src/java/org/apache/hadoop/fs/Path.java | 298 ++ src/java/org/apache/hadoop/fs/PathFilter.java | 32 + .../apache/hadoop/fs/PositionedReadable.java | 47 + .../apache/hadoop/fs/RawLocalFileSystem.java | 496 +++ src/java/org/apache/hadoop/fs/Seekable.java | 41 + src/java/org/apache/hadoop/fs/Syncable.java | 30 + src/java/org/apache/hadoop/fs/Trash.java | 291 ++ .../apache/hadoop/fs/ftp/FTPException.java | 38 + .../apache/hadoop/fs/ftp/FTPFileSystem.java | 576 +++ .../apache/hadoop/fs/ftp/FTPInputStream.java | 126 + .../org/apache/hadoop/fs/kfs/IFSImpl.java | 60 + .../org/apache/hadoop/fs/kfs/KFSImpl.java | 151 + .../apache/hadoop/fs/kfs/KFSInputStream.java | 130 + .../apache/hadoop/fs/kfs/KFSOutputStream.java | 97 + .../hadoop/fs/kfs/KosmosFileSystem.java | 340 ++ .../org/apache/hadoop/fs/kfs/package.html | 98 + src/java/org/apache/hadoop/fs/package.html | 23 + .../fs/permission/AccessControlException.java | 61 + .../apache/hadoop/fs/permission/FsAction.java | 67 + .../hadoop/fs/permission/FsPermission.java | 232 ++ .../fs/permission/PermissionStatus.java | 118 + src/java/org/apache/hadoop/fs/s3/Block.java | 47 + .../apache/hadoop/fs/s3/FileSystemStore.java | 63 + src/java/org/apache/hadoop/fs/s3/INode.java | 117 + .../hadoop/fs/s3/Jets3tFileSystemStore.java | 390 ++ 
.../apache/hadoop/fs/s3/MigrationTool.java | 280 ++ .../apache/hadoop/fs/s3/S3Credentials.java | 99 + .../org/apache/hadoop/fs/s3/S3Exception.java | 34 + .../org/apache/hadoop/fs/s3/S3FileSystem.java | 361 ++ .../hadoop/fs/s3/S3FileSystemException.java | 31 + .../apache/hadoop/fs/s3/S3InputStream.java | 211 ++ .../apache/hadoop/fs/s3/S3OutputStream.java | 231 ++ .../fs/s3/VersionMismatchException.java | 32 + src/java/org/apache/hadoop/fs/s3/package.html | 55 + .../hadoop/fs/s3native/FileMetadata.java | 54 + .../s3native/Jets3tNativeFileSystemStore.java | 255 ++ .../fs/s3native/NativeFileSystemStore.java | 65 + .../fs/s3native/NativeS3FileSystem.java | 578 +++ .../hadoop/fs/s3native/PartialListing.java | 59 + .../apache/hadoop/fs/s3native/package.html | 32 + .../org/apache/hadoop/fs/shell/Command.java | 86 + .../apache/hadoop/fs/shell/CommandFormat.java | 75 + .../apache/hadoop/fs/shell/CommandUtils.java | 28 + .../org/apache/hadoop/fs/shell/Count.java | 77 + .../apache/hadoop/http/FilterContainer.java | 40 + .../apache/hadoop/http/FilterInitializer.java | 29 + .../org/apache/hadoop/http/HttpServer.java | 519 +++ .../apache/hadoop/io/AbstractMapWritable.java | 207 ++ src/java/org/apache/hadoop/io/ArrayFile.java | 94 + .../org/apache/hadoop/io/ArrayWritable.java | 103 + .../apache/hadoop/io/BinaryComparable.java | 76 + .../org/apache/hadoop/io/BloomMapFile.java | 259 ++ .../org/apache/hadoop/io/BooleanWritable.java | 111 + .../org/apache/hadoop/io/ByteWritable.java | 87 + .../org/apache/hadoop/io/BytesWritable.java | 216 ++ src/java/org/apache/hadoop/io/Closeable.java | 24 + .../apache/hadoop/io/CompressedWritable.java | 86 + .../org/apache/hadoop/io/DataInputBuffer.java | 91 + .../apache/hadoop/io/DataOutputBuffer.java | 108 + .../apache/hadoop/io/DefaultStringifier.java | 199 + .../org/apache/hadoop/io/DeprecatedUTF8.java | 60 + .../org/apache/hadoop/io/DoubleWritable.java | 95 + .../org/apache/hadoop/io/EnumSetWritable.java | 202 + .../org/apache/hadoop/io/FloatWritable.java | 87 + .../org/apache/hadoop/io/GenericWritable.java | 152 + src/java/org/apache/hadoop/io/IOUtils.java | 177 + .../org/apache/hadoop/io/InputBuffer.java | 89 + .../org/apache/hadoop/io/IntWritable.java | 86 + .../org/apache/hadoop/io/LongWritable.java | 97 + src/java/org/apache/hadoop/io/MD5Hash.java | 221 ++ src/java/org/apache/hadoop/io/MapFile.java | 713 ++++ .../org/apache/hadoop/io/MapWritable.java | 169 + .../apache/hadoop/io/MultipleIOException.java | 49 + .../org/apache/hadoop/io/NullWritable.java | 70 + .../org/apache/hadoop/io/ObjectWritable.java | 273 ++ .../org/apache/hadoop/io/OutputBuffer.java | 92 + .../org/apache/hadoop/io/RawComparator.java | 37 + .../org/apache/hadoop/io/SequenceFile.java | 3244 +++++++++++++++++ src/java/org/apache/hadoop/io/SetFile.java | 105 + .../apache/hadoop/io/SortedMapWritable.java | 204 ++ .../org/apache/hadoop/io/Stringifier.java | 54 + src/java/org/apache/hadoop/io/Text.java | 594 +++ .../apache/hadoop/io/TwoDArrayWritable.java | 91 + src/java/org/apache/hadoop/io/UTF8.java | 286 ++ .../org/apache/hadoop/io/VIntWritable.java | 73 + .../org/apache/hadoop/io/VLongWritable.java | 73 + .../hadoop/io/VersionMismatchException.java | 41 + .../apache/hadoop/io/VersionedWritable.java | 50 + src/java/org/apache/hadoop/io/Writable.java | 80 + .../apache/hadoop/io/WritableComparable.java | 55 + .../apache/hadoop/io/WritableComparator.java | 216 ++ .../apache/hadoop/io/WritableFactories.java | 63 + .../org/apache/hadoop/io/WritableFactory.java | 28 + 
.../org/apache/hadoop/io/WritableName.java | 79 + .../org/apache/hadoop/io/WritableUtils.java | 418 +++ .../apache/hadoop/io/compress/BZip2Codec.java | 301 ++ .../io/compress/BlockCompressorStream.java | 156 + .../io/compress/BlockDecompressorStream.java | 128 + .../apache/hadoop/io/compress/CodecPool.java | 154 + .../hadoop/io/compress/CompressionCodec.java | 110 + .../io/compress/CompressionCodecFactory.java | 230 ++ .../io/compress/CompressionInputStream.java | 63 + .../io/compress/CompressionOutputStream.java | 69 + .../apache/hadoop/io/compress/Compressor.java | 106 + .../hadoop/io/compress/CompressorStream.java | 109 + .../hadoop/io/compress/Decompressor.java | 97 + .../io/compress/DecompressorStream.java | 159 + .../hadoop/io/compress/DefaultCodec.java | 87 + .../apache/hadoop/io/compress/GzipCodec.java | 216 ++ .../io/compress/bzip2/BZip2Constants.java | 97 + .../compress/bzip2/BZip2DummyCompressor.java | 62 + .../bzip2/BZip2DummyDecompressor.java | 52 + .../io/compress/bzip2/CBZip2InputStream.java | 969 +++++ .../io/compress/bzip2/CBZip2OutputStream.java | 2081 +++++++++++ .../apache/hadoop/io/compress/bzip2/CRC.java | 125 + .../io/compress/zlib/BuiltInZlibDeflater.java | 49 + .../io/compress/zlib/BuiltInZlibInflater.java | 50 + .../io/compress/zlib/ZlibCompressor.java | 378 ++ .../io/compress/zlib/ZlibDecompressor.java | 287 ++ .../hadoop/io/compress/zlib/ZlibFactory.java | 110 + src/java/org/apache/hadoop/io/package.html | 24 + .../io/retry/RetryInvocationHandler.java | 88 + .../apache/hadoop/io/retry/RetryPolicies.java | 258 ++ .../apache/hadoop/io/retry/RetryPolicy.java | 43 + .../apache/hadoop/io/retry/RetryProxy.java | 68 + .../org/apache/hadoop/io/retry/package.html | 48 + .../hadoop/io/serializer/Deserializer.java | 59 + .../io/serializer/DeserializerComparator.java | 70 + .../io/serializer/JavaSerialization.java | 101 + .../JavaSerializationComparator.java | 46 + .../hadoop/io/serializer/Serialization.java | 44 + .../io/serializer/SerializationFactory.java | 89 + .../hadoop/io/serializer/Serializer.java | 52 + .../io/serializer/WritableSerialization.java | 111 + .../apache/hadoop/io/serializer/package.html | 37 + src/java/org/apache/hadoop/ipc/Client.java | 914 +++++ .../apache/hadoop/ipc/ConnectionHeader.java | 93 + src/java/org/apache/hadoop/ipc/RPC.java | 575 +++ .../apache/hadoop/ipc/RemoteException.java | 120 + src/java/org/apache/hadoop/ipc/Server.java | 1255 +++++++ src/java/org/apache/hadoop/ipc/Status.java | 32 + .../apache/hadoop/ipc/VersionedProtocol.java | 38 + .../hadoop/ipc/metrics/RpcActivityMBean.java | 80 + .../apache/hadoop/ipc/metrics/RpcMetrics.java | 104 + .../org/apache/hadoop/ipc/metrics/RpcMgt.java | 119 + .../hadoop/ipc/metrics/RpcMgtMBean.java | 105 + src/java/org/apache/hadoop/ipc/package.html | 23 + src/java/org/apache/hadoop/log/LogLevel.java | 151 + .../apache/hadoop/metrics/ContextFactory.java | 204 ++ .../apache/hadoop/metrics/MetricsContext.java | 118 + .../hadoop/metrics/MetricsException.java | 42 + .../apache/hadoop/metrics/MetricsRecord.java | 246 ++ .../apache/hadoop/metrics/MetricsServlet.java | 160 + .../apache/hadoop/metrics/MetricsUtil.java | 100 + .../org/apache/hadoop/metrics/Updater.java | 33 + .../hadoop/metrics/file/FileContext.java | 139 + .../apache/hadoop/metrics/file/package.html | 43 + .../metrics/ganglia/GangliaContext.java | 231 ++ .../hadoop/metrics/ganglia/package.html | 74 + .../hadoop/metrics/jvm/EventCounter.java | 94 + .../apache/hadoop/metrics/jvm/JvmMetrics.java | 191 + .../org/apache/hadoop/metrics/package.html 
| 159 + .../metrics/spi/AbstractMetricsContext.java | 475 +++ .../hadoop/metrics/spi/CompositeContext.java | 186 + .../hadoop/metrics/spi/MetricValue.java | 52 + .../hadoop/metrics/spi/MetricsRecordImpl.java | 275 ++ .../metrics/spi/NoEmitMetricsContext.java | 49 + .../hadoop/metrics/spi/NullContext.java | 58 + .../spi/NullContextWithUpdateThread.java | 69 + .../hadoop/metrics/spi/OutputRecord.java | 90 + .../org/apache/hadoop/metrics/spi/Util.java | 67 + .../apache/hadoop/metrics/spi/package.html | 36 + .../apache/hadoop/metrics/util/MBeanUtil.java | 87 + .../hadoop/metrics/util/MetricsBase.java | 47 + .../metrics/util/MetricsDynamicMBeanBase.java | 226 ++ .../hadoop/metrics/util/MetricsIntValue.java | 104 + .../hadoop/metrics/util/MetricsLongValue.java | 88 + .../hadoop/metrics/util/MetricsRegistry.java | 85 + .../metrics/util/MetricsTimeVaryingInt.java | 128 + .../metrics/util/MetricsTimeVaryingLong.java | 124 + .../metrics/util/MetricsTimeVaryingRate.java | 196 + .../hadoop/net/CachedDNSToSwitchMapping.java | 80 + src/java/org/apache/hadoop/net/DNS.java | 279 ++ .../apache/hadoop/net/DNSToSwitchMapping.java | 42 + src/java/org/apache/hadoop/net/NetUtils.java | 440 +++ .../apache/hadoop/net/NetworkTopology.java | 655 ++++ src/java/org/apache/hadoop/net/Node.java | 47 + src/java/org/apache/hadoop/net/NodeBase.java | 134 + .../apache/hadoop/net/ScriptBasedMapping.java | 159 + .../hadoop/net/SocketIOWithTimeout.java | 455 +++ .../apache/hadoop/net/SocketInputStream.java | 170 + .../apache/hadoop/net/SocketOutputStream.java | 219 ++ .../apache/hadoop/net/SocksSocketFactory.java | 161 + .../hadoop/net/StandardSocketFactory.java | 122 + src/java/org/apache/hadoop/net/package.html | 23 + .../hadoop/record/BinaryRecordInput.java | 136 + .../hadoop/record/BinaryRecordOutput.java | 120 + src/java/org/apache/hadoop/record/Buffer.java | 246 ++ .../apache/hadoop/record/CsvRecordInput.java | 200 + .../apache/hadoop/record/CsvRecordOutput.java | 140 + src/java/org/apache/hadoop/record/Index.java | 37 + src/java/org/apache/hadoop/record/Record.java | 91 + .../hadoop/record/RecordComparator.java | 47 + .../org/apache/hadoop/record/RecordInput.java | 120 + .../apache/hadoop/record/RecordOutput.java | 141 + src/java/org/apache/hadoop/record/Utils.java | 490 +++ .../apache/hadoop/record/XmlRecordInput.java | 243 ++ .../apache/hadoop/record/XmlRecordOutput.java | 248 ++ .../hadoop/record/compiler/CGenerator.java | 71 + .../hadoop/record/compiler/CodeBuffer.java | 96 + .../hadoop/record/compiler/CodeGenerator.java | 53 + .../apache/hadoop/record/compiler/Consts.java | 44 + .../hadoop/record/compiler/CppGenerator.java | 74 + .../hadoop/record/compiler/JBoolean.java | 92 + .../hadoop/record/compiler/JBuffer.java | 103 + .../apache/hadoop/record/compiler/JByte.java | 80 + .../hadoop/record/compiler/JCompType.java | 72 + .../hadoop/record/compiler/JDouble.java | 89 + .../apache/hadoop/record/compiler/JField.java | 44 + .../apache/hadoop/record/compiler/JFile.java | 70 + .../apache/hadoop/record/compiler/JFloat.java | 86 + .../apache/hadoop/record/compiler/JInt.java | 80 + .../apache/hadoop/record/compiler/JLong.java | 84 + .../apache/hadoop/record/compiler/JMap.java | 229 ++ .../hadoop/record/compiler/JRecord.java | 806 ++++ .../hadoop/record/compiler/JString.java | 83 + .../apache/hadoop/record/compiler/JType.java | 222 ++ .../hadoop/record/compiler/JVector.java | 197 + .../hadoop/record/compiler/JavaGenerator.java | 50 + .../hadoop/record/compiler/ant/RccTask.java | 136 + 
.../compiler/generated/ParseException.java | 210 ++ .../hadoop/record/compiler/generated/Rcc.java | 535 +++ .../compiler/generated/RccConstants.java | 88 + .../compiler/generated/RccTokenManager.java | 833 +++++ .../compiler/generated/SimpleCharStream.java | 439 +++ .../record/compiler/generated/Token.java | 99 + .../compiler/generated/TokenMgrError.java | 151 + .../record/compiler/generated/package.html | 29 + .../hadoop/record/compiler/generated/rcc.jj | 384 ++ .../hadoop/record/compiler/package.html | 31 + .../hadoop/record/meta/FieldTypeInfo.java | 98 + .../apache/hadoop/record/meta/MapTypeID.java | 82 + .../hadoop/record/meta/RecordTypeInfo.java | 151 + .../hadoop/record/meta/StructTypeID.java | 156 + .../org/apache/hadoop/record/meta/TypeID.java | 107 + .../org/apache/hadoop/record/meta/Utils.java | 96 + .../hadoop/record/meta/VectorTypeID.java | 65 + .../org/apache/hadoop/record/package.html | 800 ++++ .../security/AccessControlException.java | 56 + .../org/apache/hadoop/security/AccessKey.java | 110 + .../apache/hadoop/security/AccessToken.java | 89 + .../hadoop/security/AccessTokenHandler.java | 289 ++ .../hadoop/security/ExportedAccessKeys.java | 138 + .../org/apache/hadoop/security/Group.java | 70 + .../security/InvalidAccessTokenException.java | 36 + .../hadoop/security/PermissionChecker.java | 80 + .../apache/hadoop/security/SecurityUtil.java | 159 + .../security/UnixUserGroupInformation.java | 432 +++ src/java/org/apache/hadoop/security/User.java | 70 + .../hadoop/security/UserGroupInformation.java | 129 + .../authorize/AuthorizationException.java | 76 + .../security/authorize/ConfiguredPolicy.java | 156 + .../authorize/ConnectionPermission.java | 74 + .../security/authorize/PolicyProvider.java | 50 + .../RefreshAuthorizationPolicyProtocol.java | 39 + .../hadoop/security/authorize/Service.java | 53 + .../ServiceAuthorizationManager.java | 105 + .../apache/hadoop/util/CyclicIteration.java | 108 + src/java/org/apache/hadoop/util/Daemon.java | 51 + .../org/apache/hadoop/util/DataChecksum.java | 247 ++ .../org/apache/hadoop/util/DiskChecker.java | 89 + .../hadoop/util/GenericOptionsParser.java | 408 +++ .../org/apache/hadoop/util/GenericsUtil.java | 70 + src/java/org/apache/hadoop/util/HeapSort.java | 71 + .../apache/hadoop/util/HostsFileReader.java | 115 + .../apache/hadoop/util/IndexedSortable.java | 36 + .../org/apache/hadoop/util/IndexedSorter.java | 46 + .../org/apache/hadoop/util/LineReader.java | 190 + .../util/LinuxMemoryCalculatorPlugin.java | 132 + .../hadoop/util/MemoryCalculatorPlugin.java | 74 + .../org/apache/hadoop/util/MergeSort.java | 85 + .../apache/hadoop/util/NativeCodeLoader.java | 89 + .../org/apache/hadoop/util/PlatformName.java | 45 + .../apache/hadoop/util/PrintJarMainClass.java | 51 + .../org/apache/hadoop/util/PriorityQueue.java | 150 + .../org/apache/hadoop/util/ProcessTree.java | 239 ++ .../hadoop/util/ProcfsBasedProcessTree.java | 448 +++ .../org/apache/hadoop/util/ProgramDriver.java | 144 + src/java/org/apache/hadoop/util/Progress.java | 132 + .../org/apache/hadoop/util/Progressable.java | 35 + .../org/apache/hadoop/util/QuickSort.java | 131 + .../apache/hadoop/util/ReflectionUtils.java | 291 ++ src/java/org/apache/hadoop/util/RunJar.java | 166 + .../org/apache/hadoop/util/ServicePlugin.java | 46 + .../org/apache/hadoop/util/ServletUtil.java | 105 + src/java/org/apache/hadoop/util/Shell.java | 357 ++ .../org/apache/hadoop/util/StringUtils.java | 679 ++++ src/java/org/apache/hadoop/util/Tool.java | 79 + .../org/apache/hadoop/util/ToolRunner.java | 91 
+ .../hadoop/util/UTF8ByteArrayUtils.java | 98 + .../org/apache/hadoop/util/VersionInfo.java | 116 + src/java/org/apache/hadoop/util/XMLUtils.java | 56 + .../apache/hadoop/util/bloom/BloomFilter.java | 234 ++ .../util/bloom/CountingBloomFilter.java | 305 ++ .../hadoop/util/bloom/DynamicBloomFilter.java | 293 ++ .../org/apache/hadoop/util/bloom/Filter.java | 213 ++ .../hadoop/util/bloom/HashFunction.java | 119 + .../org/apache/hadoop/util/bloom/Key.java | 178 + .../hadoop/util/bloom/RemoveScheme.java | 91 + .../util/bloom/RetouchedBloomFilter.java | 450 +++ .../org/apache/hadoop/util/hash/Hash.java | 119 + .../apache/hadoop/util/hash/JenkinsHash.java | 258 ++ .../apache/hadoop/util/hash/MurmurHash.java | 83 + src/java/org/apache/hadoop/util/package.html | 23 + src/java/overview.html | 292 ++ 352 files changed, 67543 insertions(+) create mode 100644 src/java/core-default.xml create mode 100644 src/java/org/apache/hadoop/HadoopVersionAnnotation.java create mode 100644 src/java/org/apache/hadoop/conf/Configurable.java create mode 100644 src/java/org/apache/hadoop/conf/Configuration.java create mode 100644 src/java/org/apache/hadoop/conf/Configured.java create mode 100644 src/java/org/apache/hadoop/conf/package.html create mode 100644 src/java/org/apache/hadoop/filecache/DistributedCache.java create mode 100644 src/java/org/apache/hadoop/fs/BlockLocation.java create mode 100644 src/java/org/apache/hadoop/fs/BufferedFSInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/ChecksumException.java create mode 100644 src/java/org/apache/hadoop/fs/ChecksumFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/ContentSummary.java create mode 100644 src/java/org/apache/hadoop/fs/DF.java create mode 100644 src/java/org/apache/hadoop/fs/DU.java create mode 100644 src/java/org/apache/hadoop/fs/FSDataInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/FSDataOutputStream.java create mode 100644 src/java/org/apache/hadoop/fs/FSError.java create mode 100644 src/java/org/apache/hadoop/fs/FSInputChecker.java create mode 100644 src/java/org/apache/hadoop/fs/FSInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/FSOutputSummer.java create mode 100644 src/java/org/apache/hadoop/fs/FileChecksum.java create mode 100644 src/java/org/apache/hadoop/fs/FileStatus.java create mode 100644 src/java/org/apache/hadoop/fs/FileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/FileUtil.java create mode 100644 src/java/org/apache/hadoop/fs/FilterFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/FsShell.java create mode 100644 src/java/org/apache/hadoop/fs/FsShellPermissions.java create mode 100644 src/java/org/apache/hadoop/fs/FsStatus.java create mode 100644 src/java/org/apache/hadoop/fs/FsUrlConnection.java create mode 100644 src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java create mode 100644 src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java create mode 100644 src/java/org/apache/hadoop/fs/GlobExpander.java create mode 100644 src/java/org/apache/hadoop/fs/HarFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/LengthFileChecksum.java create mode 100644 src/java/org/apache/hadoop/fs/LocalDirAllocator.java create mode 100644 src/java/org/apache/hadoop/fs/LocalFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java create mode 100644 src/java/org/apache/hadoop/fs/Path.java create mode 100644 src/java/org/apache/hadoop/fs/PathFilter.java create mode 100644 
src/java/org/apache/hadoop/fs/PositionedReadable.java create mode 100644 src/java/org/apache/hadoop/fs/RawLocalFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/Seekable.java create mode 100644 src/java/org/apache/hadoop/fs/Syncable.java create mode 100644 src/java/org/apache/hadoop/fs/Trash.java create mode 100644 src/java/org/apache/hadoop/fs/ftp/FTPException.java create mode 100644 src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/IFSImpl.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KFSImpl.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/package.html create mode 100644 src/java/org/apache/hadoop/fs/package.html create mode 100644 src/java/org/apache/hadoop/fs/permission/AccessControlException.java create mode 100644 src/java/org/apache/hadoop/fs/permission/FsAction.java create mode 100644 src/java/org/apache/hadoop/fs/permission/FsPermission.java create mode 100644 src/java/org/apache/hadoop/fs/permission/PermissionStatus.java create mode 100644 src/java/org/apache/hadoop/fs/s3/Block.java create mode 100644 src/java/org/apache/hadoop/fs/s3/FileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3/INode.java create mode 100644 src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3/MigrationTool.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3Credentials.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3Exception.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3FileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3InputStream.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3OutputStream.java create mode 100644 src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java create mode 100644 src/java/org/apache/hadoop/fs/s3/package.html create mode 100644 src/java/org/apache/hadoop/fs/s3native/FileMetadata.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/PartialListing.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/package.html create mode 100644 src/java/org/apache/hadoop/fs/shell/Command.java create mode 100644 src/java/org/apache/hadoop/fs/shell/CommandFormat.java create mode 100644 src/java/org/apache/hadoop/fs/shell/CommandUtils.java create mode 100644 src/java/org/apache/hadoop/fs/shell/Count.java create mode 100644 src/java/org/apache/hadoop/http/FilterContainer.java create mode 100644 src/java/org/apache/hadoop/http/FilterInitializer.java create mode 100644 src/java/org/apache/hadoop/http/HttpServer.java create mode 100644 src/java/org/apache/hadoop/io/AbstractMapWritable.java create mode 100644 src/java/org/apache/hadoop/io/ArrayFile.java create mode 100644 src/java/org/apache/hadoop/io/ArrayWritable.java create mode 100644 src/java/org/apache/hadoop/io/BinaryComparable.java create mode 100644 
src/java/org/apache/hadoop/io/BloomMapFile.java create mode 100644 src/java/org/apache/hadoop/io/BooleanWritable.java create mode 100644 src/java/org/apache/hadoop/io/ByteWritable.java create mode 100644 src/java/org/apache/hadoop/io/BytesWritable.java create mode 100644 src/java/org/apache/hadoop/io/Closeable.java create mode 100644 src/java/org/apache/hadoop/io/CompressedWritable.java create mode 100644 src/java/org/apache/hadoop/io/DataInputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/DataOutputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/DefaultStringifier.java create mode 100644 src/java/org/apache/hadoop/io/DeprecatedUTF8.java create mode 100644 src/java/org/apache/hadoop/io/DoubleWritable.java create mode 100644 src/java/org/apache/hadoop/io/EnumSetWritable.java create mode 100644 src/java/org/apache/hadoop/io/FloatWritable.java create mode 100644 src/java/org/apache/hadoop/io/GenericWritable.java create mode 100644 src/java/org/apache/hadoop/io/IOUtils.java create mode 100644 src/java/org/apache/hadoop/io/InputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/IntWritable.java create mode 100644 src/java/org/apache/hadoop/io/LongWritable.java create mode 100644 src/java/org/apache/hadoop/io/MD5Hash.java create mode 100644 src/java/org/apache/hadoop/io/MapFile.java create mode 100644 src/java/org/apache/hadoop/io/MapWritable.java create mode 100644 src/java/org/apache/hadoop/io/MultipleIOException.java create mode 100644 src/java/org/apache/hadoop/io/NullWritable.java create mode 100644 src/java/org/apache/hadoop/io/ObjectWritable.java create mode 100644 src/java/org/apache/hadoop/io/OutputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/RawComparator.java create mode 100644 src/java/org/apache/hadoop/io/SequenceFile.java create mode 100644 src/java/org/apache/hadoop/io/SetFile.java create mode 100644 src/java/org/apache/hadoop/io/SortedMapWritable.java create mode 100644 src/java/org/apache/hadoop/io/Stringifier.java create mode 100644 src/java/org/apache/hadoop/io/Text.java create mode 100644 src/java/org/apache/hadoop/io/TwoDArrayWritable.java create mode 100644 src/java/org/apache/hadoop/io/UTF8.java create mode 100644 src/java/org/apache/hadoop/io/VIntWritable.java create mode 100644 src/java/org/apache/hadoop/io/VLongWritable.java create mode 100644 src/java/org/apache/hadoop/io/VersionMismatchException.java create mode 100644 src/java/org/apache/hadoop/io/VersionedWritable.java create mode 100644 src/java/org/apache/hadoop/io/Writable.java create mode 100644 src/java/org/apache/hadoop/io/WritableComparable.java create mode 100644 src/java/org/apache/hadoop/io/WritableComparator.java create mode 100644 src/java/org/apache/hadoop/io/WritableFactories.java create mode 100644 src/java/org/apache/hadoop/io/WritableFactory.java create mode 100644 src/java/org/apache/hadoop/io/WritableName.java create mode 100644 src/java/org/apache/hadoop/io/WritableUtils.java create mode 100644 src/java/org/apache/hadoop/io/compress/BZip2Codec.java create mode 100644 src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/CodecPool.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressionCodec.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressionInputStream.java create 
mode 100644 src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/Compressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/Decompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/DecompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/DefaultCodec.java create mode 100644 src/java/org/apache/hadoop/io/compress/GzipCodec.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/CRC.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java create mode 100644 src/java/org/apache/hadoop/io/package.html create mode 100644 src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java create mode 100644 src/java/org/apache/hadoop/io/retry/RetryPolicies.java create mode 100644 src/java/org/apache/hadoop/io/retry/RetryPolicy.java create mode 100644 src/java/org/apache/hadoop/io/retry/RetryProxy.java create mode 100644 src/java/org/apache/hadoop/io/retry/package.html create mode 100644 src/java/org/apache/hadoop/io/serializer/Deserializer.java create mode 100644 src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java create mode 100644 src/java/org/apache/hadoop/io/serializer/JavaSerialization.java create mode 100644 src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java create mode 100644 src/java/org/apache/hadoop/io/serializer/Serialization.java create mode 100644 src/java/org/apache/hadoop/io/serializer/SerializationFactory.java create mode 100644 src/java/org/apache/hadoop/io/serializer/Serializer.java create mode 100644 src/java/org/apache/hadoop/io/serializer/WritableSerialization.java create mode 100644 src/java/org/apache/hadoop/io/serializer/package.html create mode 100644 src/java/org/apache/hadoop/ipc/Client.java create mode 100644 src/java/org/apache/hadoop/ipc/ConnectionHeader.java create mode 100644 src/java/org/apache/hadoop/ipc/RPC.java create mode 100644 src/java/org/apache/hadoop/ipc/RemoteException.java create mode 100644 src/java/org/apache/hadoop/ipc/Server.java create mode 100644 src/java/org/apache/hadoop/ipc/Status.java create mode 100644 src/java/org/apache/hadoop/ipc/VersionedProtocol.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java create mode 100644 src/java/org/apache/hadoop/ipc/package.html create mode 100644 src/java/org/apache/hadoop/log/LogLevel.java create mode 
100644 src/java/org/apache/hadoop/metrics/ContextFactory.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsContext.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsException.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsRecord.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsServlet.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsUtil.java create mode 100644 src/java/org/apache/hadoop/metrics/Updater.java create mode 100644 src/java/org/apache/hadoop/metrics/file/FileContext.java create mode 100644 src/java/org/apache/hadoop/metrics/file/package.html create mode 100644 src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java create mode 100644 src/java/org/apache/hadoop/metrics/ganglia/package.html create mode 100644 src/java/org/apache/hadoop/metrics/jvm/EventCounter.java create mode 100644 src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java create mode 100644 src/java/org/apache/hadoop/metrics/package.html create mode 100644 src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/CompositeContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/MetricValue.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/NullContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/OutputRecord.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/Util.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/package.html create mode 100644 src/java/org/apache/hadoop/metrics/util/MBeanUtil.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsBase.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java create mode 100644 src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java create mode 100644 src/java/org/apache/hadoop/net/DNS.java create mode 100644 src/java/org/apache/hadoop/net/DNSToSwitchMapping.java create mode 100644 src/java/org/apache/hadoop/net/NetUtils.java create mode 100644 src/java/org/apache/hadoop/net/NetworkTopology.java create mode 100644 src/java/org/apache/hadoop/net/Node.java create mode 100644 src/java/org/apache/hadoop/net/NodeBase.java create mode 100644 src/java/org/apache/hadoop/net/ScriptBasedMapping.java create mode 100644 src/java/org/apache/hadoop/net/SocketIOWithTimeout.java create mode 100644 src/java/org/apache/hadoop/net/SocketInputStream.java create mode 100644 src/java/org/apache/hadoop/net/SocketOutputStream.java create mode 100644 src/java/org/apache/hadoop/net/SocksSocketFactory.java create mode 100644 src/java/org/apache/hadoop/net/StandardSocketFactory.java create mode 100644 src/java/org/apache/hadoop/net/package.html create mode 100644 
src/java/org/apache/hadoop/record/BinaryRecordInput.java create mode 100644 src/java/org/apache/hadoop/record/BinaryRecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/Buffer.java create mode 100644 src/java/org/apache/hadoop/record/CsvRecordInput.java create mode 100644 src/java/org/apache/hadoop/record/CsvRecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/Index.java create mode 100644 src/java/org/apache/hadoop/record/Record.java create mode 100644 src/java/org/apache/hadoop/record/RecordComparator.java create mode 100644 src/java/org/apache/hadoop/record/RecordInput.java create mode 100644 src/java/org/apache/hadoop/record/RecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/Utils.java create mode 100644 src/java/org/apache/hadoop/record/XmlRecordInput.java create mode 100644 src/java/org/apache/hadoop/record/XmlRecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CodeBuffer.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CodeGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/Consts.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CppGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JBoolean.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JBuffer.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JByte.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JCompType.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JDouble.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JField.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JFile.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JFloat.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JInt.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JLong.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JMap.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JRecord.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JString.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JType.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JVector.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JavaGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/ant/RccTask.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/ParseException.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/Rcc.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/RccConstants.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/Token.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/package.html create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/rcc.jj create mode 100644 src/java/org/apache/hadoop/record/compiler/package.html create mode 100644 src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java create mode 100644 
src/java/org/apache/hadoop/record/meta/MapTypeID.java create mode 100644 src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java create mode 100644 src/java/org/apache/hadoop/record/meta/StructTypeID.java create mode 100644 src/java/org/apache/hadoop/record/meta/TypeID.java create mode 100644 src/java/org/apache/hadoop/record/meta/Utils.java create mode 100644 src/java/org/apache/hadoop/record/meta/VectorTypeID.java create mode 100644 src/java/org/apache/hadoop/record/package.html create mode 100644 src/java/org/apache/hadoop/security/AccessControlException.java create mode 100644 src/java/org/apache/hadoop/security/AccessKey.java create mode 100644 src/java/org/apache/hadoop/security/AccessToken.java create mode 100644 src/java/org/apache/hadoop/security/AccessTokenHandler.java create mode 100644 src/java/org/apache/hadoop/security/ExportedAccessKeys.java create mode 100644 src/java/org/apache/hadoop/security/Group.java create mode 100644 src/java/org/apache/hadoop/security/InvalidAccessTokenException.java create mode 100644 src/java/org/apache/hadoop/security/PermissionChecker.java create mode 100644 src/java/org/apache/hadoop/security/SecurityUtil.java create mode 100644 src/java/org/apache/hadoop/security/UnixUserGroupInformation.java create mode 100644 src/java/org/apache/hadoop/security/User.java create mode 100644 src/java/org/apache/hadoop/security/UserGroupInformation.java create mode 100644 src/java/org/apache/hadoop/security/authorize/AuthorizationException.java create mode 100644 src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java create mode 100644 src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java create mode 100644 src/java/org/apache/hadoop/security/authorize/PolicyProvider.java create mode 100644 src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java create mode 100644 src/java/org/apache/hadoop/security/authorize/Service.java create mode 100644 src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java create mode 100644 src/java/org/apache/hadoop/util/CyclicIteration.java create mode 100644 src/java/org/apache/hadoop/util/Daemon.java create mode 100644 src/java/org/apache/hadoop/util/DataChecksum.java create mode 100644 src/java/org/apache/hadoop/util/DiskChecker.java create mode 100644 src/java/org/apache/hadoop/util/GenericOptionsParser.java create mode 100644 src/java/org/apache/hadoop/util/GenericsUtil.java create mode 100644 src/java/org/apache/hadoop/util/HeapSort.java create mode 100644 src/java/org/apache/hadoop/util/HostsFileReader.java create mode 100644 src/java/org/apache/hadoop/util/IndexedSortable.java create mode 100644 src/java/org/apache/hadoop/util/IndexedSorter.java create mode 100644 src/java/org/apache/hadoop/util/LineReader.java create mode 100644 src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java create mode 100644 src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java create mode 100644 src/java/org/apache/hadoop/util/MergeSort.java create mode 100644 src/java/org/apache/hadoop/util/NativeCodeLoader.java create mode 100644 src/java/org/apache/hadoop/util/PlatformName.java create mode 100644 src/java/org/apache/hadoop/util/PrintJarMainClass.java create mode 100644 src/java/org/apache/hadoop/util/PriorityQueue.java create mode 100644 src/java/org/apache/hadoop/util/ProcessTree.java create mode 100644 src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java create mode 100644 src/java/org/apache/hadoop/util/ProgramDriver.java create mode 
100644 src/java/org/apache/hadoop/util/Progress.java create mode 100644 src/java/org/apache/hadoop/util/Progressable.java create mode 100644 src/java/org/apache/hadoop/util/QuickSort.java create mode 100644 src/java/org/apache/hadoop/util/ReflectionUtils.java create mode 100644 src/java/org/apache/hadoop/util/RunJar.java create mode 100644 src/java/org/apache/hadoop/util/ServicePlugin.java create mode 100644 src/java/org/apache/hadoop/util/ServletUtil.java create mode 100644 src/java/org/apache/hadoop/util/Shell.java create mode 100644 src/java/org/apache/hadoop/util/StringUtils.java create mode 100644 src/java/org/apache/hadoop/util/Tool.java create mode 100644 src/java/org/apache/hadoop/util/ToolRunner.java create mode 100644 src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java create mode 100644 src/java/org/apache/hadoop/util/VersionInfo.java create mode 100644 src/java/org/apache/hadoop/util/XMLUtils.java create mode 100644 src/java/org/apache/hadoop/util/bloom/BloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/Filter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/HashFunction.java create mode 100644 src/java/org/apache/hadoop/util/bloom/Key.java create mode 100644 src/java/org/apache/hadoop/util/bloom/RemoveScheme.java create mode 100644 src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/hash/Hash.java create mode 100644 src/java/org/apache/hadoop/util/hash/JenkinsHash.java create mode 100644 src/java/org/apache/hadoop/util/hash/MurmurHash.java create mode 100644 src/java/org/apache/hadoop/util/package.html create mode 100644 src/java/overview.html diff --git a/src/java/core-default.xml b/src/java/core-default.xml new file mode 100644 index 00000000000..b56dda4235b --- /dev/null +++ b/src/java/core-default.xml @@ -0,0 +1,444 @@ + + + + + + + + + + + + + hadoop.tmp.dir + /tmp/hadoop-${user.name} + A base for other temporary directories. + + + + hadoop.native.lib + true + Should native hadoop libraries, if present, be used. + + + + hadoop.http.filter.initializers + + A comma separated list of class names. Each class in the list + must extend org.apache.hadoop.http.FilterInitializer. The corresponding + Filter will be initialized. Then, the Filter will be applied to all user + facing jsp and servlet web pages. The ordering of the list defines the + ordering of the filters. + + + + hadoop.security.authorization + false + Is service-level authorization enabled? + + + + + + hadoop.logfile.size + 10000000 + The max size of each log file + + + + hadoop.logfile.count + 10 + The max number of log files + + + + + io.file.buffer.size + 4096 + The size of buffer for use in sequence files. + The size of this buffer should probably be a multiple of hardware + page size (4096 on Intel x86), and it determines how much data is + buffered during read and write operations. + + + + io.bytes.per.checksum + 512 + The number of bytes per checksum. Must not be larger than + io.file.buffer.size. + + + + io.skip.checksum.errors + false + If true, when a checksum error is encountered while + reading a sequence file, entries are skipped, instead of throwing an + exception. 
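A minimal usage sketch (illustrative only; the class name CoreIoSettingsSketch is hypothetical and this is not a file in this patch) of how client code reads the I/O settings above through the Configuration API added in this commit, assuming core-default.xml and core-site.xml are on the classpath:

import org.apache.hadoop.conf.Configuration;

public class CoreIoSettingsSketch {
  public static void main(String[] args) {
    // new Configuration() loads core-default.xml and then core-site.xml.
    Configuration conf = new Configuration();
    int bufferSize = conf.getInt("io.file.buffer.size", 4096);
    int bytesPerChecksum = conf.getInt("io.bytes.per.checksum", 512);
    boolean skipChecksumErrors = conf.getBoolean("io.skip.checksum.errors", false);
    System.out.println(bufferSize + " " + bytesPerChecksum + " " + skipChecksumErrors);
  }
}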
+ + + + io.compression.codecs + org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec + A list of the compression codec classes that can be used + for compression/decompression. + + + + io.serializations + org.apache.hadoop.io.serializer.WritableSerialization + A list of serialization classes that can be used for + obtaining serializers and deserializers. + + + + io.seqfile.local.dir + ${hadoop.tmp.dir}/io/local + The local directory where sequence file stores intermediate + data files during merge. May be a comma-separated list of + directories on different devices in order to spread disk i/o. + Directories that do not exist are ignored. + + + + + + + fs.default.name + file:/// + The name of the default file system. A URI whose + scheme and authority determine the FileSystem implementation. The + uri's scheme determines the config property (fs.SCHEME.impl) naming + the FileSystem implementation class. The uri's authority is used to + determine the host, port, etc. for a filesystem. + + + + fs.trash.interval + 0 + Number of minutes between trash checkpoints. + If zero, the trash feature is disabled. + + + + + fs.file.impl + org.apache.hadoop.fs.LocalFileSystem + The FileSystem for file: uris. + + + + fs.hdfs.impl + org.apache.hadoop.hdfs.DistributedFileSystem + The FileSystem for hdfs: uris. + + + + fs.s3.impl + org.apache.hadoop.fs.s3.S3FileSystem + The FileSystem for s3: uris. + + + + fs.s3n.impl + org.apache.hadoop.fs.s3native.NativeS3FileSystem + The FileSystem for s3n: (Native S3) uris. + + + + fs.kfs.impl + org.apache.hadoop.fs.kfs.KosmosFileSystem + The FileSystem for kfs: uris. + + + + fs.hftp.impl + org.apache.hadoop.hdfs.HftpFileSystem + + + + fs.hsftp.impl + org.apache.hadoop.hdfs.HsftpFileSystem + + + + fs.ftp.impl + org.apache.hadoop.fs.ftp.FTPFileSystem + The FileSystem for ftp: uris. + + + + fs.ramfs.impl + org.apache.hadoop.fs.InMemoryFileSystem + The FileSystem for ramfs: uris. + + + + fs.har.impl + org.apache.hadoop.fs.HarFileSystem + The filesystem for Hadoop archives. + + + + fs.checkpoint.dir + ${hadoop.tmp.dir}/dfs/namesecondary + Determines where on the local filesystem the DFS secondary + name node should store the temporary images to merge. + If this is a comma-delimited list of directories then the image is + replicated in all of the directories for redundancy. + + + + + fs.checkpoint.edits.dir + ${fs.checkpoint.dir} + Determines where on the local filesystem the DFS secondary + name node should store the temporary edits to merge. + If this is a comma-delimited list of directories then the edits are + replicated in all of the directories for redundancy. + Default value is same as fs.checkpoint.dir + + + + + fs.checkpoint.period + 3600 + The number of seconds between two periodic checkpoints. + + + + + fs.checkpoint.size + 67108864 + The size of the current edit log (in bytes) that triggers + a periodic checkpoint even if the fs.checkpoint.period hasn't expired. + + + + + + + fs.s3.block.size + 67108864 + Block size to use when writing files to S3. + + + + fs.s3.buffer.dir + ${hadoop.tmp.dir}/s3 + Determines where on the local filesystem the S3 filesystem + should store files before sending them to S3 + (or after retrieving them from S3). + + + + + fs.s3.maxRetries + 4 + The maximum number of retries for reading or writing files to S3, + before we signal failure to the application. + + + + + fs.s3.sleepTimeSeconds + 10 + The number of seconds to sleep between each S3 retry. 
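A minimal sketch (hypothetical class name, not a file in this patch) of how the fs.*.impl mappings above are used: FileSystem.get consults fs.default.name when no URI is given, and fs.<scheme>.impl names the implementation class registered for a scheme:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsSchemeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // fs.default.name is file:/// unless overridden in core-site.xml.
    FileSystem fs = FileSystem.get(conf);
    System.out.println("default filesystem: " + fs.getUri());
    // The implementation class for a scheme is looked up under fs.<scheme>.impl.
    System.out.println("s3 implementation: " + conf.get("fs.s3.impl"));
    System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
  }
}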
+ + + + + + local.cache.size + 10737418240 + The limit on the size of cache you want to keep, set by default + to 10GB. This will act as a soft limit on the cache directory for out of band data. + + + + + io.seqfile.compress.blocksize + 1000000 + The minimum block size for compression in block compressed + SequenceFiles. + + + + + io.seqfile.lazydecompress + true + Should values of block-compressed SequenceFiles be decompressed + only when necessary. + + + + + io.seqfile.sorter.recordlimit + 1000000 + The limit on number of records to be kept in memory in a spill + in SequenceFiles.Sorter + + + + + io.mapfile.bloom.size + 1048576 + The size of BloomFilter-s used in BloomMapFile. Each time this many + keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter). + Larger values minimize the number of filters, which slightly increases the performance, + but may waste too much space if the total number of keys is usually much smaller + than this number. + + + + + io.mapfile.bloom.error.rate + 0.005 + The rate of false positives in BloomFilter-s used in BloomMapFile. + As this value decreases, the size of BloomFilter-s increases exponentially. This + value is the probability of encountering false positives (default is 0.5%). + + + + + hadoop.util.hash.type + murmur + The default implementation of Hash. Currently this can take one of the + two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash. + + + + + + + + ipc.client.idlethreshold + 4000 + Defines the threshold number of connections after which + connections will be inspected for idleness. + + + + + ipc.client.kill.max + 10 + Defines the maximum number of clients to disconnect in one go. + + + + + ipc.client.connection.maxidletime + 10000 + The maximum time in msec after which a client will bring down the + connection to the server. + + + + + ipc.client.connect.max.retries + 10 + Indicates the number of retries a client will make to establish + a server connection. + + + + + ipc.server.listen.queue.size + 128 + Indicates the length of the listen queue for servers accepting + client connections. + + + + + ipc.server.tcpnodelay + false + Turn on/off Nagle's algorithm for the TCP socket connection on + the server. Setting to true disables the algorithm and may decrease latency + with a cost of more/smaller packets. + + + + + ipc.client.tcpnodelay + false + Turn on/off Nagle's algorithm for the TCP socket connection on + the client. Setting to true disables the algorithm and may decrease latency + with a cost of more/smaller packets. + + + + + + + + webinterface.private.actions + false + If set to true, the web interfaces of JT and NN may contain + actions, such as kill job, delete file, etc., that should + not be exposed to public. Enable this option if the interfaces + are only reachable by those who have the right authorization. + + + + + + + hadoop.rpc.socket.factory.class.default + org.apache.hadoop.net.StandardSocketFactory + Default SocketFactory to use. This parameter is expected to be + formatted as "package.FactoryClassName". + + + + + hadoop.rpc.socket.factory.class.ClientProtocol + + SocketFactory to use to connect to a DFS. If null or empty, use + hadoop.rpc.socket.class.default. This socket factory is also used by + DFSClient to create sockets to DataNodes. + + + + + + + hadoop.socks.server + + Address (host:port) of the SOCKS server to be used by the + SocksSocketFactory. 
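The ipc.* and socket-factory settings above can also be overridden programmatically before any IPC client is created; a small sketch (hypothetical class name, not part of this patch) using only the Configuration setters, where in-code values take precedence over the XML resources unless a property is marked final:

import org.apache.hadoop.conf.Configuration;

public class IpcTuningSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setInt("ipc.client.connect.max.retries", 3);
    conf.setInt("ipc.client.connection.maxidletime", 5000); // milliseconds
    conf.setBoolean("ipc.client.tcpnodelay", true);
    System.out.println(conf.getInt("ipc.client.connect.max.retries", 10));
  }
}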
+ + + + + + + topology.node.switch.mapping.impl + org.apache.hadoop.net.ScriptBasedMapping + The default implementation of the DNSToSwitchMapping. It + invokes a script specified in topology.script.file.name to resolve + node names. If the value for topology.script.file.name is not set, the + default value of DEFAULT_RACK is returned for all node names. + + + + + topology.script.file.name + + The script name that should be invoked to resolve DNS names to + NetworkTopology names. Example: the script would take host.foo.bar as an + argument, and return /rack1 as the output. + + + + + topology.script.number.args + 100 + The max number of args that the script configured with + topology.script.file.name should be run with. Each arg is an + IP address. + + + + + + diff --git a/src/java/org/apache/hadoop/HadoopVersionAnnotation.java b/src/java/org/apache/hadoop/HadoopVersionAnnotation.java new file mode 100644 index 00000000000..324003a839b --- /dev/null +++ b/src/java/org/apache/hadoop/HadoopVersionAnnotation.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop; + +import java.lang.annotation.*; + +/** + * A package attribute that captures the version of Hadoop that was compiled. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.PACKAGE) +public @interface HadoopVersionAnnotation { + + /** + * Get the Hadoop version + * @return the version string "0.6.3-dev" + */ + String version(); + + /** + * Get the username that compiled Hadoop. + */ + String user(); + + /** + * Get the date when Hadoop was compiled. + * @return the date in unix 'date' format + */ + String date(); + + /** + * Get the url for the subversion repository. + */ + String url(); + + /** + * Get the subversion revision. + * @return the revision number as a string (eg. "451451") + */ + String revision(); + + /** + * Get the branch from which this was compiled. + * @return The branch name, e.g. "trunk" or "branches/branch-0.20" + */ + String branch(); + + /** + * Get a checksum of the source files from which + * Hadoop was compiled. + * @return a string that uniquely identifies the source + **/ + String srcChecksum(); +} diff --git a/src/java/org/apache/hadoop/conf/Configurable.java b/src/java/org/apache/hadoop/conf/Configurable.java new file mode 100644 index 00000000000..f4637f0e82b --- /dev/null +++ b/src/java/org/apache/hadoop/conf/Configurable.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.conf; + +/** Something that may be configured with a {@link Configuration}. */ +public interface Configurable { + + /** Set the configuration to be used by this object. */ + void setConf(Configuration conf); + + /** Return the configuration used by this object. */ + Configuration getConf(); +} diff --git a/src/java/org/apache/hadoop/conf/Configuration.java b/src/java/org/apache/hadoop/conf/Configuration.java new file mode 100644 index 00000000000..e1381f3bb62 --- /dev/null +++ b/src/java/org/apache/hadoop/conf/Configuration.java @@ -0,0 +1,1326 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.conf; + +import java.io.BufferedInputStream; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.Reader; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.WeakHashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.w3c.dom.DOMException; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; +import org.xml.sax.SAXException; + +/** + * Provides access to configuration parameters. + * + *

+ * <h4>Resources</h4>
+ *
+ * <p>Configurations are specified by resources. A resource contains a set of
+ * name/value pairs as XML data. Each resource is named by either a
+ * String or by a {@link Path}. If named by a String,
+ * then the classpath is examined for a file with that name. If named by a
+ * Path, then the local filesystem is examined directly, without
+ * referring to the classpath.
+ *
+ * <p>Unless explicitly turned off, Hadoop by default specifies two
+ * resources, loaded in-order from the classpath:
+ * <ol>
+ * <li>core-default.xml: Read-only defaults for hadoop.</li>
+ * <li>core-site.xml: Site-specific configuration for a given hadoop
+ * installation.</li>
+ * </ol>
+ * Applications may add additional resources, which are loaded
+ * subsequent to these resources in the order they are added.
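A minimal usage sketch of the resource layering described above (illustrative only; it assumes Hadoop on the classpath and "my-app-site.xml" is a hypothetical application-supplied resource):

    import org.apache.hadoop.conf.Configuration;

    public class ResourceLayeringSketch {
      public static void main(String[] args) {
        // Loads core-default.xml and then core-site.xml from the classpath.
        Configuration conf = new Configuration();
        // A hypothetical application resource, loaded after the defaults and
        // therefore able to override any non-final property they define.
        conf.addResource("my-app-site.xml");
        System.out.println(conf.get("hadoop.tmp.dir"));
      }
    }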

+ * <h4>Final Parameters</h4>
+ *
+ * <p>Configuration parameters may be declared final.
+ * Once a resource declares a value final, no subsequently-loaded
+ * resource can alter that value.
+ * For example, one might define a final parameter with:
+ *
+ *  <property>
+ *    <name>dfs.client.buffer.dir</name>
+ *    <value>/tmp/hadoop/dfs/client</value>
+ *    <final>true</final>
+ *  </property>
+ *
+ * Administrators typically define parameters as final in
+ * core-site.xml for values that user applications may not alter.
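A short sketch of the final-parameter behaviour just described, using only classes from this patch; the in-memory XML snippets are made up for illustration:

    import java.io.ByteArrayInputStream;
    import java.io.InputStream;
    import org.apache.hadoop.conf.Configuration;

    public class FinalParameterSketch {
      // Wrap a property list in a <configuration> element, as loadResource expects.
      private static InputStream xml(String properties) {
        return new ByteArrayInputStream(
            ("<configuration>" + properties + "</configuration>").getBytes());
      }

      public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip the default resources
        conf.addResource(xml("<property><name>dfs.client.buffer.dir</name>"
            + "<value>/tmp/hadoop/dfs/client</value><final>true</final></property>"));
        // A later resource tries to override the final value and is ignored
        // (a warning is logged instead).
        conf.addResource(xml("<property><name>dfs.client.buffer.dir</name>"
            + "<value>/some/other/dir</value></property>"));
        System.out.println(conf.get("dfs.client.buffer.dir")); // /tmp/hadoop/dfs/client
      }
    }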

+ * <h4>Variable Expansion</h4>
+ *
+ * <p>Value strings are first processed for variable expansion. The
+ * available properties are:
+ * <ol>
+ * <li>Other properties defined in this Configuration; and, if a name is
+ * undefined here,</li>
+ * <li>Properties in {@link System#getProperties()}.</li>
+ * </ol>

For example, if a configuration resource contains the following property + * definitions: + *

+ *  <property>
+ *    <name>basedir</name>
+ *    <value>/user/${user.name}</value>
+ *  </property>
+ *  
+ *  <property>
+ *    <name>tempdir</name>
+ *    <value>${basedir}/tmp</value>
+ *  </property>
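The same expansion can be exercised programmatically; a short sketch using the property names from the example above:

    import org.apache.hadoop.conf.Configuration;

    public class VariableExpansionSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("basedir", "/user/${user.name}");
        conf.set("tempdir", "${basedir}/tmp");
        // Prints /user/<current user>/tmp once both variables are expanded.
        System.out.println(conf.get("tempdir"));
      }
    }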
+ * + * When conf.get("tempdir") is called, then ${basedir} + * will be resolved to another property in this Configuration, while + * ${user.name} would then ordinarily be resolved to the value + * of the System property with that name. + */ +public class Configuration implements Iterable>, + Writable { + private static final Log LOG = + LogFactory.getLog(Configuration.class); + + private boolean quietmode = true; + + /** + * List of configuration resources. + */ + private ArrayList resources = new ArrayList(); + + /** + * List of configuration parameters marked final. + */ + private Set finalParameters = new HashSet(); + + private boolean loadDefaults = true; + + /** + * Configurtion objects + */ + private static final WeakHashMap REGISTRY = + new WeakHashMap(); + + /** + * List of default Resources. Resources are loaded in the order of the list + * entries + */ + private static final ArrayList defaultResources = + new ArrayList(); + + static{ + //print deprecation warning if hadoop-site.xml is found in classpath + ClassLoader cL = Thread.currentThread().getContextClassLoader(); + if (cL == null) { + cL = Configuration.class.getClassLoader(); + } + if(cL.getResource("hadoop-site.xml")!=null) { + LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. " + + "Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, " + + "mapred-site.xml and hdfs-site.xml to override properties of " + + "core-default.xml, mapred-default.xml and hdfs-default.xml " + + "respectively"); + } + addDefaultResource("core-default.xml"); + addDefaultResource("core-site.xml"); + } + + private Properties properties; + private Properties overlay; + private ClassLoader classLoader; + { + classLoader = Thread.currentThread().getContextClassLoader(); + if (classLoader == null) { + classLoader = Configuration.class.getClassLoader(); + } + } + + /** A new configuration. */ + public Configuration() { + this(true); + } + + /** A new configuration where the behavior of reading from the default + * resources can be turned off. + * + * If the parameter {@code loadDefaults} is false, the new instance + * will not load resources from the default files. + * @param loadDefaults specifies whether to load from the default files + */ + public Configuration(boolean loadDefaults) { + this.loadDefaults = loadDefaults; + if (LOG.isDebugEnabled()) { + LOG.debug(StringUtils.stringifyException(new IOException("config()"))); + } + synchronized(Configuration.class) { + REGISTRY.put(this, null); + } + } + + /** + * A new configuration with the same settings cloned from another. + * + * @param other the configuration from which to clone settings. + */ + @SuppressWarnings("unchecked") + public Configuration(Configuration other) { + if (LOG.isDebugEnabled()) { + LOG.debug(StringUtils.stringifyException + (new IOException("config(config)"))); + } + + this.resources = (ArrayList)other.resources.clone(); + synchronized(other) { + if (other.properties != null) { + this.properties = (Properties)other.properties.clone(); + } + + if (other.overlay!=null) { + this.overlay = (Properties)other.overlay.clone(); + } + } + + this.finalParameters = new HashSet(other.finalParameters); + synchronized(Configuration.class) { + REGISTRY.put(this, null); + } + } + + /** + * Add a default resource. Resources are loaded in the order of the resources + * added. + * @param name file name. File should be present in the classpath. 
+ */ + public static synchronized void addDefaultResource(String name) { + if(!defaultResources.contains(name)) { + defaultResources.add(name); + for(Configuration conf : REGISTRY.keySet()) { + if(conf.loadDefaults) { + conf.reloadConfiguration(); + } + } + } + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param name resource to be added, the classpath is examined for a file + * with that name. + */ + public void addResource(String name) { + addResourceObject(name); + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param url url of the resource to be added, the local filesystem is + * examined directly to find the resource, without referring to + * the classpath. + */ + public void addResource(URL url) { + addResourceObject(url); + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param file file-path of resource to be added, the local filesystem is + * examined directly to find the resource, without referring to + * the classpath. + */ + public void addResource(Path file) { + addResourceObject(file); + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param in InputStream to deserialize the object from. + */ + public void addResource(InputStream in) { + addResourceObject(in); + } + + + /** + * Reload configuration from previously added resources. + * + * This method will clear all the configuration read from the added + * resources, and final parameters. This will make the resources to + * be read again before accessing the values. Values that are added + * via set methods will overlay values read from the resources. + */ + public synchronized void reloadConfiguration() { + properties = null; // trigger reload + finalParameters.clear(); // clear site-limits + } + + private synchronized void addResourceObject(Object resource) { + resources.add(resource); // add to resources + reloadConfiguration(); + } + + private static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}"); + private static int MAX_SUBST = 20; + + private String substituteVars(String expr) { + if (expr == null) { + return null; + } + Matcher match = varPat.matcher(""); + String eval = expr; + for(int s=0; sname property, null if + * no such property exists. + * + * Values are processed for variable expansion + * before being returned. + * + * @param name the property name. + * @return the value of the name property, + * or null if no such property exists. + */ + public String get(String name) { + return substituteVars(getProps().getProperty(name)); + } + + /** + * Get the value of the name property, without doing + * variable expansion. + * + * @param name the property name. + * @return the value of the name property, + * or null if no such property exists. + */ + public String getRaw(String name) { + return getProps().getProperty(name); + } + + /** + * Set the value of the name property. + * + * @param name property name. + * @param value property value. 
+ */ + public void set(String name, String value) { + getOverlay().setProperty(name, value); + getProps().setProperty(name, value); + } + + /** + * Sets a property if it is currently unset. + * @param name the property name + * @param value the new value + */ + public void setIfUnset(String name, String value) { + if (get(name) == null) { + set(name, value); + } + } + + private synchronized Properties getOverlay() { + if (overlay==null){ + overlay=new Properties(); + } + return overlay; + } + + /** + * Get the value of the name property. If no such property + * exists, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value, or defaultValue if the property + * doesn't exist. + */ + public String get(String name, String defaultValue) { + return substituteVars(getProps().getProperty(name, defaultValue)); + } + + /** + * Get the value of the name property as an int. + * + * If no such property exists, or if the specified value is not a valid + * int, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as an int, + * or defaultValue. + */ + public int getInt(String name, int defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + String hexString = getHexDigits(valueString); + if (hexString != null) { + return Integer.parseInt(hexString, 16); + } + return Integer.parseInt(valueString); + } catch (NumberFormatException e) { + return defaultValue; + } + } + + /** + * Set the value of the name property to an int. + * + * @param name property name. + * @param value int value of the property. + */ + public void setInt(String name, int value) { + set(name, Integer.toString(value)); + } + + + /** + * Get the value of the name property as a long. + * If no such property is specified, or if the specified value is not a valid + * long, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as a long, + * or defaultValue. + */ + public long getLong(String name, long defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + String hexString = getHexDigits(valueString); + if (hexString != null) { + return Long.parseLong(hexString, 16); + } + return Long.parseLong(valueString); + } catch (NumberFormatException e) { + return defaultValue; + } + } + + private String getHexDigits(String value) { + boolean negative = false; + String str = value; + String hexString = null; + if (value.startsWith("-")) { + negative = true; + str = value.substring(1); + } + if (str.startsWith("0x") || str.startsWith("0X")) { + hexString = str.substring(2); + if (negative) { + hexString = "-" + hexString; + } + return hexString; + } + return null; + } + + /** + * Set the value of the name property to a long. + * + * @param name property name. + * @param value long value of the property. + */ + public void setLong(String name, long value) { + set(name, Long.toString(value)); + } + + /** + * Get the value of the name property as a float. + * If no such property is specified, or if the specified value is not a valid + * float, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as a float, + * or defaultValue. 
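A sketch of the numeric getters described above ("my.buffer.size" is an arbitrary, made-up property name): hexadecimal values are accepted, and unparsable values fall back to the supplied default.

    import org.apache.hadoop.conf.Configuration;

    public class NumericGetterSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("my.buffer.size", "0x200");
        System.out.println(conf.getInt("my.buffer.size", 4096));  // 512, parsed as hex
        conf.set("my.buffer.size", "not-a-number");
        System.out.println(conf.getInt("my.buffer.size", 4096));  // 4096, the default
      }
    }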
+ */ + public float getFloat(String name, float defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + return Float.parseFloat(valueString); + } catch (NumberFormatException e) { + return defaultValue; + } + } + /** + * Set the value of the name property to a float. + * + * @param name property name. + * @param value property value. + */ + public void setFloat(String name, float value) { + set(name,Float.toString(value)); + } + + /** + * Get the value of the name property as a boolean. + * If no such property is specified, or if the specified value is not a valid + * boolean, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as a boolean, + * or defaultValue. + */ + public boolean getBoolean(String name, boolean defaultValue) { + String valueString = get(name); + if ("true".equals(valueString)) + return true; + else if ("false".equals(valueString)) + return false; + else return defaultValue; + } + + /** + * Set the value of the name property to a boolean. + * + * @param name property name. + * @param value boolean value of the property. + */ + public void setBoolean(String name, boolean value) { + set(name, Boolean.toString(value)); + } + + /** + * Set the given property, if it is currently unset. + * @param name property name + * @param value new value + */ + public void setBooleanIfUnset(String name, boolean value) { + setIfUnset(name, Boolean.toString(value)); + } + + /** + * A class that represents a set of positive integer ranges. It parses + * strings of the form: "2-3,5,7-" where ranges are separated by comma and + * the lower/upper bounds are separated by dash. Either the lower or upper + * bound may be omitted meaning all values up to or over. So the string + * above means 2, 3, 5, and 7, 8, 9, ... + */ + public static class IntegerRanges { + private static class Range { + int start; + int end; + } + + List ranges = new ArrayList(); + + public IntegerRanges() { + } + + public IntegerRanges(String newValue) { + StringTokenizer itr = new StringTokenizer(newValue, ","); + while (itr.hasMoreTokens()) { + String rng = itr.nextToken().trim(); + String[] parts = rng.split("-", 3); + if (parts.length < 1 || parts.length > 2) { + throw new IllegalArgumentException("integer range badly formed: " + + rng); + } + Range r = new Range(); + r.start = convertToInt(parts[0], 0); + if (parts.length == 2) { + r.end = convertToInt(parts[1], Integer.MAX_VALUE); + } else { + r.end = r.start; + } + if (r.start > r.end) { + throw new IllegalArgumentException("IntegerRange from " + r.start + + " to " + r.end + " is invalid"); + } + ranges.add(r); + } + } + + /** + * Convert a string to an int treating empty strings as the default value. + * @param value the string value + * @param defaultValue the value for if the string is empty + * @return the desired integer + */ + private static int convertToInt(String value, int defaultValue) { + String trim = value.trim(); + if (trim.length() == 0) { + return defaultValue; + } + return Integer.parseInt(trim); + } + + /** + * Is the given value in the set of ranges + * @param value the value to check + * @return is the value in the ranges? 
+ */ + public boolean isIncluded(int value) { + for(Range r: ranges) { + if (r.start <= value && value <= r.end) { + return true; + } + } + return false; + } + + @Override + public String toString() { + StringBuffer result = new StringBuffer(); + boolean first = true; + for(Range r: ranges) { + if (first) { + first = false; + } else { + result.append(','); + } + result.append(r.start); + result.append('-'); + result.append(r.end); + } + return result.toString(); + } + } + + /** + * Parse the given attribute as a set of integer ranges + * @param name the attribute name + * @param defaultValue the default value if it is not set + * @return a new set of ranges from the configured value + */ + public IntegerRanges getRange(String name, String defaultValue) { + return new IntegerRanges(get(name, defaultValue)); + } + + /** + * Get the comma delimited values of the name property as + * a collection of Strings. + * If no such property is specified then empty collection is returned. + *
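A sketch of the range syntax parsed by IntegerRanges, using the "2-3,5,7-" example from its javadoc above:

    import org.apache.hadoop.conf.Configuration;

    public class IntegerRangesSketch {
      public static void main(String[] args) {
        Configuration.IntegerRanges ranges =
            new Configuration.IntegerRanges("2-3,5,7-");
        System.out.println(ranges.isIncluded(3));    // true, inside 2-3
        System.out.println(ranges.isIncluded(6));    // false
        System.out.println(ranges.isIncluded(100));  // true, "7-" has no upper bound
        System.out.println(ranges);                  // 2-3,5-5,7-2147483647
      }
    }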

+ * This is an optimized version of {@link #getStrings(String)} + * + * @param name property name. + * @return property value as a collection of Strings. + */ + public Collection getStringCollection(String name) { + String valueString = get(name); + return StringUtils.getStringCollection(valueString); + } + + /** + * Get the comma delimited values of the name property as + * an array of Strings. + * If no such property is specified then null is returned. + * + * @param name property name. + * @return property value as an array of Strings, + * or null. + */ + public String[] getStrings(String name) { + String valueString = get(name); + return StringUtils.getStrings(valueString); + } + + /** + * Get the comma delimited values of the name property as + * an array of Strings. + * If no such property is specified then default value is returned. + * + * @param name property name. + * @param defaultValue The default value + * @return property value as an array of Strings, + * or default value. + */ + public String[] getStrings(String name, String... defaultValue) { + String valueString = get(name); + if (valueString == null) { + return defaultValue; + } else { + return StringUtils.getStrings(valueString); + } + } + + /** + * Set the array of string values for the name property as + * as comma delimited values. + * + * @param name property name. + * @param values The values + */ + public void setStrings(String name, String... values) { + set(name, StringUtils.arrayToString(values)); + } + + /** + * Load a class by name. + * + * @param name the class name. + * @return the class object. + * @throws ClassNotFoundException if the class is not found. + */ + public Class getClassByName(String name) throws ClassNotFoundException { + return Class.forName(name, true, classLoader); + } + + /** + * Get the value of the name property + * as an array of Class. + * The value of the property specifies a list of comma separated class names. + * If no such property is specified, then defaultValue is + * returned. + * + * @param name the property name. + * @param defaultValue default value. + * @return property value as a Class[], + * or defaultValue. + */ + public Class[] getClasses(String name, Class ... defaultValue) { + String[] classnames = getStrings(name); + if (classnames == null) + return defaultValue; + try { + Class[] classes = new Class[classnames.length]; + for(int i = 0; i < classnames.length; i++) { + classes[i] = getClassByName(classnames[i]); + } + return classes; + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Get the value of the name property as a Class. + * If no such property is specified, then defaultValue is + * returned. + * + * @param name the class name. + * @param defaultValue default value. + * @return property value as a Class, + * or defaultValue. + */ + public Class getClass(String name, Class defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + return getClassByName(valueString); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Get the value of the name property as a Class + * implementing the interface specified by xface. + * + * If no such property is specified, then defaultValue is + * returned. + * + * An exception is thrown if the returned class does not implement the named + * interface. + * + * @param name the class name. + * @param defaultValue default value. + * @param xface the interface implemented by the named class. 
+ * @return property value as a Class, + * or defaultValue. + */ + public Class getClass(String name, + Class defaultValue, + Class xface) { + try { + Class theClass = getClass(name, defaultValue); + if (theClass != null && !xface.isAssignableFrom(theClass)) + throw new RuntimeException(theClass+" not "+xface.getName()); + else if (theClass != null) + return theClass.asSubclass(xface); + else + return null; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Get the value of the name property as a List + * of objects implementing the interface specified by xface. + * + * An exception is thrown if any of the classes does not exist, or if it does + * not implement the named interface. + * + * @param name the property name. + * @param xface the interface implemented by the classes named by + * name. + * @return a List of objects implementing xface. + */ + @SuppressWarnings("unchecked") + public List getInstances(String name, Class xface) { + List ret = new ArrayList(); + Class[] classes = getClasses(name); + for (Class cl: classes) { + if (!xface.isAssignableFrom(cl)) { + throw new RuntimeException(cl + " does not implement " + xface); + } + ret.add((U)ReflectionUtils.newInstance(cl, this)); + } + return ret; + } + + /** + * Set the value of the name property to the name of a + * theClass implementing the given interface xface. + * + * An exception is thrown if theClass does not implement the + * interface xface. + * + * @param name property name. + * @param theClass property value. + * @param xface the interface implemented by the named class. + */ + public void setClass(String name, Class theClass, Class xface) { + if (!xface.isAssignableFrom(theClass)) + throw new RuntimeException(theClass+" not "+xface.getName()); + set(name, theClass.getName()); + } + + /** + * Get a local file under a directory named by dirsProp with + * the given path. If dirsProp contains multiple directories, + * then one is chosen based on path's hash code. If the selected + * directory does not exist, an attempt is made to create it. + * + * @param dirsProp directory in which to locate the file. + * @param path file-path. + * @return local file under the directory with the given path. + */ + public Path getLocalPath(String dirsProp, String path) + throws IOException { + String[] dirs = getStrings(dirsProp); + int hashCode = path.hashCode(); + FileSystem fs = FileSystem.getLocal(this); + for (int i = 0; i < dirs.length; i++) { // try each local dir + int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; + Path file = new Path(dirs[index], path); + Path dir = file.getParent(); + if (fs.mkdirs(dir) || fs.exists(dir)) { + return file; + } + } + LOG.warn("Could not make " + path + + " in local directories from " + dirsProp); + for(int i=0; i < dirs.length; i++) { + int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; + LOG.warn(dirsProp + "[" + index + "]=" + dirs[index]); + } + throw new IOException("No valid local directories in property: "+dirsProp); + } + + /** + * Get a local file name under a directory named in dirsProp with + * the given path. If dirsProp contains multiple directories, + * then one is chosen based on path's hash code. If the selected + * directory does not exist, an attempt is made to create it. + * + * @param dirsProp directory in which to locate the file. + * @param path file-path. + * @return local file under the directory with the given path. 
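A sketch of the class-valued accessors, reading the rack-mapping implementation named by core-default.xml earlier in this patch; DNSToSwitchMapping and ScriptBasedMapping live in org.apache.hadoop.net:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.net.DNSToSwitchMapping;
    import org.apache.hadoop.net.ScriptBasedMapping;
    import org.apache.hadoop.util.ReflectionUtils;

    public class ClassValuedPropertySketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        Class<? extends DNSToSwitchMapping> cls =
            conf.getClass("topology.node.switch.mapping.impl",
                          ScriptBasedMapping.class, DNSToSwitchMapping.class);
        // getClass has already verified that cls implements DNSToSwitchMapping.
        DNSToSwitchMapping mapping = ReflectionUtils.newInstance(cls, conf);
        System.out.println(mapping.getClass().getName());
      }
    }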
+ */ + public File getFile(String dirsProp, String path) + throws IOException { + String[] dirs = getStrings(dirsProp); + int hashCode = path.hashCode(); + for (int i = 0; i < dirs.length; i++) { // try each local dir + int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; + File file = new File(dirs[index], path); + File dir = file.getParentFile(); + if (dir.exists() || dir.mkdirs()) { + return file; + } + } + throw new IOException("No valid local directories in property: "+dirsProp); + } + + /** + * Get the {@link URL} for the named resource. + * + * @param name resource name. + * @return the url for the named resource. + */ + public URL getResource(String name) { + return classLoader.getResource(name); + } + + /** + * Get an input stream attached to the configuration resource with the + * given name. + * + * @param name configuration resource name. + * @return an input stream attached to the resource. + */ + public InputStream getConfResourceAsInputStream(String name) { + try { + URL url= getResource(name); + + if (url == null) { + LOG.info(name + " not found"); + return null; + } else { + LOG.info("found resource " + name + " at " + url); + } + + return url.openStream(); + } catch (Exception e) { + return null; + } + } + + /** + * Get a {@link Reader} attached to the configuration resource with the + * given name. + * + * @param name configuration resource name. + * @return a reader attached to the resource. + */ + public Reader getConfResourceAsReader(String name) { + try { + URL url= getResource(name); + + if (url == null) { + LOG.info(name + " not found"); + return null; + } else { + LOG.info("found resource " + name + " at " + url); + } + + return new InputStreamReader(url.openStream()); + } catch (Exception e) { + return null; + } + } + + protected synchronized Properties getProps() { + if (properties == null) { + properties = new Properties(); + loadResources(properties, resources, quietmode); + if (overlay!= null) + properties.putAll(overlay); + } + return properties; + } + + /** + * Return the number of keys in the configuration. + * + * @return number of keys in the configuration. + */ + public int size() { + return getProps().size(); + } + + /** + * Clears all keys from the configuration. + */ + public void clear() { + getProps().clear(); + getOverlay().clear(); + } + + /** + * Get an {@link Iterator} to go through the list of String + * key-value pairs in the configuration. + * + * @return an iterator over the entries. + */ + public Iterator> iterator() { + // Get a copy of just the string to string pairs. After the old object + // methods that allow non-strings to be put into configurations are removed, + // we could replace properties with a Map and get rid of this + // code. 
+ Map result = new HashMap(); + for(Map.Entry item: getProps().entrySet()) { + if (item.getKey() instanceof String && + item.getValue() instanceof String) { + result.put((String) item.getKey(), (String) item.getValue()); + } + } + return result.entrySet().iterator(); + } + + private void loadResources(Properties properties, + ArrayList resources, + boolean quiet) { + if(loadDefaults) { + for (String resource : defaultResources) { + loadResource(properties, resource, quiet); + } + + //support the hadoop-site.xml as a deprecated case + if(getResource("hadoop-site.xml")!=null) { + loadResource(properties, "hadoop-site.xml", quiet); + } + } + + for (Object resource : resources) { + loadResource(properties, resource, quiet); + } + } + + private void loadResource(Properties properties, Object name, boolean quiet) { + try { + DocumentBuilderFactory docBuilderFactory + = DocumentBuilderFactory.newInstance(); + //ignore all comments inside the xml file + docBuilderFactory.setIgnoringComments(true); + + //allow includes in the xml file + docBuilderFactory.setNamespaceAware(true); + try { + docBuilderFactory.setXIncludeAware(true); + } catch (UnsupportedOperationException e) { + LOG.error("Failed to set setXIncludeAware(true) for parser " + + docBuilderFactory + + ":" + e, + e); + } + DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); + Document doc = null; + Element root = null; + + if (name instanceof URL) { // an URL resource + URL url = (URL)name; + if (url != null) { + if (!quiet) { + LOG.info("parsing " + url); + } + doc = builder.parse(url.toString()); + } + } else if (name instanceof String) { // a CLASSPATH resource + URL url = getResource((String)name); + if (url != null) { + if (!quiet) { + LOG.info("parsing " + url); + } + doc = builder.parse(url.toString()); + } + } else if (name instanceof Path) { // a file resource + // Can't use FileSystem API or we get an infinite loop + // since FileSystem uses Configuration API. Use java.io.File instead. 
+ File file = new File(((Path)name).toUri().getPath()) + .getAbsoluteFile(); + if (file.exists()) { + if (!quiet) { + LOG.info("parsing " + file); + } + InputStream in = new BufferedInputStream(new FileInputStream(file)); + try { + doc = builder.parse(in); + } finally { + in.close(); + } + } + } else if (name instanceof InputStream) { + try { + doc = builder.parse((InputStream)name); + } finally { + ((InputStream)name).close(); + } + } else if (name instanceof Element) { + root = (Element)name; + } + + if (doc == null && root == null) { + if (quiet) + return; + throw new RuntimeException(name + " not found"); + } + + if (root == null) { + root = doc.getDocumentElement(); + } + if (!"configuration".equals(root.getTagName())) + LOG.fatal("bad conf file: top-level element not "); + NodeList props = root.getChildNodes(); + for (int i = 0; i < props.getLength(); i++) { + Node propNode = props.item(i); + if (!(propNode instanceof Element)) + continue; + Element prop = (Element)propNode; + if ("configuration".equals(prop.getTagName())) { + loadResource(properties, prop, quiet); + continue; + } + if (!"property".equals(prop.getTagName())) + LOG.warn("bad conf file: element not "); + NodeList fields = prop.getChildNodes(); + String attr = null; + String value = null; + boolean finalParameter = false; + for (int j = 0; j < fields.getLength(); j++) { + Node fieldNode = fields.item(j); + if (!(fieldNode instanceof Element)) + continue; + Element field = (Element)fieldNode; + if ("name".equals(field.getTagName()) && field.hasChildNodes()) + attr = ((Text)field.getFirstChild()).getData().trim(); + if ("value".equals(field.getTagName()) && field.hasChildNodes()) + value = ((Text)field.getFirstChild()).getData(); + if ("final".equals(field.getTagName()) && field.hasChildNodes()) + finalParameter = "true".equals(((Text)field.getFirstChild()).getData()); + } + + // Ignore this parameter if it has already been marked as 'final' + if (attr != null && value != null) { + if (!finalParameters.contains(attr)) { + properties.setProperty(attr, value); + if (finalParameter) + finalParameters.add(attr); + } else { + LOG.warn(name+":a attempt to override final parameter: "+attr + +"; Ignoring."); + } + } + } + + } catch (IOException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } catch (DOMException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } catch (SAXException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } catch (ParserConfigurationException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } + } + + /** + * Write out the non-default properties in this configuration to the give + * {@link OutputStream}. + * + * @param out the output stream to write to. 
+ */ + public void writeXml(OutputStream out) throws IOException { + Properties properties = getProps(); + try { + Document doc = + DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); + Element conf = doc.createElement("configuration"); + doc.appendChild(conf); + conf.appendChild(doc.createTextNode("\n")); + for (Enumeration e = properties.keys(); e.hasMoreElements();) { + String name = (String)e.nextElement(); + Object object = properties.get(name); + String value = null; + if (object instanceof String) { + value = (String) object; + }else { + continue; + } + Element propNode = doc.createElement("property"); + conf.appendChild(propNode); + + Element nameNode = doc.createElement("name"); + nameNode.appendChild(doc.createTextNode(name)); + propNode.appendChild(nameNode); + + Element valueNode = doc.createElement("value"); + valueNode.appendChild(doc.createTextNode(value)); + propNode.appendChild(valueNode); + + conf.appendChild(doc.createTextNode("\n")); + } + + DOMSource source = new DOMSource(doc); + StreamResult result = new StreamResult(out); + TransformerFactory transFactory = TransformerFactory.newInstance(); + Transformer transformer = transFactory.newTransformer(); + transformer.transform(source, result); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Get the {@link ClassLoader} for this job. + * + * @return the correct class loader. + */ + public ClassLoader getClassLoader() { + return classLoader; + } + + /** + * Set the class loader that will be used to load the various objects. + * + * @param classLoader the new class loader. + */ + public void setClassLoader(ClassLoader classLoader) { + this.classLoader = classLoader; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("Configuration: "); + if(loadDefaults) { + toString(defaultResources, sb); + if(resources.size()>0) { + sb.append(", "); + } + } + toString(resources, sb); + return sb.toString(); + } + + private void toString(ArrayList resources, StringBuffer sb) { + ListIterator i = resources.listIterator(); + while (i.hasNext()) { + if (i.nextIndex() != 0) { + sb.append(", "); + } + sb.append(i.next()); + } + } + + /** + * Set the quietness-mode. + * + * In the quiet-mode, error and informational messages might not be logged. + * + * @param quietmode true to set quiet-mode on, false + * to turn it off. + */ + public synchronized void setQuietMode(boolean quietmode) { + this.quietmode = quietmode; + } + + /** For debugging. List non-default properties to the terminal and exit. 
*/ + public static void main(String[] args) throws Exception { + new Configuration().writeXml(System.out); + } + + @Override + public void readFields(DataInput in) throws IOException { + clear(); + int size = WritableUtils.readVInt(in); + for(int i=0; i < size; ++i) { + set(org.apache.hadoop.io.Text.readString(in), + org.apache.hadoop.io.Text.readString(in)); + } + } + + //@Override + public void write(DataOutput out) throws IOException { + Properties props = getProps(); + WritableUtils.writeVInt(out, props.size()); + for(Map.Entry item: props.entrySet()) { + org.apache.hadoop.io.Text.writeString(out, (String) item.getKey()); + org.apache.hadoop.io.Text.writeString(out, (String) item.getValue()); + } + } + +} diff --git a/src/java/org/apache/hadoop/conf/Configured.java b/src/java/org/apache/hadoop/conf/Configured.java new file mode 100644 index 00000000000..cd5604e981d --- /dev/null +++ b/src/java/org/apache/hadoop/conf/Configured.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.conf; + +/** Base class for things that may be configured with a {@link Configuration}. */ +public class Configured implements Configurable { + + private Configuration conf; + + /** Construct a Configured. */ + public Configured() { + this(null); + } + + /** Construct a Configured. */ + public Configured(Configuration conf) { + setConf(conf); + } + + // inherit javadoc + public void setConf(Configuration conf) { + this.conf = conf; + } + + // inherit javadoc + public Configuration getConf() { + return conf; + } + +} diff --git a/src/java/org/apache/hadoop/conf/package.html b/src/java/org/apache/hadoop/conf/package.html new file mode 100644 index 00000000000..0be80bed5f9 --- /dev/null +++ b/src/java/org/apache/hadoop/conf/package.html @@ -0,0 +1,23 @@ + + + + + +Configuration of system parameters. + + diff --git a/src/java/org/apache/hadoop/filecache/DistributedCache.java b/src/java/org/apache/hadoop/filecache/DistributedCache.java new file mode 100644 index 00000000000..9d4a8f9a426 --- /dev/null +++ b/src/java/org/apache/hadoop/filecache/DistributedCache.java @@ -0,0 +1,879 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.filecache; + +import org.apache.commons.logging.*; +import java.io.*; +import java.util.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.*; +import org.apache.hadoop.fs.*; + +import java.net.URI; + +/** + * Distribute application-specific large, read-only files efficiently. + * + *

+ * <p>DistributedCache is a facility provided by the Map-Reduce
+ * framework to cache files (text, archives, jars etc.) needed by applications.
+ *
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
+ * via the {@link org.apache.hadoop.mapred.JobConf}.
+ * The DistributedCache assumes that the
+ * files specified via hdfs:// urls are already present on the
+ * {@link FileSystem} at the path specified by the url.
+ *
+ * <p>The framework will copy the necessary files on to the slave node before
+ * any tasks for the job are executed on that node. Its efficiency stems from
+ * the fact that the files are only copied once per job and the ability to
+ * cache archives which are un-archived on the slaves.
+ *
+ * <p>DistributedCache can be used to distribute simple, read-only
+ * data/text files and/or more complex types such as archives, jars etc.
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
+ * software distribution mechanism. Files have execution permissions.
+ * Optionally users can also direct it to symlink the distributed cache file(s)
+ * into the working directory of the task.
+ *
+ * <p>DistributedCache tracks modification timestamps of the cache
+ * files. Clearly the cache files should not be modified by the application
+ * or externally while the job is executing.
+ *
+ * <p>Here is an illustrative example on how to use the
+ * DistributedCache:

+ *     // Setting up the cache for the application
+ *     
+ *     1. Copy the requisite files to the FileSystem:
+ *     
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
+ *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
+ *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
+ *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
+ *     
+ *     2. Setup the application's JobConf:
+ *     
+ *     JobConf job = new JobConf();
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *                                   job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
+ *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
+ *     
+ *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
+ *     or {@link org.apache.hadoop.mapred.Reducer}:
+ *     
+ *     public static class MapClass extends MapReduceBase  
+ *     implements Mapper<K, V, K, V> {
+ *     
+ *       private Path[] localArchives;
+ *       private Path[] localFiles;
+ *       
+ *       public void configure(JobConf job) {
+ *         // Get the cached archives/files
+ *         localArchives = DistributedCache.getLocalCacheArchives(job);
+ *         localFiles = DistributedCache.getLocalCacheFiles(job);
+ *       }
+ *       
+ *       public void map(K key, V value, 
+ *                       OutputCollector<K, V> output, Reporter reporter) 
+ *       throws IOException {
+ *         // Use data from the cached archives/files here
+ *         // ...
+ *         // ...
+ *         output.collect(k, v);
+ *       }
+ *     }
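Beyond the job-level example above, the framework-side entry points declared below pair a localize call with a release; a sketch under made-up host name, paths and cache file:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.filecache.DistributedCache;
    import org.apache.hadoop.fs.Path;

    public class CacheLifecycleSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        URI cache = new URI("hdfs://namenode:8020/myapp/lookup.dat#lookup.dat");
        long mtime = DistributedCache.getTimestamp(conf, cache);
        Path local = DistributedCache.getLocalCache(cache, conf,
            new Path("/tmp/cache"),   // hypothetical base directory for localized files
            false,                    // a plain file, not an archive
            mtime,                    // guards against the file changing mid-job
            new Path("/tmp/work"));   // hypothetical task working directory, used for symlinks
        try {
          System.out.println("Localized to " + local);
        } finally {
          DistributedCache.releaseCache(cache, conf); // drop the reference count when done
        }
      }
    }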
+ *     
+ * 

+ * + * @see org.apache.hadoop.mapred.JobConf + * @see org.apache.hadoop.mapred.JobClient + */ +public class DistributedCache { + // cacheID to cacheStatus mapping + private static TreeMap cachedArchives = new TreeMap(); + + private static TreeMap baseDirSize = new TreeMap(); + + // default total cache size + private static final long DEFAULT_CACHE_SIZE = 10737418240L; + + private static final Log LOG = + LogFactory.getLog(DistributedCache.class); + + /** + * Get the locally cached file or archive; it could either be + * previously cached (and valid) or copy it from the {@link FileSystem} now. + * + * @param cache the cache to be localized, this should be specified as + * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema + * or hostname:port is provided the file is assumed to be in the filesystem + * being used in the Configuration + * @param conf The Confguration file which contains the filesystem + * @param baseDir The base cache Dir where you wnat to localize the files/archives + * @param fileStatus The file status on the dfs. + * @param isArchive if the cache is an archive or a file. In case it is an + * archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will + * be unzipped/unjarred/untarred automatically + * and the directory where the archive is unzipped/unjarred/untarred is + * returned as the Path. + * In case of a file, the path to the file is returned + * @param confFileStamp this is the hdfs file modification timestamp to verify that the + * file to be cached hasn't changed since the job started + * @param currentWorkDir this is the directory where you would want to create symlinks + * for the locally cached files/archives + * @return the path to directory where the archives are unjarred in case of archives, + * the path to the file where the file is copied locally + * @throws IOException + */ + public static Path getLocalCache(URI cache, Configuration conf, + Path baseDir, FileStatus fileStatus, + boolean isArchive, long confFileStamp, + Path currentWorkDir) + throws IOException { + return getLocalCache(cache, conf, baseDir, fileStatus, isArchive, + confFileStamp, currentWorkDir, true); + } + /** + * Get the locally cached file or archive; it could either be + * previously cached (and valid) or copy it from the {@link FileSystem} now. + * + * @param cache the cache to be localized, this should be specified as + * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema + * or hostname:port is provided the file is assumed to be in the filesystem + * being used in the Configuration + * @param conf The Confguration file which contains the filesystem + * @param baseDir The base cache Dir where you wnat to localize the files/archives + * @param fileStatus The file status on the dfs. + * @param isArchive if the cache is an archive or a file. In case it is an + * archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will + * be unzipped/unjarred/untarred automatically + * and the directory where the archive is unzipped/unjarred/untarred is + * returned as the Path. 
+ * In case of a file, the path to the file is returned + * @param confFileStamp this is the hdfs file modification timestamp to verify that the + * file to be cached hasn't changed since the job started + * @param currentWorkDir this is the directory where you would want to create symlinks + * for the locally cached files/archives + * @param honorSymLinkConf if this is false, then the symlinks are not + * created even if conf says so (this is required for an optimization in task + * launches + * @return the path to directory where the archives are unjarred in case of archives, + * the path to the file where the file is copied locally + * @throws IOException + */ + public static Path getLocalCache(URI cache, Configuration conf, + Path baseDir, FileStatus fileStatus, + boolean isArchive, long confFileStamp, + Path currentWorkDir, boolean honorSymLinkConf) + throws IOException { + String cacheId = makeRelative(cache, conf); + CacheStatus lcacheStatus; + Path localizedPath; + synchronized (cachedArchives) { + lcacheStatus = cachedArchives.get(cacheId); + if (lcacheStatus == null) { + // was never localized + lcacheStatus = new CacheStatus(baseDir, new Path(baseDir, new Path(cacheId))); + cachedArchives.put(cacheId, lcacheStatus); + } + + synchronized (lcacheStatus) { + localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, + fileStatus, isArchive, currentWorkDir, honorSymLinkConf); + lcacheStatus.refcount++; + } + } + + // try deleting stuff if you can + long size = 0; + synchronized (baseDirSize) { + Long get = baseDirSize.get(baseDir); + if ( get != null ) { + size = get.longValue(); + } + } + // setting the cache size to a default of 10GB + long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE); + if (allowedSize < size) { + // try some cache deletions + deleteCache(conf); + } + return localizedPath; + } + + + /** + * Get the locally cached file or archive; it could either be + * previously cached (and valid) or copy it from the {@link FileSystem} now. + * + * @param cache the cache to be localized, this should be specified as + * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema + * or hostname:port is provided the file is assumed to be in the filesystem + * being used in the Configuration + * @param conf The Confguration file which contains the filesystem + * @param baseDir The base cache Dir where you wnat to localize the files/archives + * @param isArchive if the cache is an archive or a file. In case it is an + * archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will + * be unzipped/unjarred/untarred automatically + * and the directory where the archive is unzipped/unjarred/untarred + * is returned as the Path. 
+ * In case of a file, the path to the file is returned + * @param confFileStamp this is the hdfs file modification timestamp to verify that the + * file to be cached hasn't changed since the job started + * @param currentWorkDir this is the directory where you would want to create symlinks + * for the locally cached files/archives + * @return the path to directory where the archives are unjarred in case of archives, + * the path to the file where the file is copied locally + * @throws IOException + + */ + public static Path getLocalCache(URI cache, Configuration conf, + Path baseDir, boolean isArchive, + long confFileStamp, Path currentWorkDir) + throws IOException { + return getLocalCache(cache, conf, + baseDir, null, isArchive, + confFileStamp, currentWorkDir); + } + + /** + * This is the opposite of getlocalcache. When you are done with + * using the cache, you need to release the cache + * @param cache The cache URI to be released + * @param conf configuration which contains the filesystem the cache + * is contained in. + * @throws IOException + */ + public static void releaseCache(URI cache, Configuration conf) + throws IOException { + String cacheId = makeRelative(cache, conf); + synchronized (cachedArchives) { + CacheStatus lcacheStatus = cachedArchives.get(cacheId); + if (lcacheStatus == null) + return; + synchronized (lcacheStatus) { + lcacheStatus.refcount--; + } + } + } + + // To delete the caches which have a refcount of zero + + private static void deleteCache(Configuration conf) throws IOException { + // try deleting cache Status with refcount of zero + synchronized (cachedArchives) { + for (Iterator it = cachedArchives.keySet().iterator(); it.hasNext();) { + String cacheId = (String) it.next(); + CacheStatus lcacheStatus = cachedArchives.get(cacheId); + synchronized (lcacheStatus) { + if (lcacheStatus.refcount == 0) { + // delete this cache entry + FileSystem.getLocal(conf).delete(lcacheStatus.localLoadPath, true); + synchronized (baseDirSize) { + Long dirSize = baseDirSize.get(lcacheStatus.baseDir); + if ( dirSize != null ) { + dirSize -= lcacheStatus.size; + baseDirSize.put(lcacheStatus.baseDir, dirSize); + } + } + it.remove(); + } + } + } + } + } + + /* + * Returns the relative path of the dir this cache will be localized in + * relative path that this cache will be localized in. 
For + * hdfs://hostname:port/absolute_path -- the relative path is + * hostname/absolute path -- if it is just /absolute_path -- then the + * relative path is hostname of DFS this mapred cluster is running + * on/absolute_path + */ + public static String makeRelative(URI cache, Configuration conf) + throws IOException { + String host = cache.getHost(); + if (host == null) { + host = cache.getScheme(); + } + if (host == null) { + URI defaultUri = FileSystem.get(conf).getUri(); + host = defaultUri.getHost(); + if (host == null) { + host = defaultUri.getScheme(); + } + } + String path = host + cache.getPath(); + path = path.replace(":/","/"); // remove windows device colon + return path; + } + + private static Path cacheFilePath(Path p) { + return new Path(p, p.getName()); + } + + // the method which actually copies the caches locally and unjars/unzips them + // and does chmod for the files + private static Path localizeCache(Configuration conf, + URI cache, long confFileStamp, + CacheStatus cacheStatus, + FileStatus fileStatus, + boolean isArchive, + Path currentWorkDir,boolean honorSymLinkConf) + throws IOException { + boolean doSymlink = honorSymLinkConf && getSymlink(conf); + if(cache.getFragment() == null) { + doSymlink = false; + } + FileSystem fs = getFileSystem(cache, conf); + String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment(); + File flink = new File(link); + if (ifExistsAndFresh(conf, fs, cache, confFileStamp, + cacheStatus, fileStatus)) { + if (isArchive) { + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheStatus.localLoadPath.toString(), + link); + } + return cacheStatus.localLoadPath; + } + else { + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheFilePath(cacheStatus.localLoadPath).toString(), + link); + } + return cacheFilePath(cacheStatus.localLoadPath); + } + } else { + // remove the old archive + // if the old archive cannot be removed since it is being used by another + // job + // return null + if (cacheStatus.refcount > 1 && (cacheStatus.currentStatus == true)) + throw new IOException("Cache " + cacheStatus.localLoadPath.toString() + + " is in use and cannot be refreshed"); + + FileSystem localFs = FileSystem.getLocal(conf); + localFs.delete(cacheStatus.localLoadPath, true); + synchronized (baseDirSize) { + Long dirSize = baseDirSize.get(cacheStatus.baseDir); + if ( dirSize != null ) { + dirSize -= cacheStatus.size; + baseDirSize.put(cacheStatus.baseDir, dirSize); + } + } + Path parchive = new Path(cacheStatus.localLoadPath, + new Path(cacheStatus.localLoadPath.getName())); + + if (!localFs.mkdirs(cacheStatus.localLoadPath)) { + throw new IOException("Mkdirs failed to create directory " + + cacheStatus.localLoadPath.toString()); + } + + String cacheId = cache.getPath(); + fs.copyToLocalFile(new Path(cacheId), parchive); + if (isArchive) { + String tmpArchive = parchive.toString().toLowerCase(); + File srcFile = new File(parchive.toString()); + File destDir = new File(parchive.getParent().toString()); + if (tmpArchive.endsWith(".jar")) { + RunJar.unJar(srcFile, destDir); + } else if (tmpArchive.endsWith(".zip")) { + FileUtil.unZip(srcFile, destDir); + } else if (isTarFile(tmpArchive)) { + FileUtil.unTar(srcFile, destDir); + } + // else will not do anyhting + // and copy the file into the dir as it is + } + + long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString())); + cacheStatus.size = cacheSize; + synchronized (baseDirSize) { + Long dirSize = baseDirSize.get(cacheStatus.baseDir); + if( dirSize == 
null ) { + dirSize = Long.valueOf(cacheSize); + } else { + dirSize += cacheSize; + } + baseDirSize.put(cacheStatus.baseDir, dirSize); + } + + // do chmod here + try { + //Setting recursive permission to grant everyone read and execute + FileUtil.chmod(cacheStatus.baseDir.toString(), "ugo+rx",true); + } catch(InterruptedException e) { + LOG.warn("Exception in chmod" + e.toString()); + } + + // update cacheStatus to reflect the newly cached file + cacheStatus.currentStatus = true; + cacheStatus.mtime = getTimestamp(conf, cache); + } + + if (isArchive){ + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheStatus.localLoadPath.toString(), + link); + } + return cacheStatus.localLoadPath; + } + else { + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheFilePath(cacheStatus.localLoadPath).toString(), + link); + } + return cacheFilePath(cacheStatus.localLoadPath); + } + } + + private static boolean isTarFile(String filename) { + return (filename.endsWith(".tgz") || filename.endsWith(".tar.gz") || + filename.endsWith(".tar")); + } + + // Checks if the cache has already been localized and is fresh + private static boolean ifExistsAndFresh(Configuration conf, FileSystem fs, + URI cache, long confFileStamp, + CacheStatus lcacheStatus, + FileStatus fileStatus) + throws IOException { + // check for existence of the cache + if (lcacheStatus.currentStatus == false) { + return false; + } else { + long dfsFileStamp; + if (fileStatus != null) { + dfsFileStamp = fileStatus.getModificationTime(); + } else { + dfsFileStamp = getTimestamp(conf, cache); + } + + // ensure that the file on hdfs hasn't been modified since the job started + if (dfsFileStamp != confFileStamp) { + LOG.fatal("File: " + cache + " has changed on HDFS since job started"); + throw new IOException("File: " + cache + + " has changed on HDFS since job started"); + } + + if (dfsFileStamp != lcacheStatus.mtime) { + // needs refreshing + return false; + } + } + + return true; + } + + /** + * Returns mtime of a given cache file on hdfs. 
+ * @param conf configuration + * @param cache cache file + * @return mtime of a given cache file on hdfs + * @throws IOException + */ + public static long getTimestamp(Configuration conf, URI cache) + throws IOException { + FileSystem fileSystem = FileSystem.get(cache, conf); + Path filePath = new Path(cache.getPath()); + + return fileSystem.getFileStatus(filePath).getModificationTime(); + } + + /** + * This method create symlinks for all files in a given dir in another directory + * @param conf the configuration + * @param jobCacheDir the target directory for creating symlinks + * @param workDir the directory in which the symlinks are created + * @throws IOException + */ + public static void createAllSymlink(Configuration conf, File jobCacheDir, File workDir) + throws IOException{ + if ((jobCacheDir == null || !jobCacheDir.isDirectory()) || + workDir == null || (!workDir.isDirectory())) { + return; + } + boolean createSymlink = getSymlink(conf); + if (createSymlink){ + File[] list = jobCacheDir.listFiles(); + for (int i=0; i < list.length; i++){ + FileUtil.symLink(list[i].getAbsolutePath(), + new File(workDir, list[i].getName()).toString()); + } + } + } + + private static FileSystem getFileSystem(URI cache, Configuration conf) + throws IOException { + if ("hdfs".equals(cache.getScheme())) + return FileSystem.get(cache, conf); + else + return FileSystem.get(conf); + } + + /** + * Set the configuration with the given set of archives + * @param archives The list of archives that need to be localized + * @param conf Configuration which will be changed + */ + public static void setCacheArchives(URI[] archives, Configuration conf) { + String sarchives = StringUtils.uriToString(archives); + conf.set("mapred.cache.archives", sarchives); + } + + /** + * Set the configuration with the given set of files + * @param files The list of files that need to be localized + * @param conf Configuration which will be changed + */ + public static void setCacheFiles(URI[] files, Configuration conf) { + String sfiles = StringUtils.uriToString(files); + conf.set("mapred.cache.files", sfiles); + } + + /** + * Get cache archives set in the Configuration + * @param conf The configuration which contains the archives + * @return A URI array of the caches set in the Configuration + * @throws IOException + */ + public static URI[] getCacheArchives(Configuration conf) throws IOException { + return StringUtils.stringToURI(conf.getStrings("mapred.cache.archives")); + } + + /** + * Get cache files set in the Configuration + * @param conf The configuration which contains the files + * @return A URI array of the files set in the Configuration + * @throws IOException + */ + + public static URI[] getCacheFiles(Configuration conf) throws IOException { + return StringUtils.stringToURI(conf.getStrings("mapred.cache.files")); + } + + /** + * Return the path array of the localized caches + * @param conf Configuration that contains the localized archives + * @return A path array of localized caches + * @throws IOException + */ + public static Path[] getLocalCacheArchives(Configuration conf) + throws IOException { + return StringUtils.stringToPath(conf + .getStrings("mapred.cache.localArchives")); + } + + /** + * Return the path array of the localized files + * @param conf Configuration that contains the localized files + * @return A path array of localized files + * @throws IOException + */ + public static Path[] getLocalCacheFiles(Configuration conf) + throws IOException { + return 
StringUtils.stringToPath(conf.getStrings("mapred.cache.localFiles")); + } + + /** + * Get the timestamps of the archives + * @param conf The configuration which stored the timestamps + * @return a string array of timestamps + * @throws IOException + */ + public static String[] getArchiveTimestamps(Configuration conf) { + return conf.getStrings("mapred.cache.archives.timestamps"); + } + + + /** + * Get the timestamps of the files + * @param conf The configuration which stored the timestamps + * @return a string array of timestamps + * @throws IOException + */ + public static String[] getFileTimestamps(Configuration conf) { + return conf.getStrings("mapred.cache.files.timestamps"); + } + + /** + * This is to check the timestamp of the archives to be localized + * @param conf Configuration which stores the timestamp's + * @param timestamps comma separated list of timestamps of archives. + * The order should be the same as the order in which the archives are added. + */ + public static void setArchiveTimestamps(Configuration conf, String timestamps) { + conf.set("mapred.cache.archives.timestamps", timestamps); + } + + /** + * This is to check the timestamp of the files to be localized + * @param conf Configuration which stores the timestamp's + * @param timestamps comma separated list of timestamps of files. + * The order should be the same as the order in which the files are added. + */ + public static void setFileTimestamps(Configuration conf, String timestamps) { + conf.set("mapred.cache.files.timestamps", timestamps); + } + + /** + * Set the conf to contain the location for localized archives + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local archives + */ + public static void setLocalArchives(Configuration conf, String str) { + conf.set("mapred.cache.localArchives", str); + } + + /** + * Set the conf to contain the location for localized files + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local files + */ + public static void setLocalFiles(Configuration conf, String str) { + conf.set("mapred.cache.localFiles", str); + } + + /** + * Add a archives to be localized to the conf + * @param uri The uri of the cache to be localized + * @param conf Configuration to add the cache to + */ + public static void addCacheArchive(URI uri, Configuration conf) { + String archives = conf.get("mapred.cache.archives"); + conf.set("mapred.cache.archives", archives == null ? uri.toString() + : archives + "," + uri.toString()); + } + + /** + * Add a file to be localized to the conf + * @param uri The uri of the cache to be localized + * @param conf Configuration to add the cache to + */ + public static void addCacheFile(URI uri, Configuration conf) { + String files = conf.get("mapred.cache.files"); + conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + + uri.toString()); + } + + /** + * Add an file path to the current set of classpath entries It adds the file + * to cache as well. + * + * @param file Path of the file to be added + * @param conf Configuration that contains the classpath setting + */ + public static void addFileToClassPath(Path file, Configuration conf) + throws IOException { + String classpath = conf.get("mapred.job.classpath.files"); + conf.set("mapred.job.classpath.files", classpath == null ? 
file.toString() + : classpath + System.getProperty("path.separator") + file.toString()); + FileSystem fs = FileSystem.get(conf); + URI uri = fs.makeQualified(file).toUri(); + + addCacheFile(uri, conf); + } + + /** + * Get the file entries in classpath as an array of Path + * + * @param conf Configuration that contains the classpath setting + */ + public static Path[] getFileClassPaths(Configuration conf) { + String classpath = conf.get("mapred.job.classpath.files"); + if (classpath == null) + return null; + ArrayList list = Collections.list(new StringTokenizer(classpath, System + .getProperty("path.separator"))); + Path[] paths = new Path[list.size()]; + for (int i = 0; i < list.size(); i++) { + paths[i] = new Path((String) list.get(i)); + } + return paths; + } + + /** + * Add an archive path to the current set of classpath entries. It adds the + * archive to cache as well. + * + * @param archive Path of the archive to be added + * @param conf Configuration that contains the classpath setting + */ + public static void addArchiveToClassPath(Path archive, Configuration conf) + throws IOException { + String classpath = conf.get("mapred.job.classpath.archives"); + conf.set("mapred.job.classpath.archives", classpath == null ? archive + .toString() : classpath + System.getProperty("path.separator") + + archive.toString()); + FileSystem fs = FileSystem.get(conf); + URI uri = fs.makeQualified(archive).toUri(); + + addCacheArchive(uri, conf); + } + + /** + * Get the archive entries in classpath as an array of Path + * + * @param conf Configuration that contains the classpath setting + */ + public static Path[] getArchiveClassPaths(Configuration conf) { + String classpath = conf.get("mapred.job.classpath.archives"); + if (classpath == null) + return null; + ArrayList list = Collections.list(new StringTokenizer(classpath, System + .getProperty("path.separator"))); + Path[] paths = new Path[list.size()]; + for (int i = 0; i < list.size(); i++) { + paths[i] = new Path((String) list.get(i)); + } + return paths; + } + + /** + * This method allows you to create symlinks in the current working directory + * of the task to all the cache files/archives + * @param conf the jobconf + */ + public static void createSymlink(Configuration conf){ + conf.set("mapred.create.symlink", "yes"); + } + + /** + * This method checks to see if symlinks are to be create for the + * localized cache files in the current working directory + * @param conf the jobconf + * @return true if symlinks are to be created- else return false + */ + public static boolean getSymlink(Configuration conf){ + String result = conf.get("mapred.create.symlink"); + if ("yes".equals(result)){ + return true; + } + return false; + } + + /** + * This method checks if there is a conflict in the fragment names + * of the uris. Also makes sure that each uri has a fragment. It + * is only to be called if you want to create symlinks for + * the various archives and files. 
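[Editorial aside, illustrative only, not part of this patch] The cache-setup calls above (addCacheFile, addCacheArchive, createSymlink, checkURIs) are normally driven from job-submission code. A minimal sketch, using only methods shown in this file; the HDFS paths are hypothetical, and the URI fragments ("#lookup", "#dict") name the symlinks that checkURIs validates:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.filecache.DistributedCache;

    public class CacheSetupSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical HDFS paths; the fragment after '#' becomes the symlink name.
        URI file = new URI("/user/me/lookup.txt#lookup");
        URI archive = new URI("/user/me/dict.tar.gz#dict");
        DistributedCache.addCacheFile(file, conf);
        DistributedCache.addCacheArchive(archive, conf);
        DistributedCache.createSymlink(conf);
        // Every URI needs a distinct fragment once symlinks are requested.
        System.out.println("fragments ok: "
            + DistributedCache.checkURIs(new URI[] { file }, new URI[] { archive }));
      }
    }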
+ * @param uriFiles The uri array of urifiles + * @param uriArchives the uri array of uri archives + */ + public static boolean checkURIs(URI[] uriFiles, URI[] uriArchives){ + if ((uriFiles == null) && (uriArchives == null)){ + return true; + } + if (uriFiles != null){ + for (int i = 0; i < uriFiles.length; i++){ + String frag1 = uriFiles[i].getFragment(); + if (frag1 == null) + return false; + for (int j=i+1; j < uriFiles.length; j++){ + String frag2 = uriFiles[j].getFragment(); + if (frag2 == null) + return false; + if (frag1.equalsIgnoreCase(frag2)) + return false; + } + if (uriArchives != null){ + for (int j = 0; j < uriArchives.length; j++){ + String frag2 = uriArchives[j].getFragment(); + if (frag2 == null){ + return false; + } + if (frag1.equalsIgnoreCase(frag2)) + return false; + for (int k=j+1; k < uriArchives.length; k++){ + String frag3 = uriArchives[k].getFragment(); + if (frag3 == null) + return false; + if (frag2.equalsIgnoreCase(frag3)) + return false; + } + } + } + } + } + return true; + } + + private static class CacheStatus { + // false, not loaded yet, true is loaded + boolean currentStatus; + + // the local load path of this cache + Path localLoadPath; + + //the base dir where the cache lies + Path baseDir; + + //the size of this cache + long size; + + // number of instances using this cache + int refcount; + + // the cache-file modification time + long mtime; + + public CacheStatus(Path baseDir, Path localLoadPath) { + super(); + this.currentStatus = false; + this.localLoadPath = localLoadPath; + this.refcount = 0; + this.mtime = -1; + this.baseDir = baseDir; + this.size = 0; + } + } + + /** + * Clear the entire contents of the cache and delete the backing files. This + * should only be used when the server is reinitializing, because the users + * are going to lose their files. + */ + public static void purgeCache(Configuration conf) throws IOException { + synchronized (cachedArchives) { + FileSystem localFs = FileSystem.getLocal(conf); + for (Map.Entry f: cachedArchives.entrySet()) { + try { + localFs.delete(f.getValue().localLoadPath, true); + } catch (IOException ie) { + LOG.debug("Error cleaning up cache", ie); + } + } + cachedArchives.clear(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/BlockLocation.java b/src/java/org/apache/hadoop/fs/BlockLocation.java new file mode 100644 index 00000000000..8fb24a2fb30 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/BlockLocation.java @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.io.*; + +import java.io.*; + +/* + * A BlockLocation lists hosts, offset and length + * of block. 
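[Editorial aside, illustrative only, not part of this patch] A sketch of how BlockLocation instances are usually obtained, assuming the FileSystem#getFileBlockLocations(FileStatus, long, long) API that lives elsewhere in this patch; the input path is hypothetical:

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class BlockLocationSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus stat = fs.getFileStatus(new Path("/data/input.txt")); // hypothetical path
        for (BlockLocation b : fs.getFileBlockLocations(stat, 0, stat.getLen())) {
          System.out.println("offset=" + b.getOffset() + " length=" + b.getLength()
              + " hosts=" + Arrays.toString(b.getHosts()));
        }
      }
    }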
+ * + */ +public class BlockLocation implements Writable { + + static { // register a ctor + WritableFactories.setFactory + (BlockLocation.class, + new WritableFactory() { + public Writable newInstance() { return new BlockLocation(); } + }); + } + + private String[] hosts; //hostnames of datanodes + private String[] names; //hostname:portNumber of datanodes + private String[] topologyPaths; // full path name in network topology + private long offset; //offset of the of the block in the file + private long length; + + /** + * Default Constructor + */ + public BlockLocation() { + this(new String[0], new String[0], 0L, 0L); + } + + /** + * Constructor with host, name, offset and length + */ + public BlockLocation(String[] names, String[] hosts, long offset, + long length) { + if (names == null) { + this.names = new String[0]; + } else { + this.names = names; + } + if (hosts == null) { + this.hosts = new String[0]; + } else { + this.hosts = hosts; + } + this.offset = offset; + this.length = length; + this.topologyPaths = new String[0]; + } + + /** + * Constructor with host, name, network topology, offset and length + */ + public BlockLocation(String[] names, String[] hosts, String[] topologyPaths, + long offset, long length) { + this(names, hosts, offset, length); + if (topologyPaths == null) { + this.topologyPaths = new String[0]; + } else { + this.topologyPaths = topologyPaths; + } + } + + /** + * Get the list of hosts (hostname) hosting this block + */ + public String[] getHosts() throws IOException { + if ((hosts == null) || (hosts.length == 0)) { + return new String[0]; + } else { + return hosts; + } + } + + /** + * Get the list of names (hostname:port) hosting this block + */ + public String[] getNames() throws IOException { + if ((names == null) || (names.length == 0)) { + return new String[0]; + } else { + return this.names; + } + } + + /** + * Get the list of network topology paths for each of the hosts. + * The last component of the path is the host. 
+ */ + public String[] getTopologyPaths() throws IOException { + if ((topologyPaths == null) || (topologyPaths.length == 0)) { + return new String[0]; + } else { + return this.topologyPaths; + } + } + + /** + * Get the start offset of file associated with this block + */ + public long getOffset() { + return offset; + } + + /** + * Get the length of the block + */ + public long getLength() { + return length; + } + + /** + * Set the start offset of file associated with this block + */ + public void setOffset(long offset) { + this.offset = offset; + } + + /** + * Set the length of block + */ + public void setLength(long length) { + this.length = length; + } + + /** + * Set the hosts hosting this block + */ + public void setHosts(String[] hosts) throws IOException { + if (hosts == null) { + this.hosts = new String[0]; + } else { + this.hosts = hosts; + } + } + + /** + * Set the names (host:port) hosting this block + */ + public void setNames(String[] names) throws IOException { + if (names == null) { + this.names = new String[0]; + } else { + this.names = names; + } + } + + /** + * Set the network topology paths of the hosts + */ + public void setTopologyPaths(String[] topologyPaths) throws IOException { + if (topologyPaths == null) { + this.topologyPaths = new String[0]; + } else { + this.topologyPaths = topologyPaths; + } + } + + /** + * Implement write of Writable + */ + public void write(DataOutput out) throws IOException { + out.writeLong(offset); + out.writeLong(length); + out.writeInt(names.length); + for (int i=0; i < names.length; i++) { + Text name = new Text(names[i]); + name.write(out); + } + out.writeInt(hosts.length); + for (int i=0; i < hosts.length; i++) { + Text host = new Text(hosts[i]); + host.write(out); + } + out.writeInt(topologyPaths.length); + for (int i=0; i < topologyPaths.length; i++) { + Text host = new Text(topologyPaths[i]); + host.write(out); + } + } + + /** + * Implement readFields of Writable + */ + public void readFields(DataInput in) throws IOException { + this.offset = in.readLong(); + this.length = in.readLong(); + int numNames = in.readInt(); + this.names = new String[numNames]; + for (int i = 0; i < numNames; i++) { + Text name = new Text(); + name.readFields(in); + names[i] = name.toString(); + } + int numHosts = in.readInt(); + for (int i = 0; i < numHosts; i++) { + Text host = new Text(); + host.readFields(in); + hosts[i] = host.toString(); + } + int numTops = in.readInt(); + Text path = new Text(); + for (int i = 0; i < numTops; i++) { + path.readFields(in); + topologyPaths[i] = path.toString(); + } + } + + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(offset); + result.append(','); + result.append(length); + for(String h: hosts) { + result.append(','); + result.append(h); + } + return result.toString(); + } +} diff --git a/src/java/org/apache/hadoop/fs/BufferedFSInputStream.java b/src/java/org/apache/hadoop/fs/BufferedFSInputStream.java new file mode 100644 index 00000000000..f682d969e40 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/BufferedFSInputStream.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.BufferedInputStream; +import java.io.IOException; + + +/** + * A class optimizes reading from FSInputStream by bufferring + */ + + +public class BufferedFSInputStream extends BufferedInputStream +implements Seekable, PositionedReadable { + /** + * Creates a BufferedFSInputStream + * with the specified buffer size, + * and saves its argument, the input stream + * in, for later use. An internal + * buffer array of length size + * is created and stored in buf. + * + * @param in the underlying input stream. + * @param size the buffer size. + * @exception IllegalArgumentException if size <= 0. + */ + public BufferedFSInputStream(FSInputStream in, int size) { + super(in, size); + } + + public long getPos() throws IOException { + return ((FSInputStream)in).getPos()-(count-pos); + } + + public long skip(long n) throws IOException { + if (n <= 0) { + return 0; + } + + seek(getPos()+n); + return n; + } + + public void seek(long pos) throws IOException { + if( pos<0 ) { + return; + } + // optimize: check if the pos is in the buffer + long end = ((FSInputStream)in).getPos(); + long start = end - count; + if( pos>=start && pos 0) { + throw new ChecksumException("Checksum error: "+file+" at "+pos, pos); + } + return nread; + } + + /* Return the file length */ + private long getFileLength() throws IOException { + if( fileLen==-1L ) { + fileLen = fs.getContentSummary(file).getLength(); + } + return fileLen; + } + + /** + * Skips over and discards n bytes of data from the + * input stream. + * + *The skip method skips over some smaller number of bytes + * when reaching end of file before n bytes have been skipped. + * The actual number of bytes skipped is returned. If n is + * negative, no bytes are skipped. + * + * @param n the number of bytes to be skipped. + * @return the actual number of bytes skipped. + * @exception IOException if an I/O error occurs. + * ChecksumException if the chunk to skip to is corrupted + */ + public synchronized long skip(long n) throws IOException { + long curPos = getPos(); + long fileLength = getFileLength(); + if( n+curPos > fileLength ) { + n = fileLength - curPos; + } + return super.skip(n); + } + + /** + * Seek to the given position in the stream. + * The next read() will be from that position. + * + *

This method does not allow seek past the end of the file. + * This produces IOException. + * + * @param pos the postion to seek to. + * @exception IOException if an I/O error occurs or seeks after EOF + * ChecksumException if the chunk to seek to is corrupted + */ + + public synchronized void seek(long pos) throws IOException { + if(pos>getFileLength()) { + throw new IOException("Cannot seek after EOF"); + } + super.seek(pos); + } + + } + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return new FSDataInputStream( + new ChecksumFSInputChecker(this, f, bufferSize)); + } + + /** {@inheritDoc} */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + /** + * Calculated the length of the checksum file in bytes. + * @param size the length of the data file in bytes + * @param bytesPerSum the number of bytes in a checksum block + * @return the number of bytes in the checksum file + */ + public static long getChecksumLength(long size, int bytesPerSum) { + //the checksum length is equal to size passed divided by bytesPerSum + + //bytes written in the beginning of the checksum file. + return ((size + bytesPerSum - 1) / bytesPerSum) * 4 + + CHECKSUM_VERSION.length + 4; + } + + /** This class provides an output stream for a checksummed file. + * It generates checksums for data. */ + private static class ChecksumFSOutputSummer extends FSOutputSummer { + private FSDataOutputStream datas; + private FSDataOutputStream sums; + private static final float CHKSUM_AS_FRACTION = 0.01f; + + public ChecksumFSOutputSummer(ChecksumFileSystem fs, + Path file, + boolean overwrite, + short replication, + long blockSize, + Configuration conf) + throws IOException { + this(fs, file, overwrite, + conf.getInt("io.file.buffer.size", 4096), + replication, blockSize, null); + } + + public ChecksumFSOutputSummer(ChecksumFileSystem fs, + Path file, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) + throws IOException { + super(new CRC32(), fs.getBytesPerSum(), 4); + int bytesPerSum = fs.getBytesPerSum(); + this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize, + replication, blockSize, progress); + int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize); + this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true, + sumBufferSize, replication, + blockSize); + sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length); + sums.writeInt(bytesPerSum); + } + + public void close() throws IOException { + flushBuffer(); + sums.close(); + datas.close(); + } + + @Override + protected void writeChunk(byte[] b, int offset, int len, byte[] checksum) + throws IOException { + datas.write(b, offset, len); + sums.write(checksum); + } + } + + /** {@inheritDoc} */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + Path parent = f.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + final FSDataOutputStream out = new FSDataOutputStream( + new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication, + blockSize, progress), null); + if 
(permission != null) { + setPermission(f, permission); + } + return out; + } + + /** + * Set replication for an existing file. + * Implement the abstract setReplication of FileSystem + * @param src file name + * @param replication new replication + * @throws IOException + * @return true if successful; + * false if file does not exist or is a directory + */ + public boolean setReplication(Path src, short replication) throws IOException { + boolean value = fs.setReplication(src, replication); + if (!value) + return false; + + Path checkFile = getChecksumFile(src); + if (exists(checkFile)) + fs.setReplication(checkFile, replication); + + return true; + } + + /** + * Rename files/dirs + */ + public boolean rename(Path src, Path dst) throws IOException { + if (fs.isDirectory(src)) { + return fs.rename(src, dst); + } else { + + boolean value = fs.rename(src, dst); + if (!value) + return false; + + Path checkFile = getChecksumFile(src); + if (fs.exists(checkFile)) { //try to rename checksum + if (fs.isDirectory(dst)) { + value = fs.rename(checkFile, dst); + } else { + value = fs.rename(checkFile, getChecksumFile(dst)); + } + } + + return value; + } + } + + /** + * Implement the delete(Path, boolean) in checksum + * file system. + */ + public boolean delete(Path f, boolean recursive) throws IOException{ + FileStatus fstatus = null; + try { + fstatus = fs.getFileStatus(f); + } catch(FileNotFoundException e) { + return false; + } + if(fstatus.isDir()) { + //this works since the crcs are in the same + //directories and the files. so we just delete + //everything in the underlying filesystem + return fs.delete(f, recursive); + } else { + Path checkFile = getChecksumFile(f); + if (fs.exists(checkFile)) { + fs.delete(checkFile, true); + } + return fs.delete(f, true); + } + } + + final private static PathFilter DEFAULT_FILTER = new PathFilter() { + public boolean accept(Path file) { + return !isChecksumFile(file); + } + }; + + /** + * List the statuses of the files/directories in the given path if the path is + * a directory. + * + * @param f + * given path + * @return the statuses of the files/directories in the given patch + * @throws IOException + */ + @Override + public FileStatus[] listStatus(Path f) throws IOException { + return fs.listStatus(f, DEFAULT_FILTER); + } + + @Override + public boolean mkdirs(Path f) throws IOException { + return fs.mkdirs(f); + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + */ + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * If src and dst are directories, the copyCrc parameter + * determines whether to copy CRC files. 
+ */ + public void copyToLocalFile(Path src, Path dst, boolean copyCrc) + throws IOException { + if (!fs.isDirectory(src)) { // source is a file + fs.copyToLocalFile(src, dst); + FileSystem localFs = getLocal(getConf()).getRawFileSystem(); + if (localFs.isDirectory(dst)) { + dst = new Path(dst, src.getName()); + } + dst = getChecksumFile(dst); + if (localFs.exists(dst)) { //remove old local checksum file + localFs.delete(dst, true); + } + Path checksumFile = getChecksumFile(src); + if (copyCrc && fs.exists(checksumFile)) { //copy checksum file + fs.copyToLocalFile(checksumFile, dst); + } + } else { + FileStatus[] srcs = listStatus(src); + for (FileStatus srcFile : srcs) { + copyToLocalFile(srcFile.getPath(), + new Path(dst, srcFile.getPath().getName()), copyCrc); + } + } + } + + @Override + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return tmpLocalFile; + } + + @Override + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + moveFromLocalFile(tmpLocalFile, fsOutputFile); + } + + /** + * Report a checksum error to the file system. + * @param f the file name containing the error + * @param in the stream open on the file + * @param inPos the position of the beginning of the bad data in the file + * @param sums the stream open on the checksum file + * @param sumsPos the position of the beginning of the bad data in the checksum file + * @return if retry is neccessary + */ + public boolean reportChecksumFailure(Path f, FSDataInputStream in, + long inPos, FSDataInputStream sums, long sumsPos) { + return false; + } +} diff --git a/src/java/org/apache/hadoop/fs/ContentSummary.java b/src/java/org/apache/hadoop/fs/ContentSummary.java new file mode 100644 index 00000000000..2ec7959370d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ContentSummary.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; + +/** Store the summary of a content (a directory or a file). 
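[Editorial aside, illustrative only, not part of this patch] A minimal sketch of reading a ContentSummary, assuming FileSystem#getContentSummary(Path) elsewhere in this patch; the directory is hypothetical, and the printed layout roughly matches what the shell's count command produces:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class ContentSummarySketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        ContentSummary cs = fs.getContentSummary(new Path("/user/me")); // hypothetical path
        System.out.println(ContentSummary.getHeader(true));
        System.out.println(cs.toString(true) + "/user/me");
        System.out.println("bytes=" + cs.getLength() + " files=" + cs.getFileCount()
            + " dirs=" + cs.getDirectoryCount());
      }
    }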
*/ +public class ContentSummary implements Writable{ + private long length; + private long fileCount; + private long directoryCount; + private long quota; + private long spaceConsumed; + private long spaceQuota; + + + /** Constructor */ + public ContentSummary() {} + + /** Constructor */ + public ContentSummary(long length, long fileCount, long directoryCount) { + this(length, fileCount, directoryCount, -1L, length, -1L); + } + + /** Constructor */ + public ContentSummary( + long length, long fileCount, long directoryCount, long quota, + long spaceConsumed, long spaceQuota) { + this.length = length; + this.fileCount = fileCount; + this.directoryCount = directoryCount; + this.quota = quota; + this.spaceConsumed = spaceConsumed; + this.spaceQuota = spaceQuota; + } + + /** @return the length */ + public long getLength() {return length;} + + /** @return the directory count */ + public long getDirectoryCount() {return directoryCount;} + + /** @return the file count */ + public long getFileCount() {return fileCount;} + + /** Return the directory quota */ + public long getQuota() {return quota;} + + /** Retuns (disk) space consumed */ + public long getSpaceConsumed() {return spaceConsumed;} + + /** Returns (disk) space quota */ + public long getSpaceQuota() {return spaceQuota;} + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + out.writeLong(length); + out.writeLong(fileCount); + out.writeLong(directoryCount); + out.writeLong(quota); + out.writeLong(spaceConsumed); + out.writeLong(spaceQuota); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + this.length = in.readLong(); + this.fileCount = in.readLong(); + this.directoryCount = in.readLong(); + this.quota = in.readLong(); + this.spaceConsumed = in.readLong(); + this.spaceQuota = in.readLong(); + } + + /** + * Output format: + * <----12----> <----12----> <-------18-------> + * DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME + */ + private static final String STRING_FORMAT = "%12d %12d %18d "; + /** + * Output format: + * <----12----> <----15----> <----15----> <----15----> <----12----> <----12----> <-------18-------> + * QUOTA REMAINING_QUATA SPACE_QUOTA SPACE_QUOTA_REM DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME + */ + private static final String QUOTA_STRING_FORMAT = "%12s %15s "; + private static final String SPACE_QUOTA_STRING_FORMAT = "%15s %15s "; + + /** The header string */ + private static final String HEADER = String.format( + STRING_FORMAT.replace('d', 's'), "directories", "files", "bytes"); + + private static final String QUOTA_HEADER = String.format( + QUOTA_STRING_FORMAT + SPACE_QUOTA_STRING_FORMAT, + "quota", "remaining quota", "space quota", "reamaining quota") + + HEADER; + + /** Return the header of the output. + * if qOption is false, output directory count, file count, and content size; + * if qOption is true, output quota and remaining quota as well. + * + * @param qOption a flag indicating if quota needs to be printed or not + * @return the header of the output + */ + public static String getHeader(boolean qOption) { + return qOption ? QUOTA_HEADER : HEADER; + } + + /** {@inheritDoc} */ + public String toString() { + return toString(true); + } + + /** Return the string representation of the object in the output format. + * if qOption is false, output directory count, file count, and content size; + * if qOption is true, output quota and remaining quota as well. 
+ * + * @param qOption a flag indicating if quota needs to be printed or not + * @return the string representation of the object + */ + public String toString(boolean qOption) { + String prefix = ""; + if (qOption) { + String quotaStr = "none"; + String quotaRem = "inf"; + String spaceQuotaStr = "none"; + String spaceQuotaRem = "inf"; + + if (quota>0) { + quotaStr = Long.toString(quota); + quotaRem = Long.toString(quota-(directoryCount+fileCount)); + } + if (spaceQuota>0) { + spaceQuotaStr = Long.toString(spaceQuota); + spaceQuotaRem = Long.toString(spaceQuota - spaceConsumed); + } + + prefix = String.format(QUOTA_STRING_FORMAT + SPACE_QUOTA_STRING_FORMAT, + quotaStr, quotaRem, spaceQuotaStr, spaceQuotaRem); + } + + return prefix + String.format(STRING_FORMAT, directoryCount, + fileCount, length); + } +} diff --git a/src/java/org/apache/hadoop/fs/DF.java b/src/java/org/apache/hadoop/fs/DF.java new file mode 100644 index 00000000000..70cea9eb023 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/DF.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.File; +import java.io.IOException; +import java.io.BufferedReader; + +import java.util.EnumSet; +import java.util.StringTokenizer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; + +/** Filesystem disk space usage statistics. Uses the unix 'df' program. + * Tested on Linux, FreeBSD, Cygwin. 
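[Editorial aside, illustrative only, not part of this patch] DF shells out to the platform's df binary, so the sketch below only works on Unix-like systems; the directory is arbitrary and every accessor re-runs the command at most once per refresh interval:

    import java.io.File;
    import org.apache.hadoop.fs.DF;

    public class DfSketch {
      public static void main(String[] args) throws Exception {
        DF df = new DF(new File("/tmp"), DF.DF_INTERVAL_DEFAULT);
        System.out.println("mount=" + df.getMount()
            + " capacityBytes=" + df.getCapacity()
            + " availableBytes=" + df.getAvailable()
            + " used=" + df.getPercentUsed() + "%");
      }
    }

DU below follows the same Shell-based pattern for the du command, but caches the result in an AtomicLong and refreshes it from a background thread started by start().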
*/ +public class DF extends Shell { + public static final long DF_INTERVAL_DEFAULT = 3 * 1000; // default DF refresh interval + + private String dirPath; + private String filesystem; + private long capacity; + private long used; + private long available; + private int percentUsed; + private String mount; + + enum OSType { + OS_TYPE_UNIX("UNIX"), + OS_TYPE_WIN("Windows"), + OS_TYPE_SOLARIS("SunOS"), + OS_TYPE_MAC("Mac"), + OS_TYPE_AIX("AIX"); + + private String id; + OSType(String id) { + this.id = id; + } + public boolean match(String osStr) { + return osStr != null && osStr.indexOf(id) >= 0; + } + String getId() { + return id; + } + } + + private static final String OS_NAME = System.getProperty("os.name"); + private static final OSType OS_TYPE = getOSType(OS_NAME); + + protected static OSType getOSType(String osName) { + for (OSType ost : EnumSet.allOf(OSType.class)) { + if (ost.match(osName)) { + return ost; + } + } + return OSType.OS_TYPE_UNIX; + } + + public DF(File path, Configuration conf) throws IOException { + this(path, conf.getLong("dfs.df.interval", DF.DF_INTERVAL_DEFAULT)); + } + + public DF(File path, long dfInterval) throws IOException { + super(dfInterval); + this.dirPath = path.getCanonicalPath(); + } + + protected OSType getOSType() { + return OS_TYPE; + } + + /// ACCESSORS + + public String getDirPath() { + return dirPath; + } + + public String getFilesystem() throws IOException { + run(); + return filesystem; + } + + public long getCapacity() throws IOException { + run(); + return capacity; + } + + public long getUsed() throws IOException { + run(); + return used; + } + + public long getAvailable() throws IOException { + run(); + return available; + } + + public int getPercentUsed() throws IOException { + run(); + return percentUsed; + } + + public String getMount() throws IOException { + run(); + return mount; + } + + public String toString() { + return + "df -k " + mount +"\n" + + filesystem + "\t" + + capacity / 1024 + "\t" + + used / 1024 + "\t" + + available / 1024 + "\t" + + percentUsed + "%\t" + + mount; + } + + @Override + protected String[] getExecString() { + // ignoring the error since the exit code it enough + return new String[] {"bash","-c","exec 'df' '-k' '" + dirPath + + "' 2>/dev/null"}; + } + + @Override + protected void parseExecResult(BufferedReader lines) throws IOException { + lines.readLine(); // skip headings + + String line = lines.readLine(); + if (line == null) { + throw new IOException( "Expecting a line not the end of stream" ); + } + StringTokenizer tokens = + new StringTokenizer(line, " \t\n\r\f%"); + + this.filesystem = tokens.nextToken(); + if (!tokens.hasMoreTokens()) { // for long filesystem name + line = lines.readLine(); + if (line == null) { + throw new IOException( "Expecting a line not the end of stream" ); + } + tokens = new StringTokenizer(line, " \t\n\r\f%"); + } + + switch(getOSType()) { + case OS_TYPE_AIX: + this.capacity = Long.parseLong(tokens.nextToken()) * 1024; + this.available = Long.parseLong(tokens.nextToken()) * 1024; + this.percentUsed = Integer.parseInt(tokens.nextToken()); + tokens.nextToken(); + tokens.nextToken(); + this.mount = tokens.nextToken(); + this.used = this.capacity - this.available; + break; + + case OS_TYPE_WIN: + case OS_TYPE_SOLARIS: + case OS_TYPE_MAC: + case OS_TYPE_UNIX: + default: + this.capacity = Long.parseLong(tokens.nextToken()) * 1024; + this.used = Long.parseLong(tokens.nextToken()) * 1024; + this.available = Long.parseLong(tokens.nextToken()) * 1024; + this.percentUsed = 
Integer.parseInt(tokens.nextToken()); + this.mount = tokens.nextToken(); + break; + } + } + + public static void main(String[] args) throws Exception { + String path = "."; + if (args.length > 0) + path = args[0]; + + System.out.println(new DF(new File(path), DF_INTERVAL_DEFAULT).toString()); + } +} diff --git a/src/java/org/apache/hadoop/fs/DU.java b/src/java/org/apache/hadoop/fs/DU.java new file mode 100644 index 00000000000..2b65ae09875 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/DU.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +/** Filesystem disk space usage statistics. Uses the unix 'du' program*/ +public class DU extends Shell { + private String dirPath; + + private AtomicLong used = new AtomicLong(); + private volatile boolean shouldRun = true; + private Thread refreshUsed; + private IOException duException = null; + private long refreshInterval; + + /** + * Keeps track of disk usage. + * @param path the path to check disk usage in + * @param interval refresh the disk usage at this interval + * @throws IOException if we fail to refresh the disk usage + */ + public DU(File path, long interval) throws IOException { + super(0); + + //we set the Shell interval to 0 so it will always run our command + //and use this one to set the thread sleep interval + this.refreshInterval = interval; + this.dirPath = path.getCanonicalPath(); + + //populate the used variable + run(); + } + + /** + * Keeps track of disk usage. + * @param path the path to check disk usage in + * @param conf configuration object + * @throws IOException if we fail to refresh the disk usage + */ + public DU(File path, Configuration conf) throws IOException { + this(path, 600000L); + //10 minutes default refresh interval + } + + /** + * This thread refreshes the "used" variable. + * + * Future improvements could be to not permanently + * run this thread, instead run when getUsed is called. + **/ + class DURefreshThread implements Runnable { + + public void run() { + + while(shouldRun) { + + try { + Thread.sleep(refreshInterval); + + try { + //update the used variable + DU.this.run(); + } catch (IOException e) { + synchronized (DU.this) { + //save the latest exception so we can return it in getUsed() + duException = e; + } + + LOG.warn("Could not get disk usage information", e); + } + } catch (InterruptedException e) { + } + } + } + } + + /** + * Decrease how much disk space we use. 
+ * @param value decrease by this value + */ + public void decDfsUsed(long value) { + used.addAndGet(-value); + } + + /** + * Increase how much disk space we use. + * @param value increase by this value + */ + public void incDfsUsed(long value) { + used.addAndGet(value); + } + + /** + * @return disk space used + * @throws IOException if the shell command fails + */ + public long getUsed() throws IOException { + //if the updating thread isn't started, update on demand + if(refreshUsed == null) { + run(); + } else { + synchronized (DU.this) { + //if an exception was thrown in the last run, rethrow + if(duException != null) { + IOException tmp = duException; + duException = null; + throw tmp; + } + } + } + + return used.longValue(); + } + + /** + * @return the path of which we're keeping track of disk usage + */ + public String getDirPath() { + return dirPath; + } + + /** + * Start the disk usage checking thread. + */ + public void start() { + //only start the thread if the interval is sane + if(refreshInterval > 0) { + refreshUsed = new Thread(new DURefreshThread(), + "refreshUsed-"+dirPath); + refreshUsed.setDaemon(true); + refreshUsed.start(); + } + } + + /** + * Shut down the refreshing thread. + */ + public void shutdown() { + this.shouldRun = false; + + if(this.refreshUsed != null) { + this.refreshUsed.interrupt(); + } + } + + public String toString() { + return + "du -sk " + dirPath +"\n" + + used + "\t" + dirPath; + } + + protected String[] getExecString() { + return new String[] {"du", "-sk", dirPath}; + } + + protected void parseExecResult(BufferedReader lines) throws IOException { + String line = lines.readLine(); + if (line == null) { + throw new IOException("Expecting a line not the end of stream"); + } + String[] tokens = line.split("\t"); + if(tokens.length == 0) { + throw new IOException("Illegal du output"); + } + this.used.set(Long.parseLong(tokens[0])*1024); + } + + public static void main(String[] args) throws Exception { + String path = "."; + if (args.length > 0) { + path = args[0]; + } + + System.out.println(new DU(new File(path), new Configuration()).toString()); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSDataInputStream.java b/src/java/org/apache/hadoop/fs/FSDataInputStream.java new file mode 100644 index 00000000000..6c59b701f23 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSDataInputStream.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/** Utility that wraps a {@link FSInputStream} in a {@link DataInputStream} + * and buffers input through a {@link BufferedInputStream}. 
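[Editorial aside, illustrative only, not part of this patch] A sketch of the two read styles FSDataInputStream exposes: positioned reads, which by the PositionedReadable contract do not move the stream cursor, and seek-then-read. The path is hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class SeekReadSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(new Path("/data/input.bin")); // hypothetical path
        byte[] header = new byte[16];
        in.readFully(0L, header);   // positioned read; current position is untouched
        in.seek(1024L);             // move the cursor, then read sequentially
        int b = in.read();
        System.out.println("pos=" + in.getPos() + " byte=" + b);
        in.close();
      }
    }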
*/ +public class FSDataInputStream extends DataInputStream + implements Seekable, PositionedReadable { + + public FSDataInputStream(InputStream in) + throws IOException { + super(in); + if( !(in instanceof Seekable) || !(in instanceof PositionedReadable) ) { + throw new IllegalArgumentException( + "In is not an instance of Seekable or PositionedReadable"); + } + } + + public synchronized void seek(long desired) throws IOException { + ((Seekable)in).seek(desired); + } + + public long getPos() throws IOException { + return ((Seekable)in).getPos(); + } + + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + return ((PositionedReadable)in).read(position, buffer, offset, length); + } + + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException { + ((PositionedReadable)in).readFully(position, buffer, offset, length); + } + + public void readFully(long position, byte[] buffer) + throws IOException { + ((PositionedReadable)in).readFully(position, buffer, 0, buffer.length); + } + + public boolean seekToNewSource(long targetPos) throws IOException { + return ((Seekable)in).seekToNewSource(targetPos); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSDataOutputStream.java b/src/java/org/apache/hadoop/fs/FSDataOutputStream.java new file mode 100644 index 00000000000..ac13d74c3b2 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSDataOutputStream.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/** Utility that wraps a {@link OutputStream} in a {@link DataOutputStream}, + * buffers output through a {@link BufferedOutputStream} and creates a checksum + * file. 
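[Editorial aside, illustrative only, not part of this patch] A minimal write-side sketch, assuming FileSystem#create(Path) elsewhere in this patch; getPos() reports the byte count tracked by the internal PositionCache, and sync() only does work when the wrapped stream implements Syncable:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class CreateWriteSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(new Path("/tmp/out.bin")); // hypothetical path
        out.writeUTF("hello");
        System.out.println("bytes written: " + out.getPos());
        out.sync();   // forwarded to the wrapped stream only if it is Syncable
        out.close();
      }
    }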
*/ +public class FSDataOutputStream extends DataOutputStream implements Syncable { + private OutputStream wrappedStream; + + private static class PositionCache extends FilterOutputStream { + private FileSystem.Statistics statistics; + long position; + + public PositionCache(OutputStream out, + FileSystem.Statistics stats, + long pos) throws IOException { + super(out); + statistics = stats; + position = pos; + } + + public void write(int b) throws IOException { + out.write(b); + position++; + if (statistics != null) { + statistics.incrementBytesWritten(1); + } + } + + public void write(byte b[], int off, int len) throws IOException { + out.write(b, off, len); + position += len; // update position + if (statistics != null) { + statistics.incrementBytesWritten(len); + } + } + + public long getPos() throws IOException { + return position; // return cached position + } + + public void close() throws IOException { + out.close(); + } + } + + @Deprecated + public FSDataOutputStream(OutputStream out) throws IOException { + this(out, null); + } + + public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats) + throws IOException { + this(out, stats, 0); + } + + public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats, + long startPosition) throws IOException { + super(new PositionCache(out, stats, startPosition)); + wrappedStream = out; + } + + public long getPos() throws IOException { + return ((PositionCache)out).getPos(); + } + + public void close() throws IOException { + out.close(); // This invokes PositionCache.close() + } + + // Returns the underlying output stream. This is used by unit tests. + public OutputStream getWrappedStream() { + return wrappedStream; + } + + /** {@inheritDoc} */ + public void sync() throws IOException { + if (wrappedStream instanceof Syncable) { + ((Syncable)wrappedStream).sync(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FSError.java b/src/java/org/apache/hadoop/fs/FSError.java new file mode 100644 index 00000000000..8dd19125898 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSError.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +/** Thrown for unexpected filesystem errors, presumed to reflect disk errors + * in the native filesystem. 
*/ +public class FSError extends Error { + private static final long serialVersionUID = 1L; + + FSError(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSInputChecker.java b/src/java/org/apache/hadoop/fs/FSInputChecker.java new file mode 100644 index 00000000000..1d8e03ff935 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSInputChecker.java @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Checksum; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +/** + * This is a generic input stream for verifying checksums for + * data before it is read by a user. + */ + +abstract public class FSInputChecker extends FSInputStream { + public static final Log LOG + = LogFactory.getLog(FSInputChecker.class); + + /** The file name from which data is read from */ + protected Path file; + private Checksum sum; + private boolean verifyChecksum = true; + private byte[] buf; + private byte[] checksum; + private int pos; + private int count; + + private int numOfRetries; + + // cached file position + private long chunkPos = 0; + + /** Constructor + * + * @param file The name of the file to be read + * @param numOfRetries Number of read retries when ChecksumError occurs + */ + protected FSInputChecker( Path file, int numOfRetries) { + this.file = file; + this.numOfRetries = numOfRetries; + } + + /** Constructor + * + * @param file The name of the file to be read + * @param numOfRetries Number of read retries when ChecksumError occurs + * @param sum the type of Checksum engine + * @param chunkSize maximun chunk size + * @param checksumSize the number byte of each checksum + */ + protected FSInputChecker( Path file, int numOfRetries, + boolean verifyChecksum, Checksum sum, int chunkSize, int checksumSize ) { + this(file, numOfRetries); + set(verifyChecksum, sum, chunkSize, checksumSize); + } + + /** Reads in next checksum chunk data into buf at offset + * and checksum into checksum. + * The method is used for implementing read, therefore, it should be optimized + * for sequential reading + * @param pos chunkPos + * @param buf desitination buffer + * @param offset offset in buf at which to store data + * @param len maximun number of bytes to read + * @return number of bytes read + */ + abstract protected int readChunk(long pos, byte[] buf, int offset, int len, + byte[] checksum) throws IOException; + + /** Return position of beginning of chunk containing pos. 
+ * + * @param pos a postion in the file + * @return the starting position of the chunk which contains the byte + */ + abstract protected long getChunkPosition(long pos); + + /** Return true if there is a need for checksum verification */ + protected synchronized boolean needChecksum() { + return verifyChecksum && sum != null; + } + + /** + * Read one checksum-verified byte + * + * @return the next byte of data, or -1 if the end of the + * stream is reached. + * @exception IOException if an I/O error occurs. + */ + + public synchronized int read() throws IOException { + if (pos >= count) { + fill(); + if (pos >= count) { + return -1; + } + } + return buf[pos++] & 0xff; + } + + /** + * Read checksum verified bytes from this byte-input stream into + * the specified byte array, starting at the given offset. + * + *

This method implements the general contract of the corresponding + * {@link InputStream#read(byte[], int, int) read} method of + * the {@link InputStream} class. As an additional + * convenience, it attempts to read as many bytes as possible by repeatedly + * invoking the read method of the underlying stream. This + * iterated read continues until one of the following + * conditions becomes true:

+ * <ul>
+ *   <li> The specified number of bytes have been read,
+ *
+ *   <li> The read method of the underlying stream returns
+ *   -1, indicating end-of-file.
+ *
+ * </ul>
If the first read on the underlying stream returns + * -1 to indicate end-of-file then this method returns + * -1. Otherwise this method returns the number of bytes + * actually read. + * + * @param b destination buffer. + * @param off offset at which to start storing bytes. + * @param len maximum number of bytes to read. + * @return the number of bytes read, or -1 if the end of + * the stream has been reached. + * @exception IOException if an I/O error occurs. + * ChecksumException if any checksum error occurs + */ + public synchronized int read(byte[] b, int off, int len) throws IOException { + // parameter check + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + int n = 0; + for (;;) { + int nread = read1(b, off + n, len - n); + if (nread <= 0) + return (n == 0) ? nread : n; + n += nread; + if (n >= len) + return n; + } + } + + /** + * Fills the buffer with a chunk data. + * No mark is supported. + * This method assumes that all data in the buffer has already been read in, + * hence pos > count. + */ + private void fill( ) throws IOException { + assert(pos>=count); + // fill internal buffer + count = readChecksumChunk(buf, 0, buf.length); + } + + /* + * Read characters into a portion of an array, reading from the underlying + * stream at most once if necessary. + */ + private int read1(byte b[], int off, int len) + throws IOException { + int avail = count-pos; + if( avail <= 0 ) { + if(len>=buf.length) { + // read a chunk to user buffer directly; avoid one copy + int nread = readChecksumChunk(b, off, len); + return nread; + } else { + // read a chunk into the local buffer + fill(); + if( count <= 0 ) { + return -1; + } else { + avail = count; + } + } + } + + // copy content of the local buffer to the user buffer + int cnt = (avail < len) ? avail : len; + System.arraycopy(buf, pos, b, off, cnt); + pos += cnt; + return cnt; + } + + /* Read up one checksum chunk to array b at pos off + * It requires a checksum chunk boundary + * in between + * and it stops reading at the boundary or at the end of the stream; + * Otherwise an IllegalArgumentException is thrown. + * This makes sure that all data read are checksum verified. + * + * @param b the buffer into which the data is read. + * @param off the start offset in array b + * at which the data is written. + * @param len the maximum number of bytes to read. + * @return the total number of bytes read into the buffer, or + * -1 if there is no more data because the end of + * the stream has been reached. + * @throws IOException if an I/O error occurs. + */ + private int readChecksumChunk(byte b[], int off, int len) + throws IOException { + // invalidate buffer + count = pos = 0; + + int read = 0; + boolean retry = true; + int retriesLeft = numOfRetries; + do { + retriesLeft--; + + try { + read = readChunk(chunkPos, b, off, len, checksum); + if( read > 0 ) { + if( needChecksum() ) { + sum.update(b, off, read); + verifySum(chunkPos); + } + chunkPos += read; + } + retry = false; + } catch (ChecksumException ce) { + LOG.info("Found checksum error: b[" + off + ", " + (off+read) + "]=" + + StringUtils.byteToHexString(b, off, off + read), ce); + if (retriesLeft == 0) { + throw ce; + } + + // try a new replica + if (seekToNewSource(chunkPos)) { + // Since at least one of the sources is different, + // the read might succeed, so we'll retry. 
+ seek(chunkPos); + } else { + // Neither the data stream nor the checksum stream are being read + // from different sources, meaning we'll still get a checksum error + // if we try to do the read again. We throw an exception instead. + throw ce; + } + } + } while (retry); + return read; + } + + /* verify checksum for the chunk. + * @throws ChecksumException if there is a mismatch + */ + private void verifySum(long errPos) throws ChecksumException { + long crc = getChecksum(); + long sumValue = sum.getValue(); + sum.reset(); + if (crc != sumValue) { + throw new ChecksumException( + "Checksum error: "+file+" at "+errPos, errPos); + } + } + + /* calculate checksum value */ + private long getChecksum() { + return checksum2long(checksum); + } + + /** Convert a checksum byte array to a long */ + static public long checksum2long(byte[] checksum) { + long crc = 0L; + for(int i=0; in bytes of data from the + * input stream. + * + *

This method may skip more bytes than are remaining in the backing + * file. This produces no exception and the number of bytes skipped + * may include some number of bytes that were beyond the EOF of the + * backing file. Attempting to read from the stream after skipping past + * the end will result in -1 indicating the end of the file. + * + *
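For illustration only, a minimal caller-side sketch of the skip-past-EOF behaviour described above. It assumes an already-initialized FileSystem handle named fs whose open() returns a stream backed by this checker (as the checksummed filesystems in this patch do); the path and sizes are made up.

    // Hypothetical path and fs handle; skipping beyond the end of the file
    // is not an error; the next read simply returns -1 for end-of-file.
    FSDataInputStream in = fs.open(new Path("/tmp/tiny-file.txt"));
    long skipped = in.skip(10L * 1024 * 1024);  // may exceed the file length
    int eof = in.read();                        // -1 once positioned past EOF
    in.close();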

If n is negative, no bytes are skipped. + * + * @param n the number of bytes to be skipped. + * @return the actual number of bytes skipped. + * @exception IOException if an I/O error occurs. + * ChecksumException if the chunk to skip to is corrupted + */ + public synchronized long skip(long n) throws IOException { + if (n <= 0) { + return 0; + } + + seek(getPos()+n); + return n; + } + + /** + * Seek to the given position in the stream. + * The next read() will be from that position. + * + *

This method may seek past the end of the file. + * This produces no exception and an attempt to read from + * the stream will result in -1 indicating the end of the file. + * + * @param pos the postion to seek to. + * @exception IOException if an I/O error occurs. + * ChecksumException if the chunk to seek to is corrupted + */ + + public synchronized void seek(long pos) throws IOException { + if( pos<0 ) { + return; + } + // optimize: check if the pos is in the buffer + long start = chunkPos - this.count; + if( pos>=start && pos 0) { + readFully(this, new byte[delta], 0, delta); + } + } + + /** + * A utility function that tries to read up to len bytes from + * stm + * + * @param stm an input stream + * @param buf destiniation buffer + * @param offset offset at which to store data + * @param len number of bytes to read + * @return actual number of bytes read + * @throws IOException if there is any IO error + */ + protected static int readFully(InputStream stm, + byte[] buf, int offset, int len) throws IOException { + int n = 0; + for (;;) { + int nread = stm.read(buf, offset + n, len - n); + if (nread <= 0) + return (n == 0) ? nread : n; + n += nread; + if (n >= len) + return n; + } + } + + /** + * Set the checksum related parameters + * @param verifyChecksum whether to verify checksum + * @param sum which type of checksum to use + * @param maxChunkSize maximun chunk size + * @param checksumSize checksum size + */ + final protected synchronized void set(boolean verifyChecksum, + Checksum sum, int maxChunkSize, int checksumSize ) { + this.verifyChecksum = verifyChecksum; + this.sum = sum; + this.buf = new byte[maxChunkSize]; + this.checksum = new byte[checksumSize]; + this.count = 0; + this.pos = 0; + } + + final public boolean markSupported() { + return false; + } + + final public void mark(int readlimit) { + } + + final public void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + + + /* reset this FSInputChecker's state */ + private void resetState() { + // invalidate buffer + count = 0; + pos = 0; + // reset Checksum + if (sum != null) { + sum.reset(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FSInputStream.java b/src/java/org/apache/hadoop/fs/FSInputStream.java new file mode 100644 index 00000000000..91cac46cdc5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSInputStream.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/**************************************************************** + * FSInputStream is a generic old InputStream with a little bit + * of RAF-style seek ability. 
+ * + *****************************************************************/ +public abstract class FSInputStream extends InputStream + implements Seekable, PositionedReadable { + /** + * Seek to the given offset from the start of the file. + * The next read() will be from that location. Can't + * seek past the end of the file. + */ + public abstract void seek(long pos) throws IOException; + + /** + * Return the current offset from the start of the file + */ + public abstract long getPos() throws IOException; + + /** + * Seeks a different copy of the data. Returns true if + * found a new source, false otherwise. + */ + public abstract boolean seekToNewSource(long targetPos) throws IOException; + + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + synchronized (this) { + long oldPos = getPos(); + int nread = -1; + try { + seek(position); + nread = read(buffer, offset, length); + } finally { + seek(oldPos); + } + return nread; + } + } + + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException { + int nread = 0; + while (nread < length) { + int nbytes = read(position+nread, buffer, offset+nread, length-nread); + if (nbytes < 0) { + throw new EOFException("End of file reached before reading fully."); + } + nread += nbytes; + } + } + + public void readFully(long position, byte[] buffer) + throws IOException { + readFully(position, buffer, 0, buffer.length); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSOutputSummer.java b/src/java/org/apache/hadoop/fs/FSOutputSummer.java new file mode 100644 index 00000000000..d730671f539 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSOutputSummer.java @@ -0,0 +1,176 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.Checksum; + +/** + * This is a generic output stream for generating checksums for + * data before it is written to the underlying stream + */ + +abstract public class FSOutputSummer extends OutputStream { + // data checksum + private Checksum sum; + // internal buffer for storing data before it is checksumed + private byte buf[]; + // internal buffer for storing checksum + private byte checksum[]; + // The number of valid bytes in the buffer. 
+ private int count; + + protected FSOutputSummer(Checksum sum, int maxChunkSize, int checksumSize) { + this.sum = sum; + this.buf = new byte[maxChunkSize]; + this.checksum = new byte[checksumSize]; + this.count = 0; + } + + /* write the data chunk in b staring at offset with + * a length of len, and its checksum + */ + protected abstract void writeChunk(byte[] b, int offset, int len, byte[] checksum) + throws IOException; + + /** Write one byte */ + public synchronized void write(int b) throws IOException { + sum.update(b); + buf[count++] = (byte)b; + if(count == buf.length) { + flushBuffer(); + } + } + + /** + * Writes len bytes from the specified byte array + * starting at offset off and generate a checksum for + * each data chunk. + * + *
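Purely as an illustrative sketch (not part of this patch), a toy subclass shows what a concrete writeChunk implementation could look like: it writes each chunk's checksum followed by the chunk bytes to a wrapped stream. The class name is hypothetical, and the 512-byte chunk size and 4-byte CRC32 checksum are arbitrary example arguments to the protected constructor shown above; real implementations (for example in ChecksumFileSystem) are more involved.

    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.zip.CRC32;

    // Toy example only; not how the patch's filesystems lay out checksums.
    class CrcPrefixingOutputStream extends FSOutputSummer {
      private final OutputStream out;

      CrcPrefixingOutputStream(OutputStream out) {
        super(new CRC32(), 512, 4);   // 512-byte chunks, 4-byte checksums
        this.out = out;
      }

      @Override
      protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
          throws IOException {
        out.write(checksum);          // checksum of this chunk
        out.write(b, offset, len);    // then the chunk data itself
      }
    }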

This method stores bytes from the given array into this + * stream's buffer before it gets checksumed. The buffer gets checksumed + * and flushed to the underlying output stream when all data + * in a checksum chunk are in the buffer. If the buffer is empty and + * requested length is at least as large as the size of next checksum chunk + * size, this method will checksum and write the chunk directly + * to the underlying output stream. Thus it avoids uneccessary data copy. + * + * @param b the data. + * @param off the start offset in the data. + * @param len the number of bytes to write. + * @exception IOException if an I/O error occurs. + */ + public synchronized void write(byte b[], int off, int len) + throws IOException { + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + for (int n=0;n=buf.length) { + // local buffer is empty and user data has one chunk + // checksum and output data + final int length = buf.length; + sum.update(b, off, length); + writeChecksumChunk(b, off, length, false); + return length; + } + + // copy user data to local buffer + int bytesToCopy = buf.length-count; + bytesToCopy = (len>> 24) & 0xFF); + bytes[1] = (byte)((integer >>> 16) & 0xFF); + bytes[2] = (byte)((integer >>> 8) & 0xFF); + bytes[3] = (byte)((integer >>> 0) & 0xFF); + return bytes; + } + + /** + * Resets existing buffer with a new one of the specified size. + */ + protected synchronized void resetChecksumChunk(int size) { + sum.reset(); + this.buf = new byte[size]; + this.count = 0; + } +} diff --git a/src/java/org/apache/hadoop/fs/FileChecksum.java b/src/java/org/apache/hadoop/fs/FileChecksum.java new file mode 100644 index 00000000000..4fe66d0cd70 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileChecksum.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.util.Arrays; + +import org.apache.hadoop.io.Writable; + +/** An abstract class representing file checksums for files. */ +public abstract class FileChecksum implements Writable { + /** The checksum algorithm name */ + public abstract String getAlgorithmName(); + + /** The length of the checksum in bytes */ + public abstract int getLength(); + + /** The value of the checksum in bytes */ + public abstract byte[] getBytes(); + + /** Return true if both the algorithms and the values are the same. 
*/ + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || !(other instanceof FileChecksum)) { + return false; + } + + final FileChecksum that = (FileChecksum)other; + return this.getAlgorithmName().equals(that.getAlgorithmName()) + && Arrays.equals(this.getBytes(), that.getBytes()); + } + + /** {@inheritDoc} */ + public int hashCode() { + return getAlgorithmName().hashCode() ^ Arrays.hashCode(getBytes()); + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/fs/FileStatus.java b/src/java/org/apache/hadoop/fs/FileStatus.java new file mode 100644 index 00000000000..124984658cd --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileStatus.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +/** Interface that represents the client side information for a file. + */ +public class FileStatus implements Writable, Comparable { + + private Path path; + private long length; + private boolean isdir; + private short block_replication; + private long blocksize; + private long modification_time; + private long access_time; + private FsPermission permission; + private String owner; + private String group; + + public FileStatus() { this(0, false, 0, 0, 0, 0, null, null, null, null); } + + //We should deprecate this soon? + public FileStatus(long length, boolean isdir, int block_replication, + long blocksize, long modification_time, Path path) { + + this(length, isdir, block_replication, blocksize, modification_time, + 0, null, null, null, path); + } + + public FileStatus(long length, boolean isdir, int block_replication, + long blocksize, long modification_time, long access_time, + FsPermission permission, String owner, String group, + Path path) { + this.length = length; + this.isdir = isdir; + this.block_replication = (short)block_replication; + this.blocksize = blocksize; + this.modification_time = modification_time; + this.access_time = access_time; + this.permission = (permission == null) ? + FsPermission.getDefault() : permission; + this.owner = (owner == null) ? "" : owner; + this.group = (group == null) ? "" : group; + this.path = path; + } + + /* + * @return the length of this file, in blocks + */ + public long getLen() { + return length; + } + + /** + * Is this a directory? + * @return true if this is a directory + */ + public boolean isDir() { + return isdir; + } + + /** + * Get the block size of the file. 
+ * @return the number of bytes + */ + public long getBlockSize() { + return blocksize; + } + + /** + * Get the replication factor of a file. + * @return the replication factor of a file. + */ + public short getReplication() { + return block_replication; + } + + /** + * Get the modification time of the file. + * @return the modification time of file in milliseconds since January 1, 1970 UTC. + */ + public long getModificationTime() { + return modification_time; + } + + /** + * Get the access time of the file. + * @return the access time of file in milliseconds since January 1, 1970 UTC. + */ + public long getAccessTime() { + return access_time; + } + + /** + * Get FsPermission associated with the file. + * @return permssion. If a filesystem does not have a notion of permissions + * or if permissions could not be determined, then default + * permissions equivalent of "rwxrwxrwx" is returned. + */ + public FsPermission getPermission() { + return permission; + } + + /** + * Get the owner of the file. + * @return owner of the file. The string could be empty if there is no + * notion of owner of a file in a filesystem or if it could not + * be determined (rare). + */ + public String getOwner() { + return owner; + } + + /** + * Get the group associated with the file. + * @return group for the file. The string could be empty if there is no + * notion of group of a file in a filesystem or if it could not + * be determined (rare). + */ + public String getGroup() { + return group; + } + + public Path getPath() { + return path; + } + + /* These are provided so that these values could be loaded lazily + * by a filesystem (e.g. local file system). + */ + + /** + * Sets permission. + * @param permission if permission is null, default value is set + */ + protected void setPermission(FsPermission permission) { + this.permission = (permission == null) ? + FsPermission.getDefault() : permission; + } + + /** + * Sets owner. + * @param owner if it is null, default value is set + */ + protected void setOwner(String owner) { + this.owner = (owner == null) ? "" : owner; + } + + /** + * Sets group. + * @param group if it is null, default value is set + */ + protected void setGroup(String group) { + this.group = (group == null) ? "" : group; + } + + ////////////////////////////////////////////////// + // Writable + ////////////////////////////////////////////////// + public void write(DataOutput out) throws IOException { + Text.writeString(out, getPath().toString()); + out.writeLong(length); + out.writeBoolean(isdir); + out.writeShort(block_replication); + out.writeLong(blocksize); + out.writeLong(modification_time); + out.writeLong(access_time); + permission.write(out); + Text.writeString(out, owner); + Text.writeString(out, group); + } + + public void readFields(DataInput in) throws IOException { + String strPath = Text.readString(in); + this.path = new Path(strPath); + this.length = in.readLong(); + this.isdir = in.readBoolean(); + this.block_replication = in.readShort(); + blocksize = in.readLong(); + modification_time = in.readLong(); + access_time = in.readLong(); + permission.readFields(in); + owner = Text.readString(in); + group = Text.readString(in); + } + + /** + * Compare this object to another object + * + * @param o the object to be compared. + * @return a negative integer, zero, or a positive integer as this object + * is less than, equal to, or greater than the specified object. 
+ * + * @throws ClassCastException if the specified object's is not of + * type FileStatus + */ + public int compareTo(Object o) { + FileStatus other = (FileStatus)o; + return this.getPath().compareTo(other.getPath()); + } + + /** Compare if this object is equal to another object + * @param o the object to be compared. + * @return true if two file status has the same path name; false if not. + */ + public boolean equals(Object o) { + if (o == null) { + return false; + } + if (this == o) { + return true; + } + if (!(o instanceof FileStatus)) { + return false; + } + FileStatus other = (FileStatus)o; + return this.getPath().equals(other.getPath()); + } + + /** + * Returns a hash code value for the object, which is defined as + * the hash code of the path name. + * + * @return a hash code value for the path name. + */ + public int hashCode() { + return getPath().hashCode(); + } +} diff --git a/src/java/org/apache/hadoop/fs/FileSystem.java b/src/java/org/apache/hadoop/fs/FileSystem.java new file mode 100644 index 00000000000..fcc5817d27e --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileSystem.java @@ -0,0 +1,1648 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Pattern; + +import javax.security.auth.login.LoginException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.MultipleIOException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; + +/**************************************************************** + * An abstract base class for a fairly generic filesystem. It + * may be implemented as a distributed filesystem, or as a "local" + * one that reflects the locally-connected disk. The local version + * exists for small Hadoop instances and for testing. + * + *

+ * + * All user code that may potentially use the Hadoop Distributed + * File System should be written to use a FileSystem object. The + * Hadoop DFS is a multi-machine system that appears as a single + * disk. It's useful because of its fault tolerance and potentially + * very large capacity. + * + *

+ * The local implementation is {@link LocalFileSystem} and distributed + * implementation is DistributedFileSystem. + *****************************************************************/ +public abstract class FileSystem extends Configured implements Closeable { + private static final String FS_DEFAULT_NAME_KEY = "fs.default.name"; + + public static final Log LOG = LogFactory.getLog(FileSystem.class); + + /** FileSystem cache */ + private static final Cache CACHE = new Cache(); + + /** The key this instance is stored under in the cache. */ + private Cache.Key key; + + /** Recording statistics per a FileSystem class */ + private static final Map, Statistics> + statisticsTable = + new IdentityHashMap, Statistics>(); + + /** + * The statistics for this file system. + */ + protected Statistics statistics; + + /** + * A cache of files that should be deleted when filsystem is closed + * or the JVM is exited. + */ + private Set deleteOnExit = new TreeSet(); + + /** Returns the configured filesystem implementation.*/ + public static FileSystem get(Configuration conf) throws IOException { + return get(getDefaultUri(conf), conf); + } + + /** Get the default filesystem URI from a configuration. + * @param conf the configuration to access + * @return the uri of the default filesystem + */ + public static URI getDefaultUri(Configuration conf) { + return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, "file:///"))); + } + + /** Set the default filesystem URI in a configuration. + * @param conf the configuration to alter + * @param uri the new default filesystem uri + */ + public static void setDefaultUri(Configuration conf, URI uri) { + conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); + } + + /** Set the default filesystem URI in a configuration. + * @param conf the configuration to alter + * @param uri the new default filesystem uri + */ + public static void setDefaultUri(Configuration conf, String uri) { + setDefaultUri(conf, URI.create(fixName(uri))); + } + + /** Called after a new FileSystem instance is constructed. + * @param name a uri whose authority section names the host, port, etc. + * for this FileSystem + * @param conf the configuration + */ + public void initialize(URI name, Configuration conf) throws IOException { + statistics = getStatistics(name.getScheme(), getClass()); + } + + /** Returns a URI whose scheme and authority identify this FileSystem.*/ + public abstract URI getUri(); + + /** Update old-format filesystem names, for back-compatibility. This should + * eventually be replaced with a checkName() method that throws an exception + * for old-format names. */ + private static String fixName(String name) { + // convert old-format name to new-format name + if (name.equals("local")) { // "local" is now "file:///". + LOG.warn("\"local\" is a deprecated filesystem name." + +" Use \"file:///\" instead."); + name = "file:///"; + } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" + LOG.warn("\""+name+"\" is a deprecated filesystem name." + +" Use \"hdfs://"+name+"/\" instead."); + name = "hdfs://"+name; + } + return name; + } + + /** + * Get the local file syste + * @param conf the configuration to configure the file system with + * @return a LocalFileSystem + */ + public static LocalFileSystem getLocal(Configuration conf) + throws IOException { + return (LocalFileSystem)get(LocalFileSystem.NAME, conf); + } + + /** Returns the FileSystem for this URI's scheme and authority. 
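For illustration, a short sketch of obtaining filesystem handles through the configuration plumbing above; the namenode URI is a made-up example value for fs.default.name.

    // The URI below is hypothetical; any valid fs.default.name works here.
    Configuration conf = new Configuration();
    FileSystem.setDefaultUri(conf, "hdfs://namenode.example.com:9000");
    FileSystem fs = FileSystem.get(conf);                // the configured default
    LocalFileSystem localFs = FileSystem.getLocal(conf); // always file:///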
The scheme + * of the URI determines a configuration property name, + * fs.scheme.class whose value names the FileSystem class. + * The entire URI is passed to the FileSystem instance's initialize method. + */ + public static FileSystem get(URI uri, Configuration conf) throws IOException { + String scheme = uri.getScheme(); + String authority = uri.getAuthority(); + + if (scheme == null) { // no scheme: use default FS + return get(conf); + } + + if (authority == null) { // no authority + URI defaultUri = getDefaultUri(conf); + if (scheme.equals(defaultUri.getScheme()) // if scheme matches default + && defaultUri.getAuthority() != null) { // & default has authority + return get(defaultUri, conf); // return default + } + } + + return CACHE.get(uri, conf); + } + + /** Returns the FileSystem for this URI's scheme and authority. The scheme + * of the URI determines a configuration property name, + * fs.scheme.class whose value names the FileSystem class. + * The entire URI is passed to the FileSystem instance's initialize method. + * This always returns a new FileSystem object. + */ + public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { + String scheme = uri.getScheme(); + String authority = uri.getAuthority(); + + if (scheme == null) { // no scheme: use default FS + return newInstance(conf); + } + + if (authority == null) { // no authority + URI defaultUri = getDefaultUri(conf); + if (scheme.equals(defaultUri.getScheme()) // if scheme matches default + && defaultUri.getAuthority() != null) { // & default has authority + return newInstance(defaultUri, conf); // return default + } + } + return CACHE.getUnique(uri, conf); + } + + /** Returns a unique configured filesystem implementation. + * This always returns a new FileSystem object. */ + public static FileSystem newInstance(Configuration conf) throws IOException { + return newInstance(getDefaultUri(conf), conf); + } + + /** + * Get a unique local file system object + * @param conf the configuration to configure the file system with + * @return a LocalFileSystem + * This always returns a new FileSystem object. + */ + public static LocalFileSystem newInstanceLocal(Configuration conf) + throws IOException { + return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); + } + + private static class ClientFinalizer extends Thread { + public synchronized void run() { + try { + FileSystem.closeAll(); + } catch (IOException e) { + LOG.info("FileSystem.closeAll() threw an exception:\n" + e); + } + } + } + private static final ClientFinalizer clientFinalizer = new ClientFinalizer(); + + /** + * Close all cached filesystems. Be sure those filesystems are not + * used anymore. + * + * @throws IOException + */ + public static void closeAll() throws IOException { + CACHE.closeAll(); + } + + /** Make sure that a path specifies a FileSystem. */ + public Path makeQualified(Path path) { + checkPath(path); + return path.makeQualified(this); + } + + /** create a file with the provided permission + * The permission of the file is set to be the provided permission as in + * setPermission, not permission&~umask + * + * It is implemented using two RPCs. It is understood that it is inefficient, + * but the implementation is thread-safe. The other option is to change the + * value of umask in configuration to be 0, but it is not thread-safe. 
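A hedged usage sketch of the create-with-permission helper documented above (its definition follows just below); the fs handle and path are hypothetical, and the permission is assembled from FsAction values purely as an example.

    // Hypothetical path; rw-r----- is an arbitrary example permission.
    FsPermission perm =
        new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE);
    FSDataOutputStream out =
        FileSystem.create(fs, new Path("/user/alice/report.txt"), perm);
    out.writeBytes("hello\n");
    out.close();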
+ * + * @param fs file system handle + * @param file the name of the file to be created + * @param permission the permission of the file + * @return an output stream + * @throws IOException + */ + public static FSDataOutputStream create(FileSystem fs, + Path file, FsPermission permission) throws IOException { + // create the file with default permission + FSDataOutputStream out = fs.create(file); + // set its permission to the supplied one + fs.setPermission(file, permission); + return out; + } + + /** create a directory with the provided permission + * The permission of the directory is set to be the provided permission as in + * setPermission, not permission&~umask + * + * @see #create(FileSystem, Path, FsPermission) + * + * @param fs file system handle + * @param dir the name of the directory to be created + * @param permission the permission of the directory + * @return true if the directory creation succeeds; false otherwise + * @throws IOException + */ + public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) + throws IOException { + // create the directory using the default permission + boolean result = fs.mkdirs(dir); + // set its permission to be the supplied one + fs.setPermission(dir, permission); + return result; + } + + /////////////////////////////////////////////////////////////// + // FileSystem + /////////////////////////////////////////////////////////////// + + protected FileSystem() { + super(null); + } + + /** Check that a Path belongs to this FileSystem. */ + protected void checkPath(Path path) { + URI uri = path.toUri(); + if (uri.getScheme() == null) // fs is relative + return; + String thisScheme = this.getUri().getScheme(); + String thatScheme = uri.getScheme(); + String thisAuthority = this.getUri().getAuthority(); + String thatAuthority = uri.getAuthority(); + //authority and scheme are not case sensitive + if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match + if (thisAuthority == thatAuthority || // & authorities match + (thisAuthority != null && + thisAuthority.equalsIgnoreCase(thatAuthority))) + return; + + if (thatAuthority == null && // path's authority is null + thisAuthority != null) { // fs has an authority + URI defaultUri = getDefaultUri(getConf()); // & is the conf default + if (thisScheme.equalsIgnoreCase(defaultUri.getScheme()) && + thisAuthority.equalsIgnoreCase(defaultUri.getAuthority())) + return; + try { // or the default fs's uri + defaultUri = get(getConf()).getUri(); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (thisScheme.equalsIgnoreCase(defaultUri.getScheme()) && + thisAuthority.equalsIgnoreCase(defaultUri.getAuthority())) + return; + } + } + throw new IllegalArgumentException("Wrong FS: "+path+ + ", expected: "+this.getUri()); + } + + /** + * Return an array containing hostnames, offset and size of + * portions of the given file. For a nonexistent + * file or regions, null will be returned. + * + * This call is most helpful with DFS, where it returns + * hostnames of machines that contain the given file. + * + * The FileSystem will simply return an elt containing 'localhost'. 
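For illustration, a caller-side sketch of the block-location query whose contract is described above; the path is made up, and against this base class the result is the single 'localhost' entry mentioned in the javadoc.

    // Hypothetical file; DFS implementations return real datanode hosts.
    FileStatus stat = fs.getFileStatus(new Path("/data/events.log"));
    BlockLocation[] blocks = fs.getFileBlockLocations(stat, 0, stat.getLen());
    for (BlockLocation blk : blocks) {
      System.out.println(blk.getOffset() + "+" + blk.getLength()
          + " -> " + java.util.Arrays.toString(blk.getHosts()));
    }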
+ */ + public BlockLocation[] getFileBlockLocations(FileStatus file, + long start, long len) throws IOException { + if (file == null) { + return null; + } + + if ( (start<0) || (len < 0) ) { + throw new IllegalArgumentException("Invalid start or len parameter"); + } + + if (file.getLen() < start) { + return new BlockLocation[0]; + + } + String[] name = { "localhost:50010" }; + String[] host = { "localhost" }; + return new BlockLocation[] { new BlockLocation(name, host, 0, file.getLen()) }; + } + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + public abstract FSDataInputStream open(Path f, int bufferSize) + throws IOException; + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file to open + */ + public FSDataInputStream open(Path f) throws IOException { + return open(f, getConf().getInt("io.file.buffer.size", 4096)); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f) throws IOException { + return create(f, true); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + */ + public FSDataOutputStream create(Path f, boolean overwrite) + throws IOException { + return create(f, overwrite, + getConf().getInt("io.file.buffer.size", 4096), + getDefaultReplication(), + getDefaultBlockSize()); + } + + /** + * Create an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f, Progressable progress) throws IOException { + return create(f, true, + getConf().getInt("io.file.buffer.size", 4096), + getDefaultReplication(), + getDefaultBlockSize(), progress); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f, short replication) + throws IOException { + return create(f, true, + getConf().getInt("io.file.buffer.size", 4096), + replication, + getDefaultBlockSize()); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f, short replication, Progressable progress) + throws IOException { + return create(f, true, + getConf().getInt("io.file.buffer.size", 4096), + replication, + getDefaultBlockSize(), progress); + } + + + /** + * Opens an FSDataOutputStream at the indicated Path. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize + ) throws IOException { + return create(f, overwrite, bufferSize, + getDefaultReplication(), + getDefaultBlockSize()); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. 
+ */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize, + Progressable progress + ) throws IOException { + return create(f, overwrite, bufferSize, + getDefaultReplication(), + getDefaultBlockSize(), progress); + } + + + /** + * Opens an FSDataOutputStream at the indicated Path. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize, + short replication, + long blockSize + ) throws IOException { + return create(f, overwrite, bufferSize, replication, blockSize, null); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress + ) throws IOException { + return this.create(f, FsPermission.getDefault(), + overwrite, bufferSize, replication, blockSize, progress); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * @param f the file name to open + * @param permission + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + * @param blockSize + * @param progress + * @throws IOException + * @see #setPermission(Path, FsPermission) + */ + public abstract FSDataOutputStream create(Path f, + FsPermission permission, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException; + + /** + * Creates the given Path as a brand-new zero-length file. If + * create fails, or if it already existed, return false. + */ + public boolean createNewFile(Path f) throws IOException { + if (exists(f)) { + return false; + } else { + create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); + return true; + } + } + + /** + * Append to an existing file (optional operation). + * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) + * @param f the existing file to be appended. + * @throws IOException + */ + public FSDataOutputStream append(Path f) throws IOException { + return append(f, getConf().getInt("io.file.buffer.size", 4096), null); + } + /** + * Append to an existing file (optional operation). + * Same as append(f, bufferSize, null). + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. + * @throws IOException + */ + public FSDataOutputStream append(Path f, int bufferSize) throws IOException { + return append(f, bufferSize, null); + } + + /** + * Append to an existing file (optional operation). + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. + * @param progress for reporting progress if it is not null. 
+ * @throws IOException + */ + public abstract FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException; + + /** + * Set replication for an existing file. + * + * @param src file name + * @param replication new replication + * @throws IOException + * @return true if successful; + * false if file does not exist or is a directory + */ + public boolean setReplication(Path src, short replication) + throws IOException { + return true; + } + + /** + * Renames Path src to Path dst. Can take place on local fs + * or remote DFS. + */ + public abstract boolean rename(Path src, Path dst) throws IOException; + + /** Delete a file. + * + * @param f the path to delete. + * @param recursive if path is a directory and set to + * true, the directory is deleted else throws an exception. In + * case of a file the recursive can be set to either true or false. + * @return true if delete is successful else false. + * @throws IOException + */ + public abstract boolean delete(Path f, boolean recursive) throws IOException; + + /** + * Mark a path to be deleted when FileSystem is closed. + * When the JVM shuts down, + * all FileSystem objects will be closed automatically. + * Then, + * the marked path will be deleted as a result of closing the FileSystem. + * + * The path has to exist in the file system. + * + * @param f the path to delete. + * @return true if deleteOnExit is successful, otherwise false. + * @throws IOException + */ + public boolean deleteOnExit(Path f) throws IOException { + if (!exists(f)) { + return false; + } + synchronized (deleteOnExit) { + deleteOnExit.add(f); + } + return true; + } + + /** + * Delete all files that were marked as delete-on-exit. This recursively + * deletes all files in the specified paths. + */ + protected void processDeleteOnExit() { + synchronized (deleteOnExit) { + for (Iterator iter = deleteOnExit.iterator(); iter.hasNext();) { + Path path = iter.next(); + try { + delete(path, true); + } + catch (IOException e) { + LOG.info("Ignoring failure to deleteOnExit for path " + path); + } + iter.remove(); + } + } + } + + /** Check if exists. + * @param f source file + */ + public boolean exists(Path f) throws IOException { + try { + return getFileStatus(f) != null; + } catch (FileNotFoundException e) { + return false; + } + } + + /** True iff the named path is a directory. + * Note: Avoid using this method. Instead reuse the FileStatus + * returned by getFileStatus() or listStatus() methods. + */ + public boolean isDirectory(Path f) throws IOException { + try { + return getFileStatus(f).isDir(); + } catch (FileNotFoundException e) { + return false; // f does not exist + } + } + + /** True iff the named path is a regular file. + * Note: Avoid using this method. Instead reuse the FileStatus + * returned by getFileStatus() or listStatus() methods. + */ + public boolean isFile(Path f) throws IOException { + try { + return !getFileStatus(f).isDir(); + } catch (FileNotFoundException e) { + return false; // f does not exist + } + } + + /** Return the {@link ContentSummary} of a given {@link Path}. */ + public ContentSummary getContentSummary(Path f) throws IOException { + FileStatus status = getFileStatus(f); + if (!status.isDir()) { + // f is a file + return new ContentSummary(status.getLen(), 1, 0); + } + // f is a directory + long[] summary = {0, 0, 1}; + for(FileStatus s : listStatus(f)) { + ContentSummary c = s.isDir() ? 
getContentSummary(s.getPath()) : + new ContentSummary(s.getLen(), 1, 0); + summary[0] += c.getLength(); + summary[1] += c.getFileCount(); + summary[2] += c.getDirectoryCount(); + } + return new ContentSummary(summary[0], summary[1], summary[2]); + } + + final private static PathFilter DEFAULT_FILTER = new PathFilter() { + public boolean accept(Path file) { + return true; + } + }; + + /** + * List the statuses of the files/directories in the given path if the path is + * a directory. + * + * @param f + * given path + * @return the statuses of the files/directories in the given patch + * @throws IOException + */ + public abstract FileStatus[] listStatus(Path f) throws IOException; + + /* + * Filter files/directories in the given path using the user-supplied path + * filter. Results are added to the given array results. + */ + private void listStatus(ArrayList results, Path f, + PathFilter filter) throws IOException { + FileStatus listing[] = listStatus(f); + if (listing != null) { + for (int i = 0; i < listing.length; i++) { + if (filter.accept(listing[i].getPath())) { + results.add(listing[i]); + } + } + } + } + + /** + * Filter files/directories in the given path using the user-supplied path + * filter. + * + * @param f + * a path name + * @param filter + * the user-supplied path filter + * @return an array of FileStatus objects for the files under the given path + * after applying the filter + * @throws IOException + * if encounter any problem while fetching the status + */ + public FileStatus[] listStatus(Path f, PathFilter filter) throws IOException { + ArrayList results = new ArrayList(); + listStatus(results, f, filter); + return results.toArray(new FileStatus[results.size()]); + } + + /** + * Filter files/directories in the given list of paths using default + * path filter. + * + * @param files + * a list of paths + * @return a list of statuses for the files under the given paths after + * applying the filter default Path filter + * @exception IOException + */ + public FileStatus[] listStatus(Path[] files) + throws IOException { + return listStatus(files, DEFAULT_FILTER); + } + + /** + * Filter files/directories in the given list of paths using user-supplied + * path filter. + * + * @param files + * a list of paths + * @param filter + * the user-supplied path filter + * @return a list of statuses for the files under the given paths after + * applying the filter + * @exception IOException + */ + public FileStatus[] listStatus(Path[] files, PathFilter filter) + throws IOException { + ArrayList results = new ArrayList(); + for (int i = 0; i < files.length; i++) { + listStatus(results, files[i], filter); + } + return results.toArray(new FileStatus[results.size()]); + } + + /** + *

+ * Return all the files that match filePattern and are not checksum
+ * files. Results are sorted by their names.
+ *
+ * A filename pattern is composed of regular characters and
+ * special pattern matching characters, which are:
+ *
+ *    ?
+ *      Matches any single character.
+ *
+ *    *
+ *      Matches zero or more characters.
+ *
+ *    [abc]
+ *      Matches a single character from character set {a,b,c}.
+ *
+ *    [a-b]
+ *      Matches a single character from the character range {a...b}.
+ *      Note that character a must be lexicographically less than or
+ *      equal to character b.
+ *
+ *    [^a]
+ *      Matches a single character that is not from character set or
+ *      range {a}. Note that the ^ character must occur immediately
+ *      to the right of the opening bracket.
+ *
+ *    \c
+ *      Removes (escapes) any special meaning of character c.
+ *
+ *    {ab,cd}
+ *      Matches a string from the string set {ab, cd}
+ *
+ *    {ab,c{de,fh}}
+ *      Matches a string from the string set {ab, cde, cfh}
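Purely as an illustration of the pattern syntax listed above, a sketch of a glob query over a made-up directory layout (globStatus itself is defined just below).

    // Hypothetical layout: monthly log directories with part files.
    FileStatus[] matches =
        fs.globStatus(new Path("/logs/2009-0[1-6]/part-*.{log,txt}"));
    for (FileStatus m : matches) {
      System.out.println(m.getPath());
    }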
+ * + * @param pathPattern a regular expression specifying a pth pattern + + * @return an array of paths that match the path pattern + * @throws IOException + */ + public FileStatus[] globStatus(Path pathPattern) throws IOException { + return globStatus(pathPattern, DEFAULT_FILTER); + } + + /** + * Return an array of FileStatus objects whose path names match pathPattern + * and is accepted by the user-supplied path filter. Results are sorted by + * their path names. + * Return null if pathPattern has no glob and the path does not exist. + * Return an empty array if pathPattern has a glob and no path matches it. + * + * @param pathPattern + * a regular expression specifying the path pattern + * @param filter + * a user-supplied path filter + * @return an array of FileStatus objects + * @throws IOException if any I/O error occurs when fetching file status + */ + public FileStatus[] globStatus(Path pathPattern, PathFilter filter) + throws IOException { + String filename = pathPattern.toUri().getPath(); + List filePatterns = GlobExpander.expand(filename); + if (filePatterns.size() == 1) { + return globStatusInternal(pathPattern, filter); + } else { + List results = new ArrayList(); + for (String filePattern : filePatterns) { + FileStatus[] files = globStatusInternal(new Path(filePattern), filter); + for (FileStatus file : files) { + results.add(file); + } + } + return results.toArray(new FileStatus[results.size()]); + } + } + + private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter) + throws IOException { + Path[] parents = new Path[1]; + int level = 0; + String filename = pathPattern.toUri().getPath(); + + // path has only zero component + if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { + return getFileStatus(new Path[]{pathPattern}); + } + + // path has at least one component + String[] components = filename.split(Path.SEPARATOR); + // get the first component + if (pathPattern.isAbsolute()) { + parents[0] = new Path(Path.SEPARATOR); + level = 1; + } else { + parents[0] = new Path(Path.CUR_DIR); + } + + // glob the paths that match the parent path, i.e., [0, components.length-1] + boolean[] hasGlob = new boolean[]{false}; + Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob); + FileStatus[] results; + if (parentPaths == null || parentPaths.length == 0) { + results = null; + } else { + // Now work on the last component of the path + GlobFilter fp = new GlobFilter(components[components.length - 1], filter); + if (fp.hasPattern()) { // last component has a pattern + // list parent directories and then glob the results + results = listStatus(parentPaths, fp); + hasGlob[0] = true; + } else { // last component does not have a pattern + // get all the path names + ArrayList filteredPaths = new ArrayList(parentPaths.length); + for (int i = 0; i < parentPaths.length; i++) { + parentPaths[i] = new Path(parentPaths[i], + components[components.length - 1]); + if (fp.accept(parentPaths[i])) { + filteredPaths.add(parentPaths[i]); + } + } + // get all their statuses + results = getFileStatus( + filteredPaths.toArray(new Path[filteredPaths.size()])); + } + } + + // Decide if the pathPattern contains a glob or not + if (results == null) { + if (hasGlob[0]) { + results = new FileStatus[0]; + } + } else { + if (results.length == 0 ) { + if (!hasGlob[0]) { + results = null; + } + } else { + Arrays.sort(results); + } + } + return results; + } + + /* + * For a path of N components, return a list of paths that match the + * components [level, N-1]. 
+ */ + private Path[] globPathsLevel(Path[] parents, String[] filePattern, + int level, boolean[] hasGlob) throws IOException { + if (level == filePattern.length - 1) + return parents; + if (parents == null || parents.length == 0) { + return null; + } + GlobFilter fp = new GlobFilter(filePattern[level]); + if (fp.hasPattern()) { + parents = FileUtil.stat2Paths(listStatus(parents, fp)); + hasGlob[0] = true; + } else { + for (int i = 0; i < parents.length; i++) { + parents[i] = new Path(parents[i], filePattern[level]); + } + } + return globPathsLevel(parents, filePattern, level + 1, hasGlob); + } + + /* A class that could decide if a string matches the glob or not */ + private static class GlobFilter implements PathFilter { + private PathFilter userFilter = DEFAULT_FILTER; + private Pattern regex; + private boolean hasPattern = false; + + /** Default pattern character: Escape any special meaning. */ + private static final char PAT_ESCAPE = '\\'; + /** Default pattern character: Any single character. */ + private static final char PAT_ANY = '.'; + /** Default pattern character: Character set close. */ + private static final char PAT_SET_CLOSE = ']'; + + GlobFilter() { + } + + GlobFilter(String filePattern) throws IOException { + setRegex(filePattern); + } + + GlobFilter(String filePattern, PathFilter filter) throws IOException { + userFilter = filter; + setRegex(filePattern); + } + + private boolean isJavaRegexSpecialChar(char pChar) { + return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' || + pChar == '|' || pChar == '+'; + } + void setRegex(String filePattern) throws IOException { + int len; + int setOpen; + int curlyOpen; + boolean setRange; + + StringBuilder fileRegex = new StringBuilder(); + + // Validate the pattern + len = filePattern.length(); + if (len == 0) + return; + + setOpen = 0; + setRange = false; + curlyOpen = 0; + + for (int i = 0; i < len; i++) { + char pCh; + + // Examine a single pattern character + pCh = filePattern.charAt(i); + if (pCh == PAT_ESCAPE) { + fileRegex.append(pCh); + i++; + if (i >= len) + error("An escaped character does not present", filePattern, i); + pCh = filePattern.charAt(i); + } else if (isJavaRegexSpecialChar(pCh)) { + fileRegex.append(PAT_ESCAPE); + } else if (pCh == '*') { + fileRegex.append(PAT_ANY); + hasPattern = true; + } else if (pCh == '?') { + pCh = PAT_ANY; + hasPattern = true; + } else if (pCh == '{') { + fileRegex.append('('); + pCh = '('; + curlyOpen++; + hasPattern = true; + } else if (pCh == ',' && curlyOpen > 0) { + fileRegex.append(")|"); + pCh = '('; + } else if (pCh == '}' && curlyOpen > 0) { + // End of a group + curlyOpen--; + fileRegex.append(")"); + pCh = ')'; + } else if (pCh == '[' && setOpen == 0) { + setOpen++; + hasPattern = true; + } else if (pCh == '^' && setOpen > 0) { + } else if (pCh == '-' && setOpen > 0) { + // Character set range + setRange = true; + } else if (pCh == PAT_SET_CLOSE && setRange) { + // Incomplete character set range + error("Incomplete character set range", filePattern, i); + } else if (pCh == PAT_SET_CLOSE && setOpen > 0) { + // End of a character set + if (setOpen < 2) + error("Unexpected end of set", filePattern, i); + setOpen = 0; + } else if (setOpen > 0) { + // Normal character, or the end of a character set range + setOpen++; + setRange = false; + } + fileRegex.append(pCh); + } + + // Check for a well-formed pattern + if (setOpen > 0 || setRange || curlyOpen > 0) { + // Incomplete character set or character range + error("Expecting set closure character or end of range, or 
}", + filePattern, len); + } + regex = Pattern.compile(fileRegex.toString()); + } + + boolean hasPattern() { + return hasPattern; + } + + public boolean accept(Path path) { + return regex.matcher(path.getName()).matches() && userFilter.accept(path); + } + + private void error(String s, String pattern, int pos) throws IOException { + throw new IOException("Illegal file pattern: " + +s+ " for glob "+ pattern + " at " + pos); + } + } + + /** Return the current user's home directory in this filesystem. + * The default implementation returns "/user/$USER/". + */ + public Path getHomeDirectory() { + return new Path("/user/"+System.getProperty("user.name")) + .makeQualified(this); + } + + + /** + * Set the current working directory for the given file system. All relative + * paths will be resolved relative to it. + * + * @param new_dir + */ + public abstract void setWorkingDirectory(Path new_dir); + + /** + * Get the current working directory for the given file system + * @return the directory pathname + */ + public abstract Path getWorkingDirectory(); + + /** + * Call {@link #mkdirs(Path, FsPermission)} with default permission. + */ + public boolean mkdirs(Path f) throws IOException { + return mkdirs(f, FsPermission.getDefault()); + } + + /** + * Make the given file and all non-existent parents into + * directories. Has the semantics of Unix 'mkdir -p'. + * Existence of the directory hierarchy is not an error. + */ + public abstract boolean mkdirs(Path f, FsPermission permission + ) throws IOException; + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name and the source is kept intact afterwards + */ + public void copyFromLocalFile(Path src, Path dst) + throws IOException { + copyFromLocalFile(false, src, dst); + } + + /** + * The src files is on the local disk. Add it to FS at + * the given dst name, removing the source afterwards. + */ + public void moveFromLocalFile(Path[] srcs, Path dst) + throws IOException { + copyFromLocalFile(true, true, srcs, dst); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name, removing the source afterwards. + */ + public void moveFromLocalFile(Path src, Path dst) + throws IOException { + copyFromLocalFile(true, src, dst); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + copyFromLocalFile(delSrc, true, src, dst); + } + + /** + * The src files are on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, boolean overwrite, + Path[] srcs, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, boolean overwrite, + Path src, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. 
+ */ + public void copyToLocalFile(Path src, Path dst) throws IOException { + copyToLocalFile(false, src, dst); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * Remove the source afterwards + */ + public void moveToLocalFile(Path src, Path dst) throws IOException { + copyToLocalFile(true, src, dst); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * delSrc indicates if the src will be removed or not. + */ + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, getLocal(getConf()), dst, delSrc, getConf()); + } + + /** + * Returns a local File that the user can write output to. The caller + * provides both the eventual FS target name and the local working + * file. If the FS is local, we write directly into the target. If + * the FS is remote, we write into the tmp local area. + */ + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return tmpLocalFile; + } + + /** + * Called when we're all done writing to the target. A local FS will + * do nothing, because we've written to exactly the right place. A remote + * FS will copy the contents of tmpLocalFile to the correct target at + * fsOutputFile. + */ + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + moveFromLocalFile(tmpLocalFile, fsOutputFile); + } + + /** + * No more filesystem operations are needed. Will + * release any held locks. + */ + public void close() throws IOException { + // delete all files that were marked as delete-on-exit. + processDeleteOnExit(); + CACHE.remove(this.key, this); + } + + /** Return the total size of all files in the filesystem.*/ + public long getUsed() throws IOException{ + long used = 0; + FileStatus[] files = listStatus(new Path("/")); + for(FileStatus file:files){ + used += file.getLen(); + } + return used; + } + + /** Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. */ + public long getDefaultBlockSize() { + // default to 32MB: large enough to minimize the impact of seeks + return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); + } + + /** + * Get the default replication. + */ + public short getDefaultReplication() { return 1; } + + /** + * Return a file status object that represents the path. + * @param f The path we want information from + * @return a FileStatus object + * @throws FileNotFoundException when the path does not exist; + * IOException see specific implementation + */ + public abstract FileStatus getFileStatus(Path f) throws IOException; + + /** + * Get the checksum of a file. + * + * @param f The file path + * @return The file checksum. The default return value is null, + * which indicates that no checksum algorithm is implemented + * in the corresponding FileSystem. + */ + public FileChecksum getFileChecksum(Path f) throws IOException { + return null; + } + + /** + * Set the verify checksum flag. This is only applicable if the + * corresponding FileSystem supports checksum. By default doesn't do anything. + * @param verifyChecksum + */ + public void setVerifyChecksum(boolean verifyChecksum) { + //doesn't do anything + } + + /** + * Return a list of file status objects that corresponds to the list of paths + * excluding those non-existent paths. 
+ * + * @param paths + * the list of paths we want information from + * @return a list of FileStatus objects + * @throws IOException + * see specific implementation + */ + private FileStatus[] getFileStatus(Path[] paths) throws IOException { + if (paths == null) { + return null; + } + ArrayList results = new ArrayList(paths.length); + for (int i = 0; i < paths.length; i++) { + try { + results.add(getFileStatus(paths[i])); + } catch (FileNotFoundException e) { // do nothing + } + } + return results.toArray(new FileStatus[results.size()]); + } + + /** + * Returns a status object describing the use and capacity of the + * file system. If the file system has multiple partitions, the + * use and capacity of the root partition is reflected. + * + * @return a FsStatus object + * @throws IOException + * see specific implementation + */ + public FsStatus getStatus() throws IOException { + return getStatus(null); + } + + /** + * Returns a status object describing the use and capacity of the + * file system. If the file system has multiple partitions, the + * use and capacity of the partition pointed to by the specified + * path is reflected. + * @param p Path for which status should be obtained. null means + * the default partition. + * @return a FsStatus object + * @throws IOException + * see specific implementation + */ + public FsStatus getStatus(Path p) throws IOException { + return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); + } + + /** + * Set permission of a path. + * @param p + * @param permission + */ + public void setPermission(Path p, FsPermission permission + ) throws IOException { + } + + /** + * Set owner of a path (i.e. a file or a directory). + * The parameters username and groupname cannot both be null. + * @param p The path + * @param username If it is null, the original username remains unchanged. + * @param groupname If it is null, the original groupname remains unchanged. + */ + public void setOwner(Path p, String username, String groupname + ) throws IOException { + } + + /** + * Set access time of a file + * @param p The path + * @param mtime Set the modification time of this file. + * The number of milliseconds since Jan 1, 1970. + * A value of -1 means that this call should not set modification time. + * @param atime Set the access time of this file. + * The number of milliseconds since Jan 1, 1970. + * A value of -1 means that this call should not set access time. + */ + public void setTimes(Path p, long mtime, long atime + ) throws IOException { + } + + private static FileSystem createFileSystem(URI uri, Configuration conf + ) throws IOException { + Class clazz = conf.getClass("fs." 
+ uri.getScheme() + ".impl", null); + if (clazz == null) { + throw new IOException("No FileSystem for scheme: " + uri.getScheme()); + } + FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); + fs.initialize(uri, conf); + return fs; + } + + /** Caching FileSystem objects */ + static class Cache { + private final Map map = new HashMap(); + + /** A variable that makes all objects in the cache unique */ + private static AtomicLong unique = new AtomicLong(1); + + synchronized FileSystem get(URI uri, Configuration conf) throws IOException{ + Key key = new Key(uri, conf); + return getInternal(uri, conf, key); + } + + /** The objects inserted into the cache using this method are all unique */ + synchronized FileSystem getUnique(URI uri, Configuration conf) throws IOException{ + Key key = new Key(uri, conf, unique.getAndIncrement()); + return getInternal(uri, conf, key); + } + + private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ + FileSystem fs = map.get(key); + if (fs == null) { + fs = createFileSystem(uri, conf); + if (map.isEmpty() && !clientFinalizer.isAlive()) { + Runtime.getRuntime().addShutdownHook(clientFinalizer); + } + fs.key = key; + map.put(key, fs); + } + return fs; + } + + synchronized void remove(Key key, FileSystem fs) { + if (map.containsKey(key) && fs == map.get(key)) { + map.remove(key); + if (map.isEmpty() && !clientFinalizer.isAlive()) { + if (!Runtime.getRuntime().removeShutdownHook(clientFinalizer)) { + LOG.info("Could not cancel cleanup thread, though no " + + "FileSystems are open"); + } + } + } + } + + synchronized void closeAll() throws IOException { + List exceptions = new ArrayList(); + for(; !map.isEmpty(); ) { + Map.Entry e = map.entrySet().iterator().next(); + final Key key = e.getKey(); + final FileSystem fs = e.getValue(); + + //remove from cache + remove(key, fs); + + if (fs != null) { + try { + fs.close(); + } + catch(IOException ioe) { + exceptions.add(ioe); + } + } + } + + if (!exceptions.isEmpty()) { + throw MultipleIOException.createIOException(exceptions); + } + } + + /** FileSystem.Cache.Key */ + static class Key { + final String scheme; + final String authority; + final String username; + final long unique; // an artificial way to make a key unique + + Key(URI uri, Configuration conf) throws IOException { + this(uri, conf, 0); + } + + Key(URI uri, Configuration conf, long unique) throws IOException { + scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); + authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); + this.unique = unique; + UserGroupInformation ugi = UserGroupInformation.readFrom(conf); + if (ugi == null) { + try { + ugi = UserGroupInformation.login(conf); + } catch(LoginException e) { + LOG.warn("uri=" + uri, e); + } + } + username = ugi == null? 
null: ugi.getUserName(); + } + + /** {@inheritDoc} */ + public int hashCode() { + return (scheme + authority + username).hashCode() + (int)unique; + } + + static boolean isEqual(Object a, Object b) { + return a == b || (a != null && a.equals(b)); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj != null && obj instanceof Key) { + Key that = (Key)obj; + return isEqual(this.scheme, that.scheme) + && isEqual(this.authority, that.authority) + && isEqual(this.username, that.username) + && (this.unique == that.unique); + } + return false; + } + + /** {@inheritDoc} */ + public String toString() { + return username + "@" + scheme + "://" + authority; + } + } + } + + public static final class Statistics { + private final String scheme; + private AtomicLong bytesRead = new AtomicLong(); + private AtomicLong bytesWritten = new AtomicLong(); + + public Statistics(String scheme) { + this.scheme = scheme; + } + + /** + * Increment the bytes read in the statistics + * @param newBytes the additional bytes read + */ + public void incrementBytesRead(long newBytes) { + bytesRead.getAndAdd(newBytes); + } + + /** + * Increment the bytes written in the statistics + * @param newBytes the additional bytes written + */ + public void incrementBytesWritten(long newBytes) { + bytesWritten.getAndAdd(newBytes); + } + + /** + * Get the total number of bytes read + * @return the number of bytes + */ + public long getBytesRead() { + return bytesRead.get(); + } + + /** + * Get the total number of bytes written + * @return the number of bytes + */ + public long getBytesWritten() { + return bytesWritten.get(); + } + + public String toString() { + return bytesRead + " bytes read and " + bytesWritten + + " bytes written"; + } + + /** + * Reset the counts of bytes to 0. + */ + public void reset() { + bytesWritten.set(0); + bytesRead.set(0); + } + + /** + * Get the uri scheme associated with this statistics object. + * @return the schema associated with this set of statistics + */ + public String getScheme() { + return scheme; + } + } + + /** + * Get the Map of Statistics object indexed by URI Scheme. 
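A short sketch of reading the per-scheme counters that the Statistics class above accumulates; the println format is just for the example:

    for (FileSystem.Statistics stat : FileSystem.getAllStatistics()) {
      System.out.println(stat.getScheme() + ": " + stat.getBytesRead() + " bytes read, "
          + stat.getBytesWritten() + " bytes written");
    }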
+ * @return a Map having a key as URI scheme and value as Statistics object + * @deprecated use {@link #getAllStatistics} instead + */ + @Deprecated + public static synchronized Map getStatistics() { + Map result = new HashMap(); + for(Statistics stat: statisticsTable.values()) { + result.put(stat.getScheme(), stat); + } + return result; + } + + /** + * Return the FileSystem classes that have Statistics + */ + public static synchronized List getAllStatistics() { + return new ArrayList(statisticsTable.values()); + } + + /** + * Get the statistics for a particular file system + * @param cls the class to lookup + * @return a statistics object + */ + public static synchronized + Statistics getStatistics(String scheme, Class cls) { + Statistics result = statisticsTable.get(cls); + if (result == null) { + result = new Statistics(scheme); + statisticsTable.put(cls, result); + } + return result; + } + + public static synchronized void clearStatistics() { + for(Statistics stat: statisticsTable.values()) { + stat.reset(); + } + } + + public static synchronized + void printStatistics() throws IOException { + for (Map.Entry, Statistics> pair: + statisticsTable.entrySet()) { + System.out.println(" FileSystem " + pair.getKey().getName() + + ": " + pair.getValue()); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FileUtil.java b/src/java/org/apache/hadoop/fs/FileUtil.java new file mode 100644 index 00000000000..d1b1d0b89f8 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileUtil.java @@ -0,0 +1,794 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.util.Enumeration; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.mortbay.log.Log; + +/** + * A collection of file-processing util methods + */ +public class FileUtil { + /** + * convert an array of FileStatus to an array of Path + * + * @param stats + * an array of FileStatus objects + * @return an array of paths corresponding to the input + */ + public static Path[] stat2Paths(FileStatus[] stats) { + if (stats == null) + return null; + Path[] ret = new Path[stats.length]; + for (int i = 0; i < stats.length; ++i) { + ret[i] = stats[i].getPath(); + } + return ret; + } + + /** + * convert an array of FileStatus to an array of Path. 
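An illustrative combination with globbing (the pattern is invented): if globStatus() comes back null, the original pattern path is returned instead of a null array.

    FileSystem fs = FileSystem.get(new Configuration());
    Path pattern = new Path("/user/alice/logs/*.gz");            // example glob
    Path[] inputs = FileUtil.stat2Paths(fs.globStatus(pattern), pattern);

This is the same idiom FsShell uses later in this patch to expand its command-line arguments.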
+ * If stats if null, return path + * @param stats + * an array of FileStatus objects + * @param path + * default path to return in stats is null + * @return an array of paths corresponding to the input + */ + public static Path[] stat2Paths(FileStatus[] stats, Path path) { + if (stats == null) + return new Path[]{path}; + else + return stat2Paths(stats); + } + + /** + * Delete a directory and all its contents. If + * we return false, the directory may be partially-deleted. + */ + public static boolean fullyDelete(File dir) throws IOException { + File contents[] = dir.listFiles(); + if (contents != null) { + for (int i = 0; i < contents.length; i++) { + if (contents[i].isFile()) { + if (!contents[i].delete()) { + return false; + } + } else { + //try deleting the directory + // this might be a symlink + boolean b = false; + b = contents[i].delete(); + if (b){ + //this was indeed a symlink or an empty directory + continue; + } + // if not an empty directory or symlink let + // fullydelete handle it. + if (!fullyDelete(contents[i])) { + return false; + } + } + } + } + return dir.delete(); + } + + /** + * Recursively delete a directory. + * + * @param fs {@link FileSystem} on which the path is present + * @param dir directory to recursively delete + * @throws IOException + * @deprecated Use {@link FileSystem#delete(Path, boolean)} + */ + @Deprecated + public static void fullyDelete(FileSystem fs, Path dir) + throws IOException { + fs.delete(dir, true); + } + + // + // If the destination is a subdirectory of the source, then + // generate exception + // + private static void checkDependencies(FileSystem srcFS, + Path src, + FileSystem dstFS, + Path dst) + throws IOException { + if (srcFS == dstFS) { + String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR; + String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR; + if (dstq.startsWith(srcq)) { + if (srcq.length() == dstq.length()) { + throw new IOException("Cannot copy " + src + " to itself."); + } else { + throw new IOException("Cannot copy " + src + " to its subdirectory " + + dst); + } + } + } + } + + /** Copy files between FileSystems. */ + public static boolean copy(FileSystem srcFS, Path src, + FileSystem dstFS, Path dst, + boolean deleteSource, + Configuration conf) throws IOException { + return copy(srcFS, src, dstFS, dst, deleteSource, true, conf); + } + + public static boolean copy(FileSystem srcFS, Path[] srcs, + FileSystem dstFS, Path dst, + boolean deleteSource, + boolean overwrite, Configuration conf) + throws IOException { + boolean gotException = false; + boolean returnVal = true; + StringBuffer exceptions = new StringBuffer(); + + if (srcs.length == 1) + return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf); + + // Check if dest is directory + if (!dstFS.exists(dst)) { + throw new IOException("`" + dst +"': specified destination directory " + + "doest not exist"); + } else { + FileStatus sdst = dstFS.getFileStatus(dst); + if (!sdst.isDir()) + throw new IOException("copying multiple files, but last argument `" + + dst + "' is not a directory"); + } + + for (Path src : srcs) { + try { + if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf)) + returnVal = false; + } catch (IOException e) { + gotException = true; + exceptions.append(e.getMessage()); + exceptions.append("\n"); + } + } + if (gotException) { + throw new IOException(exceptions.toString()); + } + return returnVal; + } + + /** Copy files between FileSystems. 
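A hedged sketch of a single-file copy between two file systems using the overload below; the URIs are placeholders:

    Configuration conf = new Configuration();
    Path src = new Path("hdfs://nn1:8020/data/events.log");      // example source
    Path dst = new Path("hdfs://nn2:8020/backup/events.log");    // example destination
    boolean copied = FileUtil.copy(src.getFileSystem(conf), src,
                                   dst.getFileSystem(conf), dst,
                                   false /* deleteSource */, true /* overwrite */, conf);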
*/ + public static boolean copy(FileSystem srcFS, Path src, + FileSystem dstFS, Path dst, + boolean deleteSource, + boolean overwrite, + Configuration conf) throws IOException { + FileStatus fileStatus = srcFS.getFileStatus(src); + return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf); + } + + /** Copy files between FileSystems. */ + private static boolean copy(FileSystem srcFS, FileStatus srcStatus, + FileSystem dstFS, Path dst, + boolean deleteSource, + boolean overwrite, + Configuration conf) throws IOException { + Path src = srcStatus.getPath(); + dst = checkDest(src.getName(), dstFS, dst, overwrite); + if (srcStatus.isDir()) { + checkDependencies(srcFS, src, dstFS, dst); + if (!dstFS.mkdirs(dst)) { + return false; + } + FileStatus contents[] = srcFS.listStatus(src); + for (int i = 0; i < contents.length; i++) { + copy(srcFS, contents[i], dstFS, + new Path(dst, contents[i].getPath().getName()), + deleteSource, overwrite, conf); + } + } else { + InputStream in=null; + OutputStream out = null; + try { + in = srcFS.open(src); + out = dstFS.create(dst, overwrite); + IOUtils.copyBytes(in, out, conf, true); + } catch (IOException e) { + IOUtils.closeStream(out); + IOUtils.closeStream(in); + throw e; + } + } + if (deleteSource) { + return srcFS.delete(src, true); + } else { + return true; + } + + } + + /** Copy all files in a directory to one output file (merge). */ + public static boolean copyMerge(FileSystem srcFS, Path srcDir, + FileSystem dstFS, Path dstFile, + boolean deleteSource, + Configuration conf, String addString) throws IOException { + dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false); + + if (!srcFS.getFileStatus(srcDir).isDir()) + return false; + + OutputStream out = dstFS.create(dstFile); + + try { + FileStatus contents[] = srcFS.listStatus(srcDir); + for (int i = 0; i < contents.length; i++) { + if (!contents[i].isDir()) { + InputStream in = srcFS.open(contents[i].getPath()); + try { + IOUtils.copyBytes(in, out, conf, false); + if (addString!=null) + out.write(addString.getBytes("UTF-8")); + + } finally { + in.close(); + } + } + } + } finally { + out.close(); + } + + + if (deleteSource) { + return srcFS.delete(srcDir, true); + } else { + return true; + } + } + + /** Copy local files to a FileSystem. */ + public static boolean copy(File src, + FileSystem dstFS, Path dst, + boolean deleteSource, + Configuration conf) throws IOException { + dst = checkDest(src.getName(), dstFS, dst, false); + + if (src.isDirectory()) { + if (!dstFS.mkdirs(dst)) { + return false; + } + File contents[] = src.listFiles(); + for (int i = 0; i < contents.length; i++) { + copy(contents[i], dstFS, new Path(dst, contents[i].getName()), + deleteSource, conf); + } + } else if (src.isFile()) { + InputStream in = null; + OutputStream out =null; + try { + in = new FileInputStream(src); + out = dstFS.create(dst); + IOUtils.copyBytes(in, out, conf); + } catch (IOException e) { + IOUtils.closeStream( out ); + IOUtils.closeStream( in ); + throw e; + } + } else { + throw new IOException(src.toString() + + ": No such file or directory"); + } + if (deleteSource) { + return FileUtil.fullyDelete(src); + } else { + return true; + } + } + + /** Copy FileSystem files to local files. */ + public static boolean copy(FileSystem srcFS, Path src, + File dst, boolean deleteSource, + Configuration conf) throws IOException { + FileStatus filestatus = srcFS.getFileStatus(src); + return copy(srcFS, filestatus, dst, deleteSource, conf); + } + + /** Copy FileSystem files to local files. 
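For the public variant just above, a one-call download onto the local disk might look like this (paths invented, java.io.File assumed imported):

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileUtil.copy(fs, new Path("/user/alice/part-00000"),
                  new File("/tmp/part-00000"), false /* keep the source */, conf);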
*/ + private static boolean copy(FileSystem srcFS, FileStatus srcStatus, + File dst, boolean deleteSource, + Configuration conf) throws IOException { + Path src = srcStatus.getPath(); + if (srcStatus.isDir()) { + if (!dst.mkdirs()) { + return false; + } + FileStatus contents[] = srcFS.listStatus(src); + for (int i = 0; i < contents.length; i++) { + copy(srcFS, contents[i], + new File(dst, contents[i].getPath().getName()), + deleteSource, conf); + } + } else { + InputStream in = srcFS.open(src); + IOUtils.copyBytes(in, new FileOutputStream(dst), conf); + } + if (deleteSource) { + return srcFS.delete(src, true); + } else { + return true; + } + } + + private static Path checkDest(String srcName, FileSystem dstFS, Path dst, + boolean overwrite) throws IOException { + if (dstFS.exists(dst)) { + FileStatus sdst = dstFS.getFileStatus(dst); + if (sdst.isDir()) { + if (null == srcName) { + throw new IOException("Target " + dst + " is a directory"); + } + return checkDest(null, dstFS, new Path(dst, srcName), overwrite); + } else if (!overwrite) { + throw new IOException("Target " + dst + " already exists"); + } + } + return dst; + } + + /** + * This class is only used on windows to invoke the cygpath command. + */ + private static class CygPathCommand extends Shell { + String[] command; + String result; + CygPathCommand(String path) throws IOException { + command = new String[]{"cygpath", "-u", path}; + run(); + } + String getResult() throws IOException { + return result; + } + protected String[] getExecString() { + return command; + } + protected void parseExecResult(BufferedReader lines) throws IOException { + String line = lines.readLine(); + if (line == null) { + throw new IOException("Can't convert '" + command[2] + + " to a cygwin path"); + } + result = line; + } + } + + /** + * Convert a os-native filename to a path that works for the shell. + * @param filename The filename to convert + * @return The unix pathname + * @throws IOException on windows, there can be problems with the subprocess + */ + public static String makeShellPath(String filename) throws IOException { + if (Path.WINDOWS) { + return new CygPathCommand(filename).getResult(); + } else { + return filename; + } + } + + /** + * Convert a os-native filename to a path that works for the shell. + * @param file The filename to convert + * @return The unix pathname + * @throws IOException on windows, there can be problems with the subprocess + */ + public static String makeShellPath(File file) throws IOException { + return makeShellPath(file, false); + } + + /** + * Convert a os-native filename to a path that works for the shell. + * @param file The filename to convert + * @param makeCanonicalPath + * Whether to make canonical path for the file passed + * @return The unix pathname + * @throws IOException on windows, there can be problems with the subprocess + */ + public static String makeShellPath(File file, boolean makeCanonicalPath) + throws IOException { + if (makeCanonicalPath) { + return makeShellPath(file.getCanonicalPath()); + } else { + return makeShellPath(file.toString()); + } + } + + /** + * Takes an input dir and returns the du on that local directory. Very basic + * implementation. 
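For example (the directory name is made up), sizing a local cache directory:

    File localCache = new File("/tmp/hadoop-alice/mapred/local");
    System.out.println(localCache + " currently holds " + FileUtil.getDU(localCache) + " bytes");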
+ * + * @param dir + * The input dir to get the disk space of this local dir + * @return The total disk space of the input local directory + */ + public static long getDU(File dir) { + long size = 0; + if (!dir.exists()) + return 0; + if (!dir.isDirectory()) { + return dir.length(); + } else { + size = dir.length(); + File[] allFiles = dir.listFiles(); + for (int i = 0; i < allFiles.length; i++) { + size = size + getDU(allFiles[i]); + } + return size; + } + } + + /** + * Given a File input it will unzip the file in a the unzip directory + * passed as the second parameter + * @param inFile The zip file as input + * @param unzipDir The unzip directory where to unzip the zip file. + * @throws IOException + */ + public static void unZip(File inFile, File unzipDir) throws IOException { + Enumeration entries; + ZipFile zipFile = new ZipFile(inFile); + + try { + entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + if (!entry.isDirectory()) { + InputStream in = zipFile.getInputStream(entry); + try { + File file = new File(unzipDir, entry.getName()); + if (!file.getParentFile().mkdirs()) { + if (!file.getParentFile().isDirectory()) { + throw new IOException("Mkdirs failed to create " + + file.getParentFile().toString()); + } + } + OutputStream out = new FileOutputStream(file); + try { + byte[] buffer = new byte[8192]; + int i; + while ((i = in.read(buffer)) != -1) { + out.write(buffer, 0, i); + } + } finally { + out.close(); + } + } finally { + in.close(); + } + } + } + } finally { + zipFile.close(); + } + } + + /** + * Given a Tar File as input it will untar the file in a the untar directory + * passed as the second parameter + * + * This utility will untar ".tar" files and ".tar.gz","tgz" files. + * + * @param inFile The tar file as input. + * @param untarDir The untar directory where to untar the tar file. + * @throws IOException + */ + public static void unTar(File inFile, File untarDir) throws IOException { + if (!untarDir.mkdirs()) { + if (!untarDir.isDirectory()) { + throw new IOException("Mkdirs failed to create " + untarDir); + } + } + + StringBuffer untarCommand = new StringBuffer(); + boolean gzipped = inFile.toString().endsWith("gz"); + if (gzipped) { + untarCommand.append(" gzip -dc '"); + untarCommand.append(FileUtil.makeShellPath(inFile)); + untarCommand.append("' | ("); + } + untarCommand.append("cd '"); + untarCommand.append(FileUtil.makeShellPath(untarDir)); + untarCommand.append("' ; "); + untarCommand.append("tar -xf "); + + if (gzipped) { + untarCommand.append(" -)"); + } else { + untarCommand.append(FileUtil.makeShellPath(inFile)); + } + String[] shellCmd = { "bash", "-c", untarCommand.toString() }; + ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd); + shexec.execute(); + int exitcode = shexec.getExitCode(); + if (exitcode != 0) { + throw new IOException("Error untarring file " + inFile + + ". Tar process exited with exit code " + exitcode); + } + } + + /** + * Class for creating hardlinks. + * Supports Unix, Cygwin, WindXP. 
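A brief sketch of the helper defined below; both file names are invented, and the call shells out to ln or fsutil depending on the platform:

    File existing = new File("/data/current/blk_1234");   // example block file
    File alias    = new File("/data/detach/blk_1234");    // second name for the same data
    FileUtil.HardLink.createHardLink(existing, alias);    // throws IOException if the command fails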
+ * + */ + public static class HardLink { + enum OSType { + OS_TYPE_UNIX, + OS_TYPE_WINXP, + OS_TYPE_SOLARIS, + OS_TYPE_MAC; + } + + private static String[] hardLinkCommand; + private static String[] getLinkCountCommand; + private static OSType osType; + + static { + osType = getOSType(); + switch(osType) { + case OS_TYPE_WINXP: + hardLinkCommand = new String[] {"fsutil","hardlink","create", null, null}; + getLinkCountCommand = new String[] {"stat","-c%h"}; + break; + case OS_TYPE_SOLARIS: + hardLinkCommand = new String[] {"ln", null, null}; + getLinkCountCommand = new String[] {"ls","-l"}; + break; + case OS_TYPE_MAC: + hardLinkCommand = new String[] {"ln", null, null}; + getLinkCountCommand = new String[] {"stat","-f%l"}; + break; + case OS_TYPE_UNIX: + default: + hardLinkCommand = new String[] {"ln", null, null}; + getLinkCountCommand = new String[] {"stat","-c%h"}; + } + } + + static private OSType getOSType() { + String osName = System.getProperty("os.name"); + if (osName.indexOf("Windows") >= 0 && + (osName.indexOf("XP") >= 0 || osName.indexOf("2003") >= 0 || osName.indexOf("Vista") >= 0)) + return OSType.OS_TYPE_WINXP; + else if (osName.indexOf("SunOS") >= 0) + return OSType.OS_TYPE_SOLARIS; + else if (osName.indexOf("Mac") >= 0) + return OSType.OS_TYPE_MAC; + else + return OSType.OS_TYPE_UNIX; + } + + /** + * Creates a hardlink + */ + public static void createHardLink(File target, + File linkName) throws IOException { + int len = hardLinkCommand.length; + if (osType == OSType.OS_TYPE_WINXP) { + hardLinkCommand[len-1] = target.getCanonicalPath(); + hardLinkCommand[len-2] = linkName.getCanonicalPath(); + } else { + hardLinkCommand[len-2] = makeShellPath(target, true); + hardLinkCommand[len-1] = makeShellPath(linkName, true); + } + // execute shell command + Process process = Runtime.getRuntime().exec(hardLinkCommand); + try { + if (process.waitFor() != 0) { + String errMsg = new BufferedReader(new InputStreamReader( + process.getInputStream())).readLine(); + if (errMsg == null) errMsg = ""; + String inpMsg = new BufferedReader(new InputStreamReader( + process.getErrorStream())).readLine(); + if (inpMsg == null) inpMsg = ""; + throw new IOException(errMsg + inpMsg); + } + } catch (InterruptedException e) { + throw new IOException(StringUtils.stringifyException(e)); + } finally { + process.destroy(); + } + } + + /** + * Retrieves the number of links to the specified file. 
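Continuing the sketch above, either name now reports a link count of 2 (assuming no other links to the file):

    int links = FileUtil.HardLink.getLinkCount(new File("/data/current/blk_1234"));
    // links == 2 after the createHardLink() call sketched earlier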
+ */ + public static int getLinkCount(File fileName) throws IOException { + int len = getLinkCountCommand.length; + String[] cmd = new String[len + 1]; + for (int i = 0; i < len; i++) { + cmd[i] = getLinkCountCommand[i]; + } + cmd[len] = fileName.toString(); + String inpMsg = ""; + String errMsg = ""; + int exitValue = -1; + BufferedReader in = null; + BufferedReader err = null; + + // execute shell command + Process process = Runtime.getRuntime().exec(cmd); + try { + exitValue = process.waitFor(); + in = new BufferedReader(new InputStreamReader( + process.getInputStream())); + inpMsg = in.readLine(); + if (inpMsg == null) inpMsg = ""; + + err = new BufferedReader(new InputStreamReader( + process.getErrorStream())); + errMsg = err.readLine(); + if (errMsg == null) errMsg = ""; + if (exitValue != 0) { + throw new IOException(inpMsg + errMsg); + } + if (getOSType() == OSType.OS_TYPE_SOLARIS) { + String[] result = inpMsg.split("\\s+"); + return Integer.parseInt(result[1]); + } else { + return Integer.parseInt(inpMsg); + } + } catch (NumberFormatException e) { + throw new IOException(StringUtils.stringifyException(e) + + inpMsg + errMsg + + " on file:" + fileName); + } catch (InterruptedException e) { + throw new IOException(StringUtils.stringifyException(e) + + inpMsg + errMsg + + " on file:" + fileName); + } finally { + process.destroy(); + if (in != null) in.close(); + if (err != null) err.close(); + } + } + } + + /** + * Create a soft link between a src and destination + * only on a local disk. HDFS does not support this + * @param target the target for symlink + * @param linkname the symlink + * @return value returned by the command + */ + public static int symLink(String target, String linkname) throws IOException{ + String cmd = "ln -s " + target + " " + linkname; + Process p = Runtime.getRuntime().exec(cmd, null); + int returnVal = -1; + try{ + returnVal = p.waitFor(); + } catch(InterruptedException e){ + //do nothing as of yet + } + return returnVal; + } + + /** + * Change the permissions on a filename. + * @param filename the name of the file to change + * @param perm the permission string + * @return the exit code from the command + * @throws IOException + * @throws InterruptedException + */ + public static int chmod(String filename, String perm + ) throws IOException, InterruptedException { + return chmod(filename, perm, false); + } + + /** + * Change the permissions on a file / directory, recursively, if + * needed. + * @param filename name of the file whose permissions are to change + * @param perm permission string + * @param recursive true, if permissions should be changed recursively + * @return the exit code from the command. + * @throws IOException + * @throws InterruptedException + */ + public static int chmod(String filename, String perm, boolean recursive) + throws IOException, InterruptedException { + StringBuffer cmdBuf = new StringBuffer(); + cmdBuf.append("chmod "); + if (recursive) { + cmdBuf.append("-R "); + } + cmdBuf.append(perm).append(" "); + cmdBuf.append(filename); + String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()}; + ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd); + try { + shExec.execute(); + }catch(Exception e) { + if(Log.isDebugEnabled()) { + Log.debug("Error while changing permission : " + filename + +" Exception: " + StringUtils.stringifyException(e)); + } + } + return shExec.getExitCode(); + } + + /** + * Create a tmp file for a base file. 
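A small sketch of the temp-then-publish idiom this helper is written for; the output name is an example, and replaceFile() is the helper defined a little further down in this class:

    File output = new File("/data/output/map_0.out");                 // example target
    File tmp = FileUtil.createLocalTempFile(output, "_tmp_", true);   // cleaned up on JVM exit if left behind
    // ... write tmp to completion ...
    FileUtil.replaceFile(tmp, output);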
+ * @param basefile the base file of the tmp + * @param prefix file name prefix of tmp + * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits + * @return a newly created tmp file + * @exception IOException If a tmp file cannot created + * @see java.io.File#createTempFile(String, String, File) + * @see java.io.File#deleteOnExit() + */ + public static final File createLocalTempFile(final File basefile, + final String prefix, + final boolean isDeleteOnExit) + throws IOException { + File tmp = File.createTempFile(prefix + basefile.getName(), + "", basefile.getParentFile()); + if (isDeleteOnExit) { + tmp.deleteOnExit(); + } + return tmp; + } + + /** + * Move the src file to the name specified by target. + * @param src the source file + * @param target the target file + * @exception IOException If this operation fails + */ + public static void replaceFile(File src, File target) throws IOException { + /* renameTo() has two limitations on Windows platform. + * src.renameTo(target) fails if + * 1) If target already exists OR + * 2) If target is already open for reading/writing. + */ + if (!src.renameTo(target)) { + int retries = 5; + while (target.exists() && !target.delete() && retries-- >= 0) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + throw new IOException("replaceFile interrupted."); + } + } + if (!src.renameTo(target)) { + throw new IOException("Unable to rename " + src + + " to " + target); + } + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FilterFileSystem.java b/src/java/org/apache/hadoop/fs/FilterFileSystem.java new file mode 100644 index 00000000000..2a2aa619afc --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +/**************************************************************** + * A FilterFileSystem contains + * some other file system, which it uses as + * its basic file system, possibly transforming + * the data along the way or providing additional + * functionality. The class FilterFileSystem + * itself simply overrides all methods of + * FileSystem with versions that + * pass all requests to the contained file + * system. Subclasses of FilterFileSystem + * may further override some of these methods + * and may also provide additional methods + * and fields. 
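As an illustration of the pattern described here, a subclass only has to hand the wrapped instance to the constructor and override whatever it wants to intercept; the class name and the logging are invented for the example, assuming the usual org.apache.hadoop.fs imports:

    public class LoggingFileSystem extends FilterFileSystem {
      public LoggingFileSystem(FileSystem wrapped) {
        super(wrapped);                              // everything not overridden is delegated
      }
      @Override
      public FSDataInputStream open(Path f, int bufferSize) throws IOException {
        System.out.println("open " + f);             // the added behaviour
        return super.open(f, bufferSize);
      }
    }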
+ * + *****************************************************************/ +public class FilterFileSystem extends FileSystem { + + protected FileSystem fs; + + /* + * so that extending classes can define it + */ + public FilterFileSystem() { + } + + public FilterFileSystem(FileSystem fs) { + this.fs = fs; + this.statistics = fs.statistics; + } + + /** Called after a new FileSystem instance is constructed. + * @param name a uri whose authority section names the host, port, etc. + * for this FileSystem + * @param conf the configuration + */ + public void initialize(URI name, Configuration conf) throws IOException { + fs.initialize(name, conf); + } + + /** Returns a URI whose scheme and authority identify this FileSystem.*/ + public URI getUri() { + return fs.getUri(); + } + + /** Make sure that a path specifies a FileSystem. */ + public Path makeQualified(Path path) { + return fs.makeQualified(path); + } + + /////////////////////////////////////////////////////////////// + // FileSystem + /////////////////////////////////////////////////////////////// + + /** Check that a Path belongs to this FileSystem. */ + protected void checkPath(Path path) { + fs.checkPath(path); + } + + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + return fs.getFileBlockLocations(file, start, len); + } + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return fs.open(f, bufferSize); + } + + /** {@inheritDoc} */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + return fs.append(f, bufferSize, progress); + } + + /** {@inheritDoc} */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + return fs.create(f, permission, + overwrite, bufferSize, replication, blockSize, progress); + } + + /** + * Set replication for an existing file. + * + * @param src file name + * @param replication new replication + * @throws IOException + * @return true if successful; + * false if file does not exist or is a directory + */ + public boolean setReplication(Path src, short replication) throws IOException { + return fs.setReplication(src, replication); + } + + /** + * Renames Path src to Path dst. Can take place on local fs + * or remote DFS. + */ + public boolean rename(Path src, Path dst) throws IOException { + return fs.rename(src, dst); + } + + /** Delete a file */ + public boolean delete(Path f, boolean recursive) throws IOException { + return fs.delete(f, recursive); + } + + /** List files in a directory. */ + public FileStatus[] listStatus(Path f) throws IOException { + return fs.listStatus(f); + } + + public Path getHomeDirectory() { + return fs.getHomeDirectory(); + } + + + /** + * Set the current working directory for the given file system. All relative + * paths will be resolved relative to it. 
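For instance (directory names are examples), after the call below a relative Path qualifies against the new working directory:

    fs.setWorkingDirectory(new Path("/user/alice"));
    Path qualified = fs.makeQualified(new Path("reports/2009"));
    // qualified now points at /user/alice/reports/2009 on this file system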
+ * + * @param newDir + */ + public void setWorkingDirectory(Path newDir) { + fs.setWorkingDirectory(newDir); + } + + /** + * Get the current working directory for the given file system + * + * @return the directory pathname + */ + public Path getWorkingDirectory() { + return fs.getWorkingDirectory(); + } + + /** {@inheritDoc} */ + @Override + public FsStatus getStatus(Path p) throws IOException { + return fs.getStatus(p); + } + + /** {@inheritDoc} */ + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + return fs.mkdirs(f, permission); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + fs.copyFromLocalFile(delSrc, src, dst); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * delSrc indicates if the src will be removed or not. + */ + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + fs.copyToLocalFile(delSrc, src, dst); + } + + /** + * Returns a local File that the user can write output to. The caller + * provides both the eventual FS target name and the local working + * file. If the FS is local, we write directly into the target. If + * the FS is remote, we write into the tmp local area. + */ + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return fs.startLocalOutput(fsOutputFile, tmpLocalFile); + } + + /** + * Called when we're all done writing to the target. A local FS will + * do nothing, because we've written to exactly the right place. A remote + * FS will copy the contents of tmpLocalFile to the correct target at + * fsOutputFile. + */ + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + fs.completeLocalOutput(fsOutputFile, tmpLocalFile); + } + + /** Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. */ + public long getDefaultBlockSize() { + return fs.getDefaultBlockSize(); + } + + /** + * Get the default replication. + */ + public short getDefaultReplication() { + return fs.getDefaultReplication(); + } + + /** + * Get file status. 
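A quick sketch of reading the returned status, assuming a FileSystem fs as elsewhere in this class; the path is a placeholder:

    FileStatus st = fs.getFileStatus(new Path("/user/alice/data.seq"));
    System.out.println(st.getPath() + ": " + st.getLen() + " bytes, "
        + (st.isDir() ? "directory" : "replication " + st.getReplication()));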
+ */ + public FileStatus getFileStatus(Path f) throws IOException { + return fs.getFileStatus(f); + } + + /** {@inheritDoc} */ + public FileChecksum getFileChecksum(Path f) throws IOException { + return fs.getFileChecksum(f); + } + + /** {@inheritDoc} */ + public void setVerifyChecksum(boolean verifyChecksum) { + fs.setVerifyChecksum(verifyChecksum); + } + + @Override + public Configuration getConf() { + return fs.getConf(); + } + + @Override + public void close() throws IOException { + super.close(); + fs.close(); + } + + /** {@inheritDoc} */ + @Override + public void setOwner(Path p, String username, String groupname + ) throws IOException { + fs.setOwner(p, username, groupname); + } + + /** {@inheritDoc} */ + @Override + public void setPermission(Path p, FsPermission permission + ) throws IOException { + fs.setPermission(p, permission); + } +} diff --git a/src/java/org/apache/hadoop/fs/FsShell.java b/src/java/org/apache/hadoop/fs/FsShell.java new file mode 100644 index 00000000000..987b4999668 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsShell.java @@ -0,0 +1,1925 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.zip.GZIPInputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.shell.CommandFormat; +import org.apache.hadoop.fs.shell.Count; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.util.StringUtils; + +/** Provide command line access to a FileSystem. 
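Because FsShell is a Tool, it can be driven programmatically through ToolRunner as well as from the hadoop fs command line; a minimal sketch, with an argument list that is only an example:

    public static void main(String[] args) throws Exception {
      int exitCode = ToolRunner.run(new FsShell(), new String[] {"-ls", "/user/alice"});
      System.exit(exitCode);
    }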
*/ +public class FsShell extends Configured implements Tool { + + protected FileSystem fs; + private Trash trash; + public static final SimpleDateFormat dateForm = + new SimpleDateFormat("yyyy-MM-dd HH:mm"); + protected static final SimpleDateFormat modifFmt = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + static final int BORDER = 2; + static { + modifFmt.setTimeZone(TimeZone.getTimeZone("UTC")); + } + static final String SETREP_SHORT_USAGE="-setrep [-R] [-w] "; + static final String GET_SHORT_USAGE = "-get [-ignoreCrc] [-crc] "; + static final String COPYTOLOCAL_SHORT_USAGE = GET_SHORT_USAGE.replace( + "-get", "-copyToLocal"); + static final String TAIL_USAGE="-tail [-f] "; + + /** + */ + public FsShell() { + this(null); + } + + public FsShell(Configuration conf) { + super(conf); + fs = null; + trash = null; + } + + protected void init() throws IOException { + getConf().setQuietMode(true); + if (this.fs == null) { + this.fs = FileSystem.get(getConf()); + } + if (this.trash == null) { + this.trash = new Trash(getConf()); + } + } + + + /** + * Copies from stdin to the indicated file. + */ + private void copyFromStdin(Path dst, FileSystem dstFs) throws IOException { + if (dstFs.isDirectory(dst)) { + throw new IOException("When source is stdin, destination must be a file."); + } + if (dstFs.exists(dst)) { + throw new IOException("Target " + dst.toString() + " already exists."); + } + FSDataOutputStream out = dstFs.create(dst); + try { + IOUtils.copyBytes(System.in, out, getConf(), false); + } + finally { + out.close(); + } + } + + /** + * Print from src to stdout. + */ + private void printToStdout(InputStream in) throws IOException { + try { + IOUtils.copyBytes(in, System.out, getConf(), false); + } finally { + in.close(); + } + } + + + /** + * Add local files to the indicated FileSystem name. src is kept. + */ + void copyFromLocal(Path[] srcs, String dstf) throws IOException { + Path dstPath = new Path(dstf); + FileSystem dstFs = dstPath.getFileSystem(getConf()); + if (srcs.length == 1 && srcs[0].toString().equals("-")) + copyFromStdin(dstPath, dstFs); + else + dstFs.copyFromLocalFile(false, false, srcs, dstPath); + } + + /** + * Add local files to the indicated FileSystem name. src is removed. + */ + void moveFromLocal(Path[] srcs, String dstf) throws IOException { + Path dstPath = new Path(dstf); + FileSystem dstFs = dstPath.getFileSystem(getConf()); + dstFs.moveFromLocalFile(srcs, dstPath); + } + + /** + * Add a local file to the indicated FileSystem name. src is removed. + */ + void moveFromLocal(Path src, String dstf) throws IOException { + moveFromLocal((new Path[]{src}), dstf); + } + + /** + * Obtain the indicated files that match the file pattern srcf + * and copy them to the local name. srcf is kept. + * When copying multiple files, the destination must be a directory. + * Otherwise, IOException is thrown. 
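To make the directory requirement concrete, a driver along the lines of the sketch near the top of the class might run (paths invented):

    // several matches, so the last argument must name an existing local directory
    ToolRunner.run(new FsShell(),
        new String[] {"-get", "/user/alice/output/part-*", "/tmp/output"});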
+ * @param argv: arguments + * @param pos: Ignore everything before argv[pos] + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void copyToLocal(String[]argv, int pos) throws IOException { + CommandFormat cf = new CommandFormat("copyToLocal", 2,2,"crc","ignoreCrc"); + + String srcstr = null; + String dststr = null; + try { + List parameters = cf.parse(argv, pos); + srcstr = parameters.get(0); + dststr = parameters.get(1); + } + catch(IllegalArgumentException iae) { + System.err.println("Usage: java FsShell " + GET_SHORT_USAGE); + throw iae; + } + boolean copyCrc = cf.getOpt("crc"); + final boolean verifyChecksum = !cf.getOpt("ignoreCrc"); + + if (dststr.equals("-")) { + if (copyCrc) { + System.err.println("-crc option is not valid when destination is stdout."); + } + cat(srcstr, verifyChecksum); + } else { + File dst = new File(dststr); + Path srcpath = new Path(srcstr); + FileSystem srcFS = getSrcFileSystem(srcpath, verifyChecksum); + if (copyCrc && !(srcFS instanceof ChecksumFileSystem)) { + System.err.println("-crc option is not valid when source file system " + + "does not have crc files. Automatically turn the option off."); + copyCrc = false; + } + FileStatus[] srcs = srcFS.globStatus(srcpath); + boolean dstIsDir = dst.isDirectory(); + if (srcs.length > 1 && !dstIsDir) { + throw new IOException("When copying multiple files, " + + "destination should be a directory."); + } + for (FileStatus status : srcs) { + Path p = status.getPath(); + File f = dstIsDir? new File(dst, p.getName()): dst; + copyToLocal(srcFS, status, f, copyCrc); + } + } + } + + /** + * Return the {@link FileSystem} specified by src and the conf. + * It the {@link FileSystem} supports checksum, set verifyChecksum. + */ + private FileSystem getSrcFileSystem(Path src, boolean verifyChecksum + ) throws IOException { + FileSystem srcFs = src.getFileSystem(getConf()); + srcFs.setVerifyChecksum(verifyChecksum); + return srcFs; + } + + /** + * The prefix for the tmp file used in copyToLocal. + * It must be at least three characters long, required by + * {@link java.io.File#createTempFile(String, String, File)}. + */ + static final String COPYTOLOCAL_PREFIX = "_copyToLocal_"; + + /** + * Copy a source file from a given file system to local destination. + * @param srcFS source file system + * @param src source path + * @param dst destination + * @param copyCrc copy CRC files? + * @exception IOException If some IO failed + */ + private void copyToLocal(final FileSystem srcFS, final FileStatus srcStatus, + final File dst, final boolean copyCrc) + throws IOException { + /* Keep the structure similar to ChecksumFileSystem.copyToLocal(). + * Ideal these two should just invoke FileUtil.copy() and not repeat + * recursion here. Of course, copy() should support two more options : + * copyCrc and useTmpFile (may be useTmpFile need not be an option). 
+ */ + + Path src = srcStatus.getPath(); + if (!srcStatus.isDir()) { + if (dst.exists()) { + // match the error message in FileUtil.checkDest(): + throw new IOException("Target " + dst + " already exists"); + } + + // use absolute name so that tmp file is always created under dest dir + File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), + COPYTOLOCAL_PREFIX, true); + if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) { + throw new IOException("Failed to copy " + src + " to " + dst); + } + + if (!tmp.renameTo(dst)) { + throw new IOException("Failed to rename tmp file " + tmp + + " to local destination \"" + dst + "\"."); + } + + if (copyCrc) { + if (!(srcFS instanceof ChecksumFileSystem)) { + throw new IOException("Source file system does not have crc files"); + } + + ChecksumFileSystem csfs = (ChecksumFileSystem) srcFS; + File dstcs = FileSystem.getLocal(srcFS.getConf()) + .pathToFile(csfs.getChecksumFile(new Path(dst.getCanonicalPath()))); + FileSystem fs = csfs.getRawFileSystem(); + FileStatus status = csfs.getFileStatus(csfs.getChecksumFile(src)); + copyToLocal(fs, status, dstcs, false); + } + } else { + // once FileUtil.copy() supports tmp file, we don't need to mkdirs(). + if (!dst.mkdirs()) { + throw new IOException("Failed to create local destination \"" + + dst + "\"."); + } + for(FileStatus status : srcFS.listStatus(src)) { + copyToLocal(srcFS, status, + new File(dst, status.getPath().getName()), copyCrc); + } + } + } + + /** + * Get all the files in the directories that match the source file + * pattern and merge and sort them to only one file on local fs + * srcf is kept. + * @param srcf: a file pattern specifying source files + * @param dstf: a destination local file/directory + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void copyMergeToLocal(String srcf, Path dst) throws IOException { + copyMergeToLocal(srcf, dst, false); + } + + + /** + * Get all the files in the directories that match the source file pattern + * and merge and sort them to only one file on local fs + * srcf is kept. + * + * Also adds a string between the files (useful for adding \n + * to a text file) + * @param srcf: a file pattern specifying source files + * @param dstf: a destination local file/directory + * @param endline: if an end of line character is added to a text file + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void copyMergeToLocal(String srcf, Path dst, boolean endline) throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path [] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), + srcPath); + for(int i=0; isrcf and display + * their content on stdout. 
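For instance (paths invented), the equivalent of hadoop fs -cat streams each argument to stdout; as the Linux-style note just below records, a file that cannot be read is reported while the remaining arguments are still printed:

    ToolRunner.run(new FsShell(),
        new String[] {"-cat", "/user/alice/results/part-00000", "/user/alice/results/part-00001"});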
+ * @param srcf: a file pattern specifying source files + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void cat(String src, boolean verifyChecksum) throws IOException { + //cat behavior in Linux + // [~/1207]$ ls ?.txt + // x.txt z.txt + // [~/1207]$ cat x.txt y.txt z.txt + // xxx + // cat: y.txt: No such file or directory + // zzz + + Path srcPattern = new Path(src); + new DelayedExceptionThrowing() { + @Override + void process(Path p, FileSystem srcFs) throws IOException { + if (srcFs.getFileStatus(p).isDir()) { + throw new IOException("Source must be a file."); + } + printToStdout(srcFs.open(p)); + } + }.globAndProcess(srcPattern, getSrcFileSystem(srcPattern, verifyChecksum)); + } + + private class TextRecordInputStream extends InputStream { + SequenceFile.Reader r; + WritableComparable key; + Writable val; + + DataInputBuffer inbuf; + DataOutputBuffer outbuf; + + public TextRecordInputStream(FileStatus f) throws IOException { + r = new SequenceFile.Reader(fs, f.getPath(), getConf()); + key = ReflectionUtils.newInstance(r.getKeyClass().asSubclass(WritableComparable.class), + getConf()); + val = ReflectionUtils.newInstance(r.getValueClass().asSubclass(Writable.class), + getConf()); + inbuf = new DataInputBuffer(); + outbuf = new DataOutputBuffer(); + } + + public int read() throws IOException { + int ret; + if (null == inbuf || -1 == (ret = inbuf.read())) { + if (!r.next(key, val)) { + return -1; + } + byte[] tmp = key.toString().getBytes(); + outbuf.write(tmp, 0, tmp.length); + outbuf.write('\t'); + tmp = val.toString().getBytes(); + outbuf.write(tmp, 0, tmp.length); + outbuf.write('\n'); + inbuf.reset(outbuf.getData(), outbuf.getLength()); + outbuf.reset(); + ret = inbuf.read(); + } + return ret; + } + } + + private InputStream forMagic(Path p, FileSystem srcFs) throws IOException { + FSDataInputStream i = srcFs.open(p); + switch(i.readShort()) { + case 0x1f8b: // RFC 1952 + i.seek(0); + return new GZIPInputStream(i); + case 0x5345: // 'S' 'E' + if (i.readByte() == 'Q') { + i.close(); + return new TextRecordInputStream(srcFs.getFileStatus(p)); + } + break; + } + i.seek(0); + return i; + } + + void text(String srcf) throws IOException { + Path srcPattern = new Path(srcf); + new DelayedExceptionThrowing() { + @Override + void process(Path p, FileSystem srcFs) throws IOException { + if (srcFs.isDirectory(p)) { + throw new IOException("Source must be a file."); + } + printToStdout(forMagic(p, srcFs)); + } + }.globAndProcess(srcPattern, srcPattern.getFileSystem(getConf())); + } + + /** + * Parse the incoming command string + * @param cmd + * @param pos ignore anything before this pos in cmd + * @throws IOException + */ + private void setReplication(String[] cmd, int pos) throws IOException { + CommandFormat c = new CommandFormat("setrep", 2, 2, "R", "w"); + String dst = null; + short rep = 0; + + try { + List parameters = c.parse(cmd, pos); + rep = Short.parseShort(parameters.get(0)); + dst = parameters.get(1); + } catch (NumberFormatException nfe) { + System.err.println("Illegal replication, a positive integer expected"); + throw nfe; + } + catch(IllegalArgumentException iae) { + System.err.println("Usage: java FsShell " + SETREP_SHORT_USAGE); + throw iae; + } + + if (rep < 1) { + System.err.println("Cannot set replication to: " + rep); + throw new IllegalArgumentException("replication must be >= 1"); + } + + List waitList = c.getOpt("w")? 
new ArrayList(): null; + setReplication(rep, dst, c.getOpt("R"), waitList); + + if (waitList != null) { + waitForReplication(waitList, rep); + } + } + + /** + * Wait for all files in waitList to have replication number equal to rep. + * @param waitList The files are waited for. + * @param rep The new replication number. + * @throws IOException IOException + */ + void waitForReplication(List waitList, int rep) throws IOException { + for(Path f : waitList) { + System.out.print("Waiting for " + f + " ..."); + System.out.flush(); + + boolean printWarning = false; + FileStatus status = fs.getFileStatus(f); + long len = status.getLen(); + + for(boolean done = false; !done; ) { + BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len); + int i = 0; + for(; i < locations.length && + locations[i].getHosts().length == rep; i++) + if (!printWarning && locations[i].getHosts().length > rep) { + System.out.println("\nWARNING: the waiting time may be long for " + + "DECREASING the number of replication."); + printWarning = true; + } + done = i == locations.length; + + if (!done) { + System.out.print("."); + System.out.flush(); + try {Thread.sleep(10000);} catch (InterruptedException e) {} + } + } + + System.out.println(" done"); + } + } + + /** + * Set the replication for files that match file pattern srcf + * if it's a directory and recursive is true, + * set replication for all the subdirs and those files too. + * @param newRep new replication factor + * @param srcf a file pattern specifying source files + * @param recursive if need to set replication factor for files in subdirs + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void setReplication(short newRep, String srcf, boolean recursive, + List waitingList) + throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), + srcPath); + for(int i=0; i waitingList) + throws IOException { + if (!srcFs.getFileStatus(src).isDir()) { + setFileReplication(src, srcFs, newRep, waitingList); + return; + } + FileStatus items[] = srcFs.listStatus(src); + if (items == null) { + throw new IOException("Could not get listing for " + src); + } else { + + for (int i = 0; i < items.length; i++) { + if (!items[i].isDir()) { + setFileReplication(items[i].getPath(), srcFs, newRep, waitingList); + } else if (recursive) { + setReplication(newRep, srcFs, items[i].getPath(), recursive, + waitingList); + } + } + } + } + + /** + * Actually set the replication for this file + * If it fails either throw IOException or print an error msg + * @param file: a file/directory + * @param newRep: new replication factor + * @throws IOException + */ + private void setFileReplication(Path file, FileSystem srcFs, short newRep, List waitList) + throws IOException { + if (srcFs.setReplication(file, newRep)) { + if (waitList != null) { + waitList.add(file); + } + System.out.println("Replication " + newRep + " set: " + file); + } else { + System.err.println("Could not set replication for: " + file); + } + } + + + /** + * Get a listing of all files in that match the file pattern srcf. 
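// A standalone sketch of the setrep-and-wait logic above (the path and the
// replication factor are hypothetical): request a new replication factor,
// then poll block locations until every block reports the requested number
// of replicas, as waitForReplication does.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetRepSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/data/part-00000");   // hypothetical file
    short rep = 2;
    if (!fs.setReplication(file, rep)) {
      System.err.println("Could not set replication for: " + file);
      return;
    }
    FileStatus status = fs.getFileStatus(file);
    boolean done = false;
    while (!done) {
      BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
      done = true;
      for (BlockLocation b : blocks) {
        if (b.getHosts().length != rep) { done = false; break; }
      }
      if (!done) Thread.sleep(10000);            // same 10s poll used above
    }
    System.out.println("Replication " + rep + " reached for " + file);
  }
}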
+ * @param srcf a file pattern specifying source files + * @param recursive if need to list files in subdirs + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + private int ls(String srcf, boolean recursive) throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(this.getConf()); + FileStatus[] srcs = srcFs.globStatus(srcPath); + if (srcs==null || srcs.length==0) { + throw new FileNotFoundException("Cannot access " + srcf + + ": No such file or directory."); + } + + boolean printHeader = (srcs.length == 1) ? true: false; + int numOfErrors = 0; + for(int i=0; isrc + * ideally we should provide "-l" option, that lists like "ls -l". + */ + private int ls(FileStatus src, FileSystem srcFs, boolean recursive, + boolean printHeader) throws IOException { + final String cmd = recursive? "lsr": "ls"; + final FileStatus[] items = shellListStatus(cmd, srcFs, src); + if (items == null) { + return 1; + } else { + int numOfErrors = 0; + if (!recursive && printHeader) { + if (items.length != 0) { + System.out.println("Found " + items.length + " items"); + } + } + + int maxReplication = 3, maxLen = 10, maxOwner = 0,maxGroup = 0; + + for(int i = 0; i < items.length; i++) { + FileStatus stat = items[i]; + int replication = String.valueOf(stat.getReplication()).length(); + int len = String.valueOf(stat.getLen()).length(); + int owner = String.valueOf(stat.getOwner()).length(); + int group = String.valueOf(stat.getGroup()).length(); + + if (replication > maxReplication) maxReplication = replication; + if (len > maxLen) maxLen = len; + if (owner > maxOwner) maxOwner = owner; + if (group > maxGroup) maxGroup = group; + } + + for (int i = 0; i < items.length; i++) { + FileStatus stat = items[i]; + Path cur = stat.getPath(); + String mdate = dateForm.format(new Date(stat.getModificationTime())); + + System.out.print((stat.isDir() ? "d" : "-") + + stat.getPermission() + " "); + System.out.printf("%"+ maxReplication + + "s ", (!stat.isDir() ? stat.getReplication() : "-")); + if (maxOwner > 0) + System.out.printf("%-"+ maxOwner + "s ", stat.getOwner()); + if (maxGroup > 0) + System.out.printf("%-"+ maxGroup + "s ", stat.getGroup()); + System.out.printf("%"+ maxLen + "d ", stat.getLen()); + System.out.print(mdate + " "); + System.out.println(cur.toUri().getPath()); + if (recursive && stat.isDir()) { + numOfErrors += ls(stat,srcFs, recursive, printHeader); + } + } + return numOfErrors; + } + } + + /** + * Show the size of a partition in the filesystem that contains + * the specified path. + * @param path a path specifying the source partition. null means /. + * @throws IOException + */ + void df(String path) throws IOException { + if (path == null) path = "/"; + final Path srcPath = new Path(path); + final FileSystem srcFs = srcPath.getFileSystem(getConf()); + if (! 
srcFs.exists(srcPath)) { + throw new FileNotFoundException("Cannot access "+srcPath.toString()); + } + final FsStatus stats = srcFs.getStatus(srcPath); + final int PercentUsed = (int)(100.0f * (float)stats.getUsed() / (float)stats.getCapacity()); + System.out.println("Filesystem\t\tSize\tUsed\tAvail\tUse%"); + System.out.printf("%s\t\t%d\t%d\t%d\t%d%%\n", + path, + stats.getCapacity(), stats.getUsed(), stats.getRemaining(), + PercentUsed); + } + + /** + * Show the size of all files that match the file pattern src + * @param src a file pattern specifying source files + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void du(String src) throws IOException { + Path srcPath = new Path(src); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path[] pathItems = FileUtil.stat2Paths(srcFs.globStatus(srcPath), + srcPath); + FileStatus items[] = srcFs.listStatus(pathItems); + if ((items == null) || ((items.length == 0) && + (!srcFs.exists(srcPath)))){ + throw new FileNotFoundException("Cannot access " + src + + ": No such file or directory."); + } else { + System.out.println("Found " + items.length + " items"); + int maxLength = 10; + + long length[] = new long[items.length]; + for (int i = 0; i < items.length; i++) { + length[i] = items[i].isDir() ? + srcFs.getContentSummary(items[i].getPath()).getLength() : + items[i].getLen(); + int len = String.valueOf(length[i]).length(); + if (len > maxLength) maxLength = len; + } + for(int i = 0; i < items.length; i++) { + System.out.printf("%-"+ (maxLength + BORDER) +"d", length[i]); + System.out.println(items[i].getPath()); + } + } + } + + /** + * Show the summary disk usage of each dir/file + * that matches the file pattern src + * @param src a file pattern specifying source files + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void dus(String src) throws IOException { + Path srcPath = new Path(src); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + FileStatus status[] = srcFs.globStatus(new Path(src)); + if (status==null || status.length==0) { + throw new FileNotFoundException("Cannot access " + src + + ": No such file or directory."); + } + for(int i=0; i 2) + throw new IOException("Not a flag: " + argv[i]); + char flag = argv[i].toCharArray()[1]; + Path f = new Path(argv[++i]); + FileSystem srcFs = f.getFileSystem(getConf()); + switch(flag) { + case 'e': + return srcFs.exists(f) ? 0 : 1; + case 'z': + return srcFs.getFileStatus(f).getLen() == 0 ? 0 : 1; + case 'd': + return srcFs.getFileStatus(f).isDir() ? 0 : 1; + default: + throw new IOException("Unknown flag: " + flag); + } + } + + /** + * Print statistics about path in specified format. + * Format sequences: + * %b: Size of file in blocks + * %n: Filename + * %o: Block size + * %r: replication + * %y: UTC date as "yyyy-MM-dd HH:mm:ss" + * %Y: Milliseconds since January 1, 1970 UTC + */ + void stat(char[] fmt, String src) throws IOException { + Path srcPath = new Path(src); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + FileStatus glob[] = srcFs.globStatus(srcPath); + if (null == glob) + throw new IOException("cannot stat `" + src + "': No such file or directory"); + for (FileStatus f : glob) { + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < fmt.length; ++i) { + if (fmt[i] != '%') { + buf.append(fmt[i]); + } else { + if (i + 1 == fmt.length) break; + switch(fmt[++i]) { + case 'b': + buf.append(f.getLen()); + break; + case 'F': + buf.append(f.isDir() ? 
"directory" : "regular file"); + break; + case 'n': + buf.append(f.getPath().getName()); + break; + case 'o': + buf.append(f.getBlockSize()); + break; + case 'r': + buf.append(f.getReplication()); + break; + case 'y': + buf.append(modifFmt.format(new Date(f.getModificationTime()))); + break; + case 'Y': + buf.append(f.getModificationTime()); + break; + default: + buf.append(fmt[i]); + break; + } + } + } + System.out.println(buf.toString()); + } + } + + /** + * Move files that match the file pattern srcf + * to a destination file. + * When moving mutiple files, the destination must be a directory. + * Otherwise, IOException is thrown. + * @param srcf a file pattern specifying source files + * @param dstf a destination local file/directory + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void rename(String srcf, String dstf) throws IOException { + Path srcPath = new Path(srcf); + Path dstPath = new Path(dstf); + FileSystem fs = srcPath.getFileSystem(getConf()); + URI srcURI = fs.getUri(); + URI dstURI = dstPath.getFileSystem(getConf()).getUri(); + if (srcURI.compareTo(dstURI) != 0) { + throw new IOException("src and destination filesystems do not match."); + } + Path[] srcs = FileUtil.stat2Paths(fs.globStatus(srcPath), srcPath); + Path dst = new Path(dstf); + if (srcs.length > 1 && !fs.isDirectory(dst)) { + throw new IOException("When moving multiple files, " + + "destination should be a directory."); + } + for(int i=0; i 3) { + Path dst = new Path(dest); + FileSystem dstFs = dst.getFileSystem(getConf()); + if (!dstFs.isDirectory(dst)) { + throw new IOException("When moving multiple files, " + + "destination " + dest + " should be a directory."); + } + } + // + // for each source file, issue the rename + // + for (; i < argv.length - 1; i++) { + try { + // + // issue the rename to the fs + // + rename(argv[i], dest); + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error mesage. + // + exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + + e.getLocalizedMessage()); + } + } + return exitCode; + } + + /** + * Copy files that match the file pattern srcf + * to a destination file. + * When copying mutiple files, the destination must be a directory. + * Otherwise, IOException is thrown. 
+ * @param srcf a file pattern specifying source files + * @param dstf a destination local file/directory + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void copy(String srcf, String dstf, Configuration conf) throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path dstPath = new Path(dstf); + FileSystem dstFs = dstPath.getFileSystem(getConf()); + Path [] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath); + if (srcs.length > 1 && !dstFs.isDirectory(dstPath)) { + throw new IOException("When copying multiple files, " + + "destination should be a directory."); + } + for(int i=0; i 3) { + Path dst = new Path(dest); + if (!fs.isDirectory(dst)) { + throw new IOException("When copying multiple files, " + + "destination " + dest + " should be a directory."); + } + } + // + // for each source file, issue the copy + // + for (; i < argv.length - 1; i++) { + try { + // + // issue the copy to the fs + // + copy(argv[i], dest, conf); + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error mesage. + // + exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + + e.getLocalizedMessage()); + } + } + return exitCode; + } + + /** + * Delete all files that match the file pattern srcf. + * @param srcf a file pattern specifying source files + * @param recursive if need to delete subdirs + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void delete(String srcf, final boolean recursive) throws IOException { + //rm behavior in Linux + // [~/1207]$ ls ?.txt + // x.txt z.txt + // [~/1207]$ rm x.txt y.txt z.txt + // rm: cannot remove `y.txt': No such file or directory + + Path srcPattern = new Path(srcf); + new DelayedExceptionThrowing() { + @Override + void process(Path p, FileSystem srcFs) throws IOException { + delete(p, srcFs, recursive); + } + }.globAndProcess(srcPattern, srcPattern.getFileSystem(getConf())); + } + + /* delete a file */ + private void delete(Path src, FileSystem srcFs, boolean recursive) throws IOException { + if (srcFs.isDirectory(src) && !recursive) { + throw new IOException("Cannot remove directory \"" + src + + "\", use -rmr instead"); + } + Trash trashTmp = new Trash(srcFs, getConf()); + if (trashTmp.moveToTrash(src)) { + System.out.println("Moved to trash: " + src); + return; + } + if (srcFs.delete(src, true)) { + System.out.println("Deleted " + src); + } else { + if (!srcFs.exists(src)) { + throw new FileNotFoundException("cannot remove " + + src + ": No such file or directory."); + } + throw new IOException("Delete failed " + src); + } + } + + private void expunge() throws IOException { + trash.expunge(); + trash.checkpoint(); + } + + /** + * Returns the Trash object associated with this shell. 
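// A standalone sketch of the delete-with-trash flow shown above (the path is
// hypothetical): try to move the target into the trash first, and only fall
// back to a real, recursive delete when that is not possible.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class TrashSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path victim = new Path("/tmp/scratch.dat");   // hypothetical path
    Trash trash = new Trash(fs, conf);
    if (trash.moveToTrash(victim)) {
      System.out.println("Moved to trash: " + victim);
    } else if (fs.delete(victim, true)) {         // true = recursive
      System.out.println("Deleted " + victim);
    }
    // The -expunge command reduces to:
    // trash.expunge();
    // trash.checkpoint();
  }
}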
+ */ + public Path getCurrentTrashDir() { + return trash.getCurrentTrashDir(); + } + + /** + * Parse the incoming command string + * @param cmd + * @param pos ignore anything before this pos in cmd + * @throws IOException + */ + private void tail(String[] cmd, int pos) throws IOException { + CommandFormat c = new CommandFormat("tail", 1, 1, "f"); + String src = null; + Path path = null; + + try { + List parameters = c.parse(cmd, pos); + src = parameters.get(0); + } catch(IllegalArgumentException iae) { + System.err.println("Usage: java FsShell " + TAIL_USAGE); + throw iae; + } + boolean foption = c.getOpt("f") ? true: false; + path = new Path(src); + FileSystem srcFs = path.getFileSystem(getConf()); + FileStatus fileStatus = srcFs.getFileStatus(path); + if (fileStatus.isDir()) { + throw new IOException("Source must be a file."); + } + + long fileSize = fileStatus.getLen(); + long offset = (fileSize > 1024) ? fileSize - 1024: 0; + + while (true) { + FSDataInputStream in = srcFs.open(path); + in.seek(offset); + IOUtils.copyBytes(in, System.out, 1024, false); + offset = in.getPos(); + in.close(); + if (!foption) { + break; + } + fileSize = srcFs.getFileStatus(path).getLen(); + offset = (fileSize > offset) ? offset: fileSize; + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + break; + } + } + } + + /** + * This class runs a command on a given FileStatus. This can be used for + * running various commands like chmod, chown etc. + */ + static abstract class CmdHandler { + + protected int errorCode = 0; + protected boolean okToContinue = true; + protected String cmdName; + + int getErrorCode() { return errorCode; } + boolean okToContinue() { return okToContinue; } + String getName() { return cmdName; } + + protected CmdHandler(String cmdName, FileSystem fs) { + this.cmdName = cmdName; + } + + public abstract void run(FileStatus file, FileSystem fs) throws IOException; + } + + /** helper returns listStatus() */ + private static FileStatus[] shellListStatus(String cmd, + FileSystem srcFs, + FileStatus src) { + if (!src.isDir()) { + FileStatus[] files = { src }; + return files; + } + Path path = src.getPath(); + try { + FileStatus[] files = srcFs.listStatus(path); + if ( files == null ) { + System.err.println(cmd + + ": could not get listing for '" + path + "'"); + } + return files; + } catch (IOException e) { + System.err.println(cmd + + ": could not get get listing for '" + path + "' : " + + e.getMessage().split("\n")[0]); + } + return null; + } + + + /** + * Runs the command on a given file with the command handler. + * If recursive is set, command is run recursively. + */ + private static int runCmdHandler(CmdHandler handler, FileStatus stat, + FileSystem srcFs, + boolean recursive) throws IOException { + int errors = 0; + handler.run(stat, srcFs); + if (recursive && stat.isDir() && handler.okToContinue()) { + FileStatus[] files = shellListStatus(handler.getName(), srcFs, stat); + if (files == null) { + return 1; + } + for(FileStatus file : files ) { + errors += runCmdHandler(handler, file, srcFs, recursive); + } + } + return errors; + } + + ///top level runCmdHandler + int runCmdHandler(CmdHandler handler, String[] args, + int startIndex, boolean recursive) + throws IOException { + int errors = 0; + + for (int i=startIndex; i 0 || handler.getErrorCode() != 0) ? 1 : 0; + } + + /** + * Return an abbreviated English-language desc of the byte length + * @deprecated Consider using {@link org.apache.hadoop.util.StringUtils#byteDesc} instead. 
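// A standalone, simplified sketch of the -tail -f loop above (the file name
// is hypothetical): re-open the file, seek to the last printed offset, stream
// any new bytes to stdout, then sleep and repeat.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class TailSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/logs/app.log");        // hypothetical file
    FileSystem fs = path.getFileSystem(conf);
    long len = fs.getFileStatus(path).getLen();
    long offset = len > 1024 ? len - 1024 : 0;    // start with the last 1KB
    while (true) {
      FSDataInputStream in = fs.open(path);
      in.seek(offset);
      IOUtils.copyBytes(in, System.out, 1024, false);  // false = keep stdout open
      offset = in.getPos();
      in.close();
      Thread.sleep(5000);                         // same 5s interval as above
    }
  }
}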
+ */ + @Deprecated + public static String byteDesc(long len) { + return StringUtils.byteDesc(len); + } + + /** + * @deprecated Consider using {@link org.apache.hadoop.util.StringUtils#limitDecimalTo2} instead. + */ + @Deprecated + public static synchronized String limitDecimalTo2(double d) { + return StringUtils.limitDecimalTo2(d); + } + + private void printHelp(String cmd) { + String summary = "hadoop fs is the command to execute fs commands. " + + "The full syntax is: \n\n" + + "hadoop fs [-fs ] [-conf ]\n\t" + + "[-D ] [-ls ] [-lsr ] [-df []] [-du ]\n\t" + + "[-dus ] [-mv ] [-cp ] [-rm ]\n\t" + + "[-rmr ] [-put ... ] [-copyFromLocal ... ]\n\t" + + "[-moveFromLocal ... ] [" + + GET_SHORT_USAGE + "\n\t" + + "[-getmerge [addnl]] [-cat ]\n\t" + + "[" + COPYTOLOCAL_SHORT_USAGE + "] [-moveToLocal ]\n\t" + + "[-mkdir ] [-report] [" + SETREP_SHORT_USAGE + "]\n\t" + + "[-touchz ] [-test -[ezd] ] [-stat [format] ]\n\t" + + "[-tail [-f] ] [-text ]\n\t" + + "[" + FsShellPermissions.CHMOD_USAGE + "]\n\t" + + "[" + FsShellPermissions.CHOWN_USAGE + "]\n\t" + + "[" + FsShellPermissions.CHGRP_USAGE + "]\n\t" + + "[" + Count.USAGE + "]\n\t" + + "[-help [cmd]]\n"; + + String conf ="-conf : Specify an application configuration file."; + + String D = "-D : Use value for given property."; + + String fs = "-fs [local | ]: \tSpecify the file system to use.\n" + + "\t\tIf not specified, the current configuration is used, \n" + + "\t\ttaken from the following, in increasing precedence: \n" + + "\t\t\tcore-default.xml inside the hadoop jar file \n" + + "\t\t\tcore-site.xml in $HADOOP_CONF_DIR \n" + + "\t\t'local' means use the local file system as your DFS. \n" + + "\t\t specifies a particular file system to \n" + + "\t\tcontact. This argument is optional but if used must appear\n" + + "\t\tappear first on the command line. Exactly one additional\n" + + "\t\targument must be specified. \n"; + + + String ls = "-ls : \tList the contents that match the specified file pattern. If\n" + + "\t\tpath is not specified, the contents of /user/\n" + + "\t\twill be listed. Directory entries are of the form \n" + + "\t\t\tdirName (full path) \n" + + "\t\tand file entries are of the form \n" + + "\t\t\tfileName(full path) size \n" + + "\t\twhere n is the number of replicas specified for the file \n" + + "\t\tand size is the size of the file, in bytes.\n"; + + String lsr = "-lsr : \tRecursively list the contents that match the specified\n" + + "\t\tfile pattern. Behaves very similarly to hadoop fs -ls,\n" + + "\t\texcept that the data is shown for all the entries in the\n" + + "\t\tsubtree.\n"; + + String df = "-df []: \tShows the capacity, free and used space of the filesystem.\n"+ + "\t\tIf the filesystem has multiple partitions, and no path to a particular partition\n"+ + "\t\tis specified, then the status of the root partitions will be shown.\n"; + + String du = "-du : \tShow the amount of space, in bytes, used by the files that \n" + + "\t\tmatch the specified file pattern. Equivalent to the unix\n" + + "\t\tcommand \"du -sb /*\" in case of a directory, \n" + + "\t\tand to \"du -b \" in case of a file.\n" + + "\t\tThe output is in the form \n" + + "\t\t\tname(full path) size (in bytes)\n"; + + String dus = "-dus : \tShow the amount of space, in bytes, used by the files that \n" + + "\t\tmatch the specified file pattern. 
Equivalent to the unix\n" + + "\t\tcommand \"du -sb\". The output is in the form \n" + + "\t\t\tname(full path) size (in bytes)\n"; + + String mv = "-mv <src> <dst>: Move files that match the specified file pattern <src>\n" + + "\t\tto a destination <dst>. When moving multiple files, the \n" + + "\t\tdestination must be a directory. \n"; + + String cp = "-cp <src> <dst>: Copy files that match the file pattern <src> to a \n" + + "\t\tdestination. When copying multiple files, the destination\n" + + "\t\tmust be a directory. \n"; + + String rm = "-rm <src>: \tDelete all files that match the specified file pattern.\n" + + "\t\tEquivalent to the Unix command \"rm <src>\"\n"; + + String rmr = "-rmr <src>: \tRemove all directories which match the specified file \n" + + "\t\tpattern. Equivalent to the Unix command \"rm -rf <src>\"\n"; + + String put = "-put <localsrc> ... <dst>: \tCopy files " + + "from the local file system \n\t\tinto fs. \n"; + + String copyFromLocal = "-copyFromLocal <localsrc> ... <dst>:" + + " Identical to the -put command.\n"; + + String moveFromLocal = "-moveFromLocal <localsrc> ... <dst>:" + + " Same as -put, except that the source is\n\t\tdeleted after it's copied.\n"; + + String get = GET_SHORT_USAGE + + ": Copy files that match the file pattern <src> \n" + + "\t\tto the local name. <src> is kept. When copying multiple \n" + + "\t\tfiles, the destination must be a directory. \n"; + + String getmerge = "-getmerge <src> <localdst>: Get all the files in the directories that \n" + + "\t\tmatch the source file pattern and merge and sort them to only\n" + + "\t\tone file on local fs. <src> is kept.\n"; + + String cat = "-cat <src>: \tFetch all files that match the file pattern <src> \n" + + "\t\tand display their content on stdout.\n"; + + + String text = "-text <src>: \tTakes a source file and outputs the file in text format.\n" + + "\t\tThe allowed formats are zip and TextRecordInputStream.\n"; + + + String copyToLocal = COPYTOLOCAL_SHORT_USAGE + + ": Identical to the -get command.\n"; + + String moveToLocal = "-moveToLocal <src> <localdst>: Not implemented yet \n"; + + String mkdir = "-mkdir <path>: \tCreate a directory in specified location. \n"; + + String setrep = SETREP_SHORT_USAGE + + ": Set the replication level of a file. \n" + + "\t\tThe -R flag requests a recursive change of replication level \n" + + "\t\tfor an entire tree.\n"; + + String touchz = "-touchz <path>: Write a timestamp in yyyy-MM-dd HH:mm:ss format\n" + + "\t\tin a file at <path>. An error is returned if the file exists with non-zero length\n"; + + String test = "-test -[ezd] <path>: If file { exists, has zero length, is a directory\n" + + "\t\tthen return 0, else return 1.\n"; + + String stat = "-stat [format] <path>: Print statistics about the file/directory at <path>\n" + + "\t\tin the specified format. Format accepts filesize in blocks (%b), filename (%n),\n" + + "\t\tblock size (%o), replication (%r), modification date (%y, %Y)\n"; + + String tail = TAIL_USAGE + + ": Show the last 1KB of the file. \n" + + "\t\tThe -f option shows appended data as the file grows. \n"; + + String chmod = FsShellPermissions.CHMOD_USAGE + "\n" + + "\t\tChanges permissions of a file.\n" + + "\t\tThis works similar to shell's chmod with a few exceptions.\n\n" + + "\t-R\tmodifies the files recursively. This is the only option\n" + + "\t\tcurrently supported.\n\n" + + "\tMODE\tMode is same as mode used for chmod shell command.\n" + + "\t\tOnly letters recognized are 'rwxXt'. E.g. +t,a+r,g-w,+rwx,o=r\n\n" + + "\tOCTALMODE Mode specified in 3 or 4 digits. If 4 digits, the first may\n" + + "\tbe 1 or 0 to turn the sticky bit on or off, respectively.
Unlike " + + "\tshell command, it is not possible to specify only part of the mode\n" + + "\t\tE.g. 754 is same as u=rwx,g=rx,o=r\n\n" + + "\t\tIf none of 'augo' is specified, 'a' is assumed and unlike\n" + + "\t\tshell command, no umask is applied.\n"; + + String chown = FsShellPermissions.CHOWN_USAGE + "\n" + + "\t\tChanges owner and group of a file.\n" + + "\t\tThis is similar to shell's chown with a few exceptions.\n\n" + + "\t-R\tmodifies the files recursively. This is the only option\n" + + "\t\tcurrently supported.\n\n" + + "\t\tIf only owner or group is specified then only owner or\n" + + "\t\tgroup is modified.\n\n" + + "\t\tThe owner and group names may only cosists of digits, alphabet,\n"+ + "\t\tand any of '-_.@/' i.e. [-_.@/a-zA-Z0-9]. The names are case\n" + + "\t\tsensitive.\n\n" + + "\t\tWARNING: Avoid using '.' to separate user name and group though\n" + + "\t\tLinux allows it. If user names have dots in them and you are\n" + + "\t\tusing local file system, you might see surprising results since\n" + + "\t\tshell command 'chown' is used for local files.\n"; + + String chgrp = FsShellPermissions.CHGRP_USAGE + "\n" + + "\t\tThis is equivalent to -chown ... :GROUP ...\n"; + + String help = "-help [cmd]: \tDisplays help for given command or all commands if none\n" + + "\t\tis specified.\n"; + + if ("fs".equals(cmd)) { + System.out.println(fs); + } else if ("conf".equals(cmd)) { + System.out.println(conf); + } else if ("D".equals(cmd)) { + System.out.println(D); + } else if ("ls".equals(cmd)) { + System.out.println(ls); + } else if ("lsr".equals(cmd)) { + System.out.println(lsr); + } else if ("df".equals(cmd)) { + System.out.println(df); + } else if ("du".equals(cmd)) { + System.out.println(du); + } else if ("dus".equals(cmd)) { + System.out.println(dus); + } else if ("rm".equals(cmd)) { + System.out.println(rm); + } else if ("rmr".equals(cmd)) { + System.out.println(rmr); + } else if ("mkdir".equals(cmd)) { + System.out.println(mkdir); + } else if ("mv".equals(cmd)) { + System.out.println(mv); + } else if ("cp".equals(cmd)) { + System.out.println(cp); + } else if ("put".equals(cmd)) { + System.out.println(put); + } else if ("copyFromLocal".equals(cmd)) { + System.out.println(copyFromLocal); + } else if ("moveFromLocal".equals(cmd)) { + System.out.println(moveFromLocal); + } else if ("get".equals(cmd)) { + System.out.println(get); + } else if ("getmerge".equals(cmd)) { + System.out.println(getmerge); + } else if ("copyToLocal".equals(cmd)) { + System.out.println(copyToLocal); + } else if ("moveToLocal".equals(cmd)) { + System.out.println(moveToLocal); + } else if ("cat".equals(cmd)) { + System.out.println(cat); + } else if ("get".equals(cmd)) { + System.out.println(get); + } else if ("setrep".equals(cmd)) { + System.out.println(setrep); + } else if ("touchz".equals(cmd)) { + System.out.println(touchz); + } else if ("test".equals(cmd)) { + System.out.println(test); + } else if ("text".equals(cmd)) { + System.out.println(text); + } else if ("stat".equals(cmd)) { + System.out.println(stat); + } else if ("tail".equals(cmd)) { + System.out.println(tail); + } else if ("chmod".equals(cmd)) { + System.out.println(chmod); + } else if ("chown".equals(cmd)) { + System.out.println(chown); + } else if ("chgrp".equals(cmd)) { + System.out.println(chgrp); + } else if (Count.matches(cmd)) { + System.out.println(Count.DESCRIPTION); + } else if ("help".equals(cmd)) { + System.out.println(help); + } else { + System.out.println(summary); + System.out.println(fs); + System.out.println(ls); + 
System.out.println(lsr); + System.out.println(df); + System.out.println(du); + System.out.println(dus); + System.out.println(mv); + System.out.println(cp); + System.out.println(rm); + System.out.println(rmr); + System.out.println(put); + System.out.println(copyFromLocal); + System.out.println(moveFromLocal); + System.out.println(get); + System.out.println(getmerge); + System.out.println(cat); + System.out.println(copyToLocal); + System.out.println(moveToLocal); + System.out.println(mkdir); + System.out.println(setrep); + System.out.println(tail); + System.out.println(touchz); + System.out.println(test); + System.out.println(text); + System.out.println(stat); + System.out.println(chmod); + System.out.println(chown); + System.out.println(chgrp); + System.out.println(Count.DESCRIPTION); + System.out.println(help); + } + + + } + + /** + * Apply operation specified by 'cmd' on all parameters + * starting from argv[startindex]. + */ + private int doall(String cmd, String argv[], int startindex) { + int exitCode = 0; + int i = startindex; + // + // for each source file, issue the command + // + for (; i < argv.length; i++) { + try { + // + // issue the command to the fs + // + if ("-cat".equals(cmd)) { + cat(argv[i], true); + } else if ("-mkdir".equals(cmd)) { + mkdir(argv[i]); + } else if ("-rm".equals(cmd)) { + delete(argv[i], false); + } else if ("-rmr".equals(cmd)) { + delete(argv[i], true); + } else if ("-df".equals(cmd)) { + df(argv[i]); + } else if ("-du".equals(cmd)) { + du(argv[i]); + } else if ("-dus".equals(cmd)) { + dus(argv[i]); + } else if (Count.matches(cmd)) { + new Count(argv, i, getConf()).runAll(); + } else if ("-ls".equals(cmd)) { + exitCode = ls(argv[i], false); + } else if ("-lsr".equals(cmd)) { + exitCode = ls(argv[i], true); + } else if ("-touchz".equals(cmd)) { + touchz(argv[i]); + } else if ("-text".equals(cmd)) { + text(argv[i]); + } + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error message. + // + exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + String content = e.getLocalizedMessage(); + if (content != null) { + content = content.split("\n")[0]; + } + System.err.println(cmd.substring(1) + ": " + + content); + } + } + return exitCode; + } + + /** + * Displays format of commands. 
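// A standalone sketch of driving FsShell programmatically (it mirrors the
// main() method further below): ToolRunner strips the generic -fs/-conf/-D
// options before handing the remaining arguments to run(). The path in the
// argument array is hypothetical.
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.util.ToolRunner;

public class FsShellDriver {
  public static void main(String[] args) throws Exception {
    FsShell shell = new FsShell();
    int rc;
    try {
      // equivalent to: hadoop fs -ls /user
      rc = ToolRunner.run(shell, new String[] { "-ls", "/user" });
    } finally {
      shell.close();
    }
    System.exit(rc);
  }
}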
+ * + */ + private static void printUsage(String cmd) { + String prefix = "Usage: java " + FsShell.class.getSimpleName(); + if ("-fs".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-fs ]"); + } else if ("-conf".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-conf ]"); + } else if ("-D".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-D <[property=value>]"); + } else if ("-ls".equals(cmd) || "-lsr".equals(cmd) || + "-du".equals(cmd) || "-dus".equals(cmd) || + "-rm".equals(cmd) || "-rmr".equals(cmd) || + "-touchz".equals(cmd) || "-mkdir".equals(cmd) || + "-text".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ]"); + } else if ("-df".equals(cmd) ) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " []]"); + } else if (Count.matches(cmd)) { + System.err.println(prefix + " [" + Count.USAGE + "]"); + } else if ("-mv".equals(cmd) || "-cp".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ]"); + } else if ("-put".equals(cmd) || "-copyFromLocal".equals(cmd) || + "-moveFromLocal".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ... ]"); + } else if ("-get".equals(cmd)) { + System.err.println("Usage: java FsShell [" + GET_SHORT_USAGE + "]"); + } else if ("-copyToLocal".equals(cmd)) { + System.err.println("Usage: java FsShell [" + COPYTOLOCAL_SHORT_USAGE+ "]"); + } else if ("-moveToLocal".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " [-crc] ]"); + } else if ("-cat".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ]"); + } else if ("-setrep".equals(cmd)) { + System.err.println("Usage: java FsShell [" + SETREP_SHORT_USAGE + "]"); + } else if ("-test".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-test -[ezd] ]"); + } else if ("-stat".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-stat [format] ]"); + } else if ("-tail".equals(cmd)) { + System.err.println("Usage: java FsShell [" + TAIL_USAGE + "]"); + } else { + System.err.println("Usage: java FsShell"); + System.err.println(" [-ls ]"); + System.err.println(" [-lsr ]"); + System.err.println(" [-df []]"); + System.err.println(" [-du ]"); + System.err.println(" [-dus ]"); + System.err.println(" [" + Count.USAGE + "]"); + System.err.println(" [-mv ]"); + System.err.println(" [-cp ]"); + System.err.println(" [-rm ]"); + System.err.println(" [-rmr ]"); + System.err.println(" [-expunge]"); + System.err.println(" [-put ... ]"); + System.err.println(" [-copyFromLocal ... ]"); + System.err.println(" [-moveFromLocal ... 
]"); + System.err.println(" [" + GET_SHORT_USAGE + "]"); + System.err.println(" [-getmerge [addnl]]"); + System.err.println(" [-cat ]"); + System.err.println(" [-text ]"); + System.err.println(" [" + COPYTOLOCAL_SHORT_USAGE + "]"); + System.err.println(" [-moveToLocal [-crc] ]"); + System.err.println(" [-mkdir ]"); + System.err.println(" [" + SETREP_SHORT_USAGE + "]"); + System.err.println(" [-touchz ]"); + System.err.println(" [-test -[ezd] ]"); + System.err.println(" [-stat [format] ]"); + System.err.println(" [" + TAIL_USAGE + "]"); + System.err.println(" [" + FsShellPermissions.CHMOD_USAGE + "]"); + System.err.println(" [" + FsShellPermissions.CHOWN_USAGE + "]"); + System.err.println(" [" + FsShellPermissions.CHGRP_USAGE + "]"); + System.err.println(" [-help [cmd]]"); + System.err.println(); + ToolRunner.printGenericCommandUsage(System.err); + } + } + + /** + * run + */ + public int run(String argv[]) throws Exception { + + if (argv.length < 1) { + printUsage(""); + return -1; + } + + int exitCode = -1; + int i = 0; + String cmd = argv[i++]; + // + // verify that we have enough command line parameters + // + if ("-put".equals(cmd) || "-test".equals(cmd) || + "-copyFromLocal".equals(cmd) || "-moveFromLocal".equals(cmd)) { + if (argv.length < 3) { + printUsage(cmd); + return exitCode; + } + } else if ("-get".equals(cmd) || + "-copyToLocal".equals(cmd) || "-moveToLocal".equals(cmd)) { + if (argv.length < 3) { + printUsage(cmd); + return exitCode; + } + } else if ("-mv".equals(cmd) || "-cp".equals(cmd)) { + if (argv.length < 3) { + printUsage(cmd); + return exitCode; + } + } else if ("-rm".equals(cmd) || "-rmr".equals(cmd) || + "-cat".equals(cmd) || "-mkdir".equals(cmd) || + "-touchz".equals(cmd) || "-stat".equals(cmd) || + "-text".equals(cmd)) { + if (argv.length < 2) { + printUsage(cmd); + return exitCode; + } + } + // initialize FsShell + try { + init(); + } catch (RPC.VersionMismatch v) { + System.err.println("Version Mismatch between client and server" + + "... command aborted."); + return exitCode; + } catch (IOException e) { + System.err.println("Bad connection to FS. 
command aborted."); + return exitCode; + } + + exitCode = 0; + try { + if ("-put".equals(cmd) || "-copyFromLocal".equals(cmd)) { + Path[] srcs = new Path[argv.length-2]; + for (int j=0 ; i < argv.length-1 ;) + srcs[j++] = new Path(argv[i++]); + copyFromLocal(srcs, argv[i++]); + } else if ("-moveFromLocal".equals(cmd)) { + Path[] srcs = new Path[argv.length-2]; + for (int j=0 ; i < argv.length-1 ;) + srcs[j++] = new Path(argv[i++]); + moveFromLocal(srcs, argv[i++]); + } else if ("-get".equals(cmd) || "-copyToLocal".equals(cmd)) { + copyToLocal(argv, i); + } else if ("-getmerge".equals(cmd)) { + if (argv.length>i+2) + copyMergeToLocal(argv[i++], new Path(argv[i++]), Boolean.parseBoolean(argv[i++])); + else + copyMergeToLocal(argv[i++], new Path(argv[i++])); + } else if ("-cat".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-text".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-moveToLocal".equals(cmd)) { + moveToLocal(argv[i++], new Path(argv[i++])); + } else if ("-setrep".equals(cmd)) { + setReplication(argv, i); + } else if ("-chmod".equals(cmd) || + "-chown".equals(cmd) || + "-chgrp".equals(cmd)) { + FsShellPermissions.changePermissions(fs, cmd, argv, i, this); + } else if ("-ls".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + exitCode = ls(Path.CUR_DIR, false); + } + } else if ("-lsr".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + exitCode = ls(Path.CUR_DIR, true); + } + } else if ("-mv".equals(cmd)) { + exitCode = rename(argv, getConf()); + } else if ("-cp".equals(cmd)) { + exitCode = copy(argv, getConf()); + } else if ("-rm".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-rmr".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-expunge".equals(cmd)) { + expunge(); + } else if ("-df".equals(cmd)) { + if (argv.length-1 > 0) { + exitCode = doall(cmd, argv, i); + } else { + df(null); + } + } else if ("-du".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + du("."); + } + } else if ("-dus".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + dus("."); + } + } else if (Count.matches(cmd)) { + exitCode = new Count(argv, i, getConf()).runAll(); + } else if ("-mkdir".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-touchz".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-test".equals(cmd)) { + exitCode = test(argv, i); + } else if ("-stat".equals(cmd)) { + if (i + 1 < argv.length) { + stat(argv[i++].toCharArray(), argv[i++]); + } else { + stat("%y".toCharArray(), argv[i]); + } + } else if ("-help".equals(cmd)) { + if (i < argv.length) { + printHelp(argv[i]); + } else { + printHelp(""); + } + } else if ("-tail".equals(cmd)) { + tail(argv, i); + } else { + exitCode = -1; + System.err.println(cmd.substring(1) + ": Unknown command"); + printUsage(""); + } + } catch (IllegalArgumentException arge) { + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + arge.getLocalizedMessage()); + printUsage(cmd); + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error mesage, ignore the stack trace. 
+ exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + + e.getLocalizedMessage()); + } catch (Exception re) { + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + re.getLocalizedMessage()); + } finally { + } + return exitCode; + } + + public void close() throws IOException { + if (fs != null) { + fs.close(); + fs = null; + } + } + + /** + * main() has some simple utility methods + */ + public static void main(String argv[]) throws Exception { + FsShell shell = new FsShell(); + int res; + try { + res = ToolRunner.run(shell, argv); + } finally { + shell.close(); + } + System.exit(res); + } + + /** + * Accumulate exceptions if there is any. Throw them at last. + */ + private abstract class DelayedExceptionThrowing { + abstract void process(Path p, FileSystem srcFs) throws IOException; + + final void globAndProcess(Path srcPattern, FileSystem srcFs + ) throws IOException { + List exceptions = new ArrayList(); + for(Path p : FileUtil.stat2Paths(srcFs.globStatus(srcPattern), + srcPattern)) + try { process(p, srcFs); } + catch(IOException ioe) { exceptions.add(ioe); } + + if (!exceptions.isEmpty()) + if (exceptions.size() == 1) + throw exceptions.get(0); + else + throw new IOException("Multiple IOExceptions: " + exceptions); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FsShellPermissions.java b/src/java/org/apache/hadoop/fs/FsShellPermissions.java new file mode 100644 index 00000000000..27997c7e7a8 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsShellPermissions.java @@ -0,0 +1,315 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.fs.FsShell.CmdHandler; +import org.apache.hadoop.fs.permission.FsPermission; + + +/** + * This class is the home for file permissions related commands. + * Moved to this separate class since FsShell is getting too large. + */ +class FsShellPermissions { + + /*========== chmod ==========*/ + + /* + * The pattern is almost as flexible as mode allowed by chmod shell command. + * The main restriction is that we recognize only rwxXt. To reduce errors we + * also enforce octal mode specifications of either 3 digits without a sticky + * bit setting or four digits with a sticky bit setting. 
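// A standalone illustration of what the two mode patterns declared just below
// accept: symbolic modes such as "u+rwx,g-w" and octal modes such as "644" or
// "1755" (an optional leading 0/1 selects the sticky bit). The class name and
// the sample mode strings are arbitrary.
import java.util.regex.Pattern;

public class ChmodPatternDemo {
  public static void main(String[] args) {
    Pattern normal = Pattern.compile("\\G\\s*([ugoa]*)([+=-]+)([rwxXt]+)([,\\s]*)\\s*");
    Pattern octal  = Pattern.compile("^\\s*[+]?([01]?)([0-7]{3})\\s*$");
    // expected: the first four modes are accepted, "999" is rejected
    for (String mode : new String[] { "u+rwx,g-w", "a=r", "644", "1755", "999" }) {
      boolean ok = normal.matcher(mode).find() || octal.matcher(mode).matches();
      System.out.println(mode + " -> " + (ok ? "accepted" : "rejected"));
    }
  }
}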
+ */ + private static Pattern chmodNormalPattern = + Pattern.compile("\\G\\s*([ugoa]*)([+=-]+)([rwxXt]+)([,\\s]*)\\s*"); + private static Pattern chmodOctalPattern = + Pattern.compile("^\\s*[+]?([01]?)([0-7]{3})\\s*$"); + + static String CHMOD_USAGE = + "-chmod [-R] PATH..."; + + private static class ChmodHandler extends CmdHandler { + + private short userMode; + private short groupMode; + private short othersMode; + private short stickyMode; + private char userType = '+'; + private char groupType = '+'; + private char othersType = '+'; + private char stickyBitType = '+'; + + private void applyNormalPattern(String modeStr, Matcher matcher) + throws IOException { + // Are there multiple permissions stored in one chmod? + boolean commaSeperated = false; + + for(int i=0; i < 1 || matcher.end() < modeStr.length(); i++) { + if (i>0 && (!commaSeperated || !matcher.find())) { + patternError(modeStr); + } + + /* groups : 1 : [ugoa]* + * 2 : [+-=] + * 3 : [rwxXt]+ + * 4 : [,\s]* + */ + + String str = matcher.group(2); + char type = str.charAt(str.length() - 1); + + boolean user, group, others, stickyBit; + user = group = others = stickyBit = false; + + for(char c : matcher.group(1).toCharArray()) { + switch (c) { + case 'u' : user = true; break; + case 'g' : group = true; break; + case 'o' : others = true; break; + case 'a' : break; + default : throw new RuntimeException("Unexpected"); + } + } + + if (!(user || group || others)) { // same as specifying 'a' + user = group = others = true; + } + + short mode = 0; + + for(char c : matcher.group(3).toCharArray()) { + switch (c) { + case 'r' : mode |= 4; break; + case 'w' : mode |= 2; break; + case 'x' : mode |= 1; break; + case 'X' : mode |= 8; break; + case 't' : stickyBit = true; break; + default : throw new RuntimeException("Unexpected"); + } + } + + if ( user ) { + userMode = mode; + userType = type; + } + + if ( group ) { + groupMode = mode; + groupType = type; + } + + if ( others ) { + othersMode = mode; + othersType = type; + + stickyMode = (short) (stickyBit ? 
1 : 0); + stickyBitType = type; + } + + commaSeperated = matcher.group(4).contains(","); + } + } + + private void applyOctalPattern(String modeStr, Matcher matcher) { + userType = groupType = othersType = '='; + + // Check if sticky bit is specified + String sb = matcher.group(1); + if(!sb.isEmpty()) { + stickyMode = Short.valueOf(sb.substring(0, 1)); + stickyBitType = '='; + } + + String str = matcher.group(2); + userMode = Short.valueOf(str.substring(0, 1)); + groupMode = Short.valueOf(str.substring(1, 2)); + othersMode = Short.valueOf(str.substring(2, 3)); + } + + private void patternError(String mode) throws IOException { + throw new IOException("chmod : mode '" + mode + + "' does not match the expected pattern."); + } + + ChmodHandler(FileSystem fs, String modeStr) throws IOException { + super("chmod", fs); + Matcher matcher = null; + + if ((matcher = chmodNormalPattern.matcher(modeStr)).find()) { + applyNormalPattern(modeStr, matcher); + } else if ((matcher = chmodOctalPattern.matcher(modeStr)).matches()) { + applyOctalPattern(modeStr, matcher); + } else { + patternError(modeStr); + } + } + + private int applyChmod(char type, int mode, int existing, boolean exeOk) { + boolean capX = false; + + if ((mode&8) != 0) { // convert X to x; + capX = true; + mode &= ~8; + mode |= 1; + } + + switch (type) { + case '+' : mode = mode | existing; break; + case '-' : mode = (~mode) & existing; break; + case '=' : break; + default : throw new RuntimeException("Unexpected"); + } + + // if X is specified add 'x' only if exeOk or x was already set. + if (capX && !exeOk && (mode&1) != 0 && (existing&1) == 0) { + mode &= ~1; // remove x + } + + return mode; + } + + @Override + public void run(FileStatus file, FileSystem srcFs) throws IOException { + FsPermission perms = file.getPermission(); + int existing = perms.toShort(); + boolean exeOk = file.isDir() || (existing & 0111) != 0; + int newperms = ( applyChmod(stickyBitType, stickyMode, + (existing>>>9), false) << 9 | + applyChmod(userType, userMode, + (existing>>>6)&7, exeOk) << 6 | + applyChmod(groupType, groupMode, + (existing>>>3)&7, exeOk) << 3 | + applyChmod(othersType, othersMode, existing&7, exeOk)); + + if (existing != newperms) { + try { + srcFs.setPermission(file.getPath(), + new FsPermission((short)newperms)); + } catch (IOException e) { + System.err.println(getName() + ": changing permissions of '" + + file.getPath() + "':" + e.getMessage()); + } + } + } + } + + /*========== chown ==========*/ + + static private String allowedChars = "[-_./@a-zA-Z0-9]"; + ///allows only "allowedChars" above in names for owner and group + static private Pattern chownPattern = + Pattern.compile("^\\s*(" + allowedChars + "+)?" 
+ + "([:](" + allowedChars + "*))?\\s*$"); + static private Pattern chgrpPattern = + Pattern.compile("^\\s*(" + allowedChars + "+)\\s*$"); + + static String CHOWN_USAGE = "-chown [-R] [OWNER][:[GROUP]] PATH..."; + static String CHGRP_USAGE = "-chgrp [-R] GROUP PATH..."; + + private static class ChownHandler extends CmdHandler { + protected String owner = null; + protected String group = null; + + protected ChownHandler(String cmd, FileSystem fs) { //for chgrp + super(cmd, fs); + } + + ChownHandler(FileSystem fs, String ownerStr) throws IOException { + super("chown", fs); + Matcher matcher = chownPattern.matcher(ownerStr); + if (!matcher.matches()) { + throw new IOException("'" + ownerStr + "' does not match " + + "expected pattern for [owner][:group]."); + } + owner = matcher.group(1); + group = matcher.group(3); + if (group != null && group.length() == 0) { + group = null; + } + if (owner == null && group == null) { + throw new IOException("'" + ownerStr + "' does not specify " + + " onwer or group."); + } + } + + @Override + public void run(FileStatus file, FileSystem srcFs) throws IOException { + //Should we do case insensitive match? + String newOwner = (owner == null || owner.equals(file.getOwner())) ? + null : owner; + String newGroup = (group == null || group.equals(file.getGroup())) ? + null : group; + + if (newOwner != null || newGroup != null) { + try { + srcFs.setOwner(file.getPath(), newOwner, newGroup); + } catch (IOException e) { + System.err.println(getName() + ": changing ownership of '" + + file.getPath() + "':" + e.getMessage()); + + } + } + } + } + + /*========== chgrp ==========*/ + + private static class ChgrpHandler extends ChownHandler { + ChgrpHandler(FileSystem fs, String groupStr) throws IOException { + super("chgrp", fs); + + Matcher matcher = chgrpPattern.matcher(groupStr); + if (!matcher.matches()) { + throw new IOException("'" + groupStr + "' does not match " + + "expected pattern for group"); + } + group = matcher.group(1); + } + } + + static void changePermissions(FileSystem fs, String cmd, + String argv[], int startIndex, FsShell shell) + throws IOException { + CmdHandler handler = null; + boolean recursive = false; + + // handle common arguments, currently only "-R" + for (; startIndex < argv.length && argv[startIndex].equals("-R"); + startIndex++) { + recursive = true; + } + + if ( startIndex >= argv.length ) { + throw new IOException("Not enough arguments for the command"); + } + + if (cmd.equals("-chmod")) { + handler = new ChmodHandler(fs, argv[startIndex++]); + } else if (cmd.equals("-chown")) { + handler = new ChownHandler(fs, argv[startIndex++]); + } else if (cmd.equals("-chgrp")) { + handler = new ChgrpHandler(fs, argv[startIndex++]); + } + + shell.runCmdHandler(handler, argv, startIndex, recursive); + } +} diff --git a/src/java/org/apache/hadoop/fs/FsStatus.java b/src/java/org/apache/hadoop/fs/FsStatus.java new file mode 100644 index 00000000000..0c7a5ac5747 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsStatus.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; + +/** This class is used to represent the capacity, free and used space on a + * {@link FileSystem}. + */ +public class FsStatus implements Writable { + private long capacity; + private long used; + private long remaining; + + /** Construct a FsStatus object, using the specified statistics */ + public FsStatus(long capacity, long used, long remaining) { + this.capacity = capacity; + this.used = used; + this.remaining = remaining; + } + + /** Return the capacity in bytes of the file system */ + public long getCapacity() { + return capacity; + } + + /** Return the number of bytes used on the file system */ + public long getUsed() { + return used; + } + + /** Return the number of remaining bytes on the file system */ + public long getRemaining() { + return remaining; + } + + ////////////////////////////////////////////////// + // Writable + ////////////////////////////////////////////////// + public void write(DataOutput out) throws IOException { + out.writeLong(capacity); + out.writeLong(used); + out.writeLong(remaining); + } + + public void readFields(DataInput in) throws IOException { + capacity = in.readLong(); + used = in.readLong(); + remaining = in.readLong(); + } +} diff --git a/src/java/org/apache/hadoop/fs/FsUrlConnection.java b/src/java/org/apache/hadoop/fs/FsUrlConnection.java new file mode 100644 index 00000000000..c919b8b4047 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsUrlConnection.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +import org.apache.hadoop.conf.Configuration; + +/** + * Representation of a URL connection to open InputStreams. 
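// A standalone sketch tying FsStatus to the -df output shown earlier: fetch
// the status for a path and print capacity, used and remaining bytes plus a
// Use% figure. The path is hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;

public class DfSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path p = new Path("/");                       // root partition
    FileSystem fs = p.getFileSystem(conf);
    FsStatus st = fs.getStatus(p);
    int usePercent = (int) (100.0f * st.getUsed() / st.getCapacity());
    System.out.printf("%s\t%d\t%d\t%d\t%d%%%n",
        p, st.getCapacity(), st.getUsed(), st.getRemaining(), usePercent);
  }
}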
+ */ +class FsUrlConnection extends URLConnection { + + private Configuration conf; + + private InputStream is; + + FsUrlConnection(Configuration conf, URL url) { + super(url); + this.conf = conf; + } + + @Override + public void connect() throws IOException { + try { + FileSystem fs = FileSystem.get(url.toURI(), conf); + is = fs.open(new Path(url.getPath())); + } catch (URISyntaxException e) { + throw new IOException(e.toString()); + } + } + + /* @inheritDoc */ + @Override + public InputStream getInputStream() throws IOException { + if (is == null) { + connect(); + } + return is; + } + +} diff --git a/src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java b/src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java new file mode 100644 index 00000000000..37c6fcf4807 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.net.URL; +import java.net.URLStreamHandler; + +import org.apache.hadoop.conf.Configuration; + +/** + * URLStream handler relying on FileSystem and on a given Configuration to + * handle URL protocols. + */ +class FsUrlStreamHandler extends URLStreamHandler { + + private Configuration conf; + + FsUrlStreamHandler(Configuration conf) { + this.conf = conf; + } + + FsUrlStreamHandler() { + this.conf = new Configuration(); + } + + @Override + protected FsUrlConnection openConnection(URL url) throws IOException { + return new FsUrlConnection(conf, url); + } + +} diff --git a/src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java b/src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java new file mode 100644 index 00000000000..624d7050b93 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs; + +import java.net.URLStreamHandlerFactory; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +/** + * Factory for URL stream handlers. + * + * There is only one handler whose job is to create UrlConnections. A + * FsUrlConnection relies on FileSystem to choose the appropriate FS + * implementation. + * + * Before returning our handler, we make sure that FileSystem knows an + * implementation for the requested scheme/protocol. + */ +public class FsUrlStreamHandlerFactory implements + URLStreamHandlerFactory { + + // The configuration holds supported FS implementation class names. + private Configuration conf; + + // This map stores whether a protocol is know or not by FileSystem + private Map protocols = new HashMap(); + + // The URL Stream handler + private java.net.URLStreamHandler handler; + + public FsUrlStreamHandlerFactory() { + this.conf = new Configuration(); + // force the resolution of the configuration files + // this is required if we want the factory to be able to handle + // file:// URLs + this.conf.getClass("fs.file.impl", null); + this.handler = new FsUrlStreamHandler(this.conf); + } + + public FsUrlStreamHandlerFactory(Configuration conf) { + this.conf = new Configuration(conf); + // force the resolution of the configuration files + this.conf.getClass("fs.file.impl", null); + this.handler = new FsUrlStreamHandler(this.conf); + } + + public java.net.URLStreamHandler createURLStreamHandler(String protocol) { + if (!protocols.containsKey(protocol)) { + boolean known = + (conf.getClass("fs." + protocol + ".impl", null) != null); + protocols.put(protocol, known); + } + if (protocols.get(protocol)) { + return handler; + } else { + // FileSystem does not know the protocol, let the VM handle this + return null; + } + } + +} diff --git a/src/java/org/apache/hadoop/fs/GlobExpander.java b/src/java/org/apache/hadoop/fs/GlobExpander.java new file mode 100644 index 00000000000..bc9b27674e0 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/GlobExpander.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +class GlobExpander { + + static class StringWithOffset { + String string; + int offset; + public StringWithOffset(String string, int offset) { + super(); + this.string = string; + this.offset = offset; + } + } + + /** + * Expand globs in the given filePattern into a collection of + * file patterns so that in the expanded set no file pattern has a + * slash character ("/") in a curly bracket pair. 
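+ * <p>
+ * For example (patterns chosen for illustration only), {@code {a/b,c}/d}
+ * expands to the two patterns {@code a/b/d} and {@code c/d}, while
+ * {@code {a,b}/c} is returned unchanged because its bracket pair contains
+ * no slash.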
+ * @param filePattern + * @return expanded file patterns + * @throws IOException + */ + public static List expand(String filePattern) throws IOException { + List fullyExpanded = new ArrayList(); + List toExpand = new ArrayList(); + toExpand.add(new StringWithOffset(filePattern, 0)); + while (!toExpand.isEmpty()) { + StringWithOffset path = toExpand.remove(0); + List expanded = expandLeftmost(path); + if (expanded == null) { + fullyExpanded.add(path.string); + } else { + toExpand.addAll(0, expanded); + } + } + return fullyExpanded; + } + + /** + * Expand the leftmost outer curly bracket pair containing a + * slash character ("/") in filePattern. + * @param filePattern + * @return expanded file patterns + * @throws IOException + */ + private static List expandLeftmost(StringWithOffset + filePatternWithOffset) throws IOException { + + String filePattern = filePatternWithOffset.string; + int leftmost = leftmostOuterCurlyContainingSlash(filePattern, + filePatternWithOffset.offset); + if (leftmost == -1) { + return null; + } + int curlyOpen = 0; + StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost)); + StringBuilder suffix = new StringBuilder(); + List alts = new ArrayList(); + StringBuilder alt = new StringBuilder(); + StringBuilder cur = prefix; + for (int i = leftmost; i < filePattern.length(); i++) { + char c = filePattern.charAt(i); + if (cur == suffix) { + cur.append(c); + } else if (c == '\\') { + i++; + if (i >= filePattern.length()) { + throw new IOException("Illegal file pattern: " + + "An escaped character does not present for glob " + + filePattern + " at " + i); + } + c = filePattern.charAt(i); + cur.append(c); + } else if (c == '{') { + if (curlyOpen++ == 0) { + alt.setLength(0); + cur = alt; + } else { + cur.append(c); + } + + } else if (c == '}' && curlyOpen > 0) { + if (--curlyOpen == 0) { + alts.add(alt.toString()); + alt.setLength(0); + cur = suffix; + } else { + cur.append(c); + } + } else if (c == ',') { + if (curlyOpen == 1) { + alts.add(alt.toString()); + alt.setLength(0); + } else { + cur.append(c); + } + } else { + cur.append(c); + } + } + List exp = new ArrayList(); + for (String string : alts) { + exp.add(new StringWithOffset(prefix + string + suffix, prefix.length())); + } + return exp; + } + + /** + * Finds the index of the leftmost opening curly bracket containing a + * slash character ("/") in filePattern. 
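+ * <p>
+ * For example (illustrative input), in {@code a{b,c}/{d/e,f}} the pair
+ * {@code {b,c}} contains no slash, so the index returned is that of the
+ * second opening bracket, the one enclosing {@code d/e}.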
+ * @param filePattern + * @return the index of the leftmost opening curly bracket containing a + * slash character ("/"), or -1 if there is no such bracket + * @throws IOException + */ + private static int leftmostOuterCurlyContainingSlash(String filePattern, + int offset) throws IOException { + int curlyOpen = 0; + int leftmost = -1; + boolean seenSlash = false; + for (int i = offset; i < filePattern.length(); i++) { + char c = filePattern.charAt(i); + if (c == '\\') { + i++; + if (i >= filePattern.length()) { + throw new IOException("Illegal file pattern: " + + "An escaped character does not present for glob " + + filePattern + " at " + i); + } + } else if (c == '{') { + if (curlyOpen++ == 0) { + leftmost = i; + } + } else if (c == '}' && curlyOpen > 0) { + if (--curlyOpen == 0 && leftmost != -1 && seenSlash) { + return leftmost; + } + } else if (c == '/' && curlyOpen > 0) { + seenSlash = true; + } + } + return -1; + } + +} diff --git a/src/java/org/apache/hadoop/fs/HarFileSystem.java b/src/java/org/apache/hadoop/fs/HarFileSystem.java new file mode 100644 index 00000000000..bcec4b660f1 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/HarFileSystem.java @@ -0,0 +1,892 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.util.LineReader; +import org.apache.hadoop.util.Progressable; + +/** + * This is an implementation of the Hadoop Archive + * Filesystem. This archive Filesystem has index files + * of the form _index* and has contents of the form + * part-*. The index files store the indexes of the + * real files. The index files are of the form _masterindex + * and _index. The master index is a level of indirection + * in to the index file to make the look ups faster. the index + * file is sorted with hash code of the paths that it contains + * and the master index contains pointers to the positions in + * index for ranges of hashcodes. 
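+ * <p>
+ * A minimal read-only usage sketch (the cluster address and archive path are
+ * illustrative, and it assumes the "har" scheme is mapped to this class in
+ * the configuration):
+ * <pre>{@code
+ * Path inHar = new Path("har://hdfs-namenode:8020/user/alice/logs.har/2009/05/part-0");
+ * FileSystem harFs = inHar.getFileSystem(new Configuration());
+ * FSDataInputStream in = harFs.open(inHar);
+ * }</pre>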
+ */ + +public class HarFileSystem extends FilterFileSystem { + public static final int VERSION = 1; + // uri representation of this Har filesystem + private URI uri; + // the version of this har filesystem + private int version; + // underlying uri + private URI underLyingURI; + // the top level path of the archive + // in the underlying file system + private Path archivePath; + // the masterIndex of the archive + private Path masterIndex; + // the index file + private Path archiveIndex; + // the har auth + private String harAuth; + + /** + * public construction of harfilesystem + * + */ + public HarFileSystem() { + } + + /** + * Constructor to create a HarFileSystem with an + * underlying filesystem. + * @param fs + */ + public HarFileSystem(FileSystem fs) { + super(fs); + } + + /** + * Initialize a Har filesystem per har archive. The + * archive home directory is the top level directory + * in the filesystem that contains the HAR archive. + * Be careful with this method, you do not want to go + * on creating new Filesystem instances per call to + * path.getFileSystem(). + * the uri of Har is + * har://underlyingfsscheme-host:port/archivepath. + * or + * har:///archivepath. This assumes the underlying filesystem + * to be used in case not specified. + */ + public void initialize(URI name, Configuration conf) throws IOException { + //decode the name + underLyingURI = decodeHarURI(name, conf); + // we got the right har Path- now check if this is + //truly a har filesystem + Path harPath = archivePath(new Path(name.toString())); + if (harPath == null) { + throw new IOException("Invalid path for the Har Filesystem. " + + name.toString()); + } + if (fs == null) { + fs = FileSystem.get(underLyingURI, conf); + } + this.uri = harPath.toUri(); + this.archivePath = new Path(this.uri.getPath()); + this.harAuth = getHarAuth(this.underLyingURI); + //check for the underlying fs containing + // the index file + this.masterIndex = new Path(archivePath, "_masterindex"); + this.archiveIndex = new Path(archivePath, "_index"); + if (!fs.exists(masterIndex) || !fs.exists(archiveIndex)) { + throw new IOException("Invalid path for the Har Filesystem. " + + "No index file in " + harPath); + } + try{ + this.version = getHarVersion(); + } catch(IOException io) { + throw new IOException("Unable to " + + "read the version of the Har file system: " + this.archivePath); + } + if (this.version != HarFileSystem.VERSION) { + throw new IOException("Invalid version " + + this.version + " expected " + HarFileSystem.VERSION); + } + } + + // get the version of the filesystem from the masterindex file + // the version is currently not useful since its the first version + // of archives + public int getHarVersion() throws IOException { + FSDataInputStream masterIn = fs.open(masterIndex); + LineReader lmaster = new LineReader(masterIn, getConf()); + Text line = new Text(); + lmaster.readLine(line); + try { + masterIn.close(); + } catch(IOException e){ + //disregard it. + // its a read. + } + String versionLine = line.toString(); + String[] arr = versionLine.split(" "); + int version = Integer.parseInt(arr[0]); + return version; + } + + /* + * find the parent path that is the + * archive path in the path. The last + * path segment that ends with .har is + * the path that will be returned. 
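+ * For example (illustrative path), an input of
+ * /user/alice/logs.har/2009/05/part-0 yields /user/alice/logs.har;
+ * if no segment ends in ".har" the result is null.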
+ */ + private Path archivePath(Path p) { + Path retPath = null; + Path tmp = p; + for (int i=0; i< p.depth(); i++) { + if (tmp.toString().endsWith(".har")) { + retPath = tmp; + break; + } + tmp = tmp.getParent(); + } + return retPath; + } + + /** + * decode the raw URI to get the underlying URI + * @param rawURI raw Har URI + * @return filtered URI of the underlying fileSystem + */ + private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException { + String tmpAuth = rawURI.getAuthority(); + //we are using the default file + //system in the config + //so create a underlying uri and + //return it + if (tmpAuth == null) { + //create a path + return FileSystem.getDefaultUri(conf); + } + String host = rawURI.getHost(); + String[] str = host.split("-", 2); + if (str[0] == null) { + throw new IOException("URI: " + rawURI + " is an invalid Har URI."); + } + String underLyingScheme = str[0]; + String underLyingHost = (str.length > 1)? str[1]:null; + int underLyingPort = rawURI.getPort(); + String auth = (underLyingHost == null && underLyingPort == -1)? + null:(underLyingHost+":"+underLyingPort); + URI tmp = null; + if (rawURI.getQuery() != null) { + // query component not allowed + throw new IOException("query component in Path not supported " + rawURI); + } + try { + tmp = new URI(underLyingScheme, auth, rawURI.getPath(), + rawURI.getQuery(), rawURI.getFragment()); + } catch (URISyntaxException e) { + // do nothing should not happen + } + return tmp; + } + + /** + * return the top level archive. + */ + public Path getWorkingDirectory() { + return new Path(uri.toString()); + } + + /** + * Create a har specific auth + * har-underlyingfs:port + * @param underLyingURI the uri of underlying + * filesystem + * @return har specific auth + */ + private String getHarAuth(URI underLyingUri) { + String auth = underLyingUri.getScheme() + "-"; + if (underLyingUri.getHost() != null) { + auth += underLyingUri.getHost() + ":"; + if (underLyingUri.getPort() != -1) { + auth += underLyingUri.getPort(); + } + } + else { + auth += ":"; + } + return auth; + } + + /** + * Returns the uri of this filesystem. + * The uri is of the form + * har://underlyingfsschema-host:port/pathintheunderlyingfs + */ + @Override + public URI getUri() { + return this.uri; + } + + /** + * this method returns the path + * inside the har filesystem. + * this is relative path inside + * the har filesystem. + * @param path the fully qualified path in the har filesystem. + * @return relative path in the filesystem. + */ + private Path getPathInHar(Path path) { + Path harPath = new Path(path.toUri().getPath()); + if (archivePath.compareTo(harPath) == 0) + return new Path(Path.SEPARATOR); + Path tmp = new Path(harPath.getName()); + Path parent = harPath.getParent(); + while (!(parent.compareTo(archivePath) == 0)) { + if (parent.toString().equals(Path.SEPARATOR)) { + tmp = null; + break; + } + tmp = new Path(parent.getName(), tmp); + parent = parent.getParent(); + } + if (tmp != null) + tmp = new Path(Path.SEPARATOR, tmp); + return tmp; + } + + //the relative path of p. basically + // getting rid of /. Parsing and doing + // string manipulation is not good - so + // just use the path api to do it. 
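+  // For example (illustrative values),
+  // makeRelative("har://hdfs-host:8020/user/alice/logs.har", new Path("/2009/05/part-0"))
+  // yields har://hdfs-host:8020/user/alice/logs.har/2009/05/part-0.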
+ private Path makeRelative(String initial, Path p) { + Path root = new Path(Path.SEPARATOR); + if (root.compareTo(p) == 0) + return new Path(initial); + Path retPath = new Path(p.getName()); + Path parent = p.getParent(); + for (int i=0; i < p.depth()-1; i++) { + retPath = new Path(parent.getName(), retPath); + parent = parent.getParent(); + } + return new Path(initial, retPath.toString()); + } + + /* this makes a path qualified in the har filesystem + * (non-Javadoc) + * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified( + * org.apache.hadoop.fs.Path) + */ + @Override + public Path makeQualified(Path path) { + // make sure that we just get the + // path component + Path fsPath = path; + if (!path.isAbsolute()) { + fsPath = new Path(archivePath, path); + } + + URI tmpURI = fsPath.toUri(); + fsPath = new Path(tmpURI.getPath()); + //change this to Har uri + URI tmp = null; + try { + tmp = new URI(uri.getScheme(), harAuth, fsPath.toString(), + tmpURI.getQuery(), tmpURI.getFragment()); + } catch(URISyntaxException ue) { + LOG.error("Error in URI ", ue); + } + if (tmp != null) { + return new Path(tmp.toString()); + } + return null; + } + + /** + * get block locations from the underlying fs + * @param file the input filestatus to get block locations + * @param start the start in the file + * @param len the length in the file + * @return block locations for this segment of file + * @throws IOException + */ + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + // need to look up the file in the underlying fs + // look up the index + + // make sure this is a prt of this har filesystem + Path p = makeQualified(file.getPath()); + Path harPath = getPathInHar(p); + String line = fileStatusInIndex(harPath); + if (line == null) { + throw new FileNotFoundException("File " + file.getPath() + " not found"); + } + HarStatus harStatus = new HarStatus(line); + if (harStatus.isDir()) { + return new BlockLocation[0]; + } + FileStatus fsFile = fs.getFileStatus(new Path(archivePath, + harStatus.getPartName())); + BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile, + harStatus.getStartIndex() + start, len); + return fakeBlockLocations(rawBlocks, harStatus.getStartIndex()); + } + + /** + * fake the rawblocks since map reduce uses the block offsets to + * fo some computations regarding the blocks + * @param rawBlocks the raw blocks returned by the filesystem + * @return faked blocks with changed offsets. + */ + private BlockLocation[] fakeBlockLocations(BlockLocation[] rawBlocks, + long startIndex) { + for (BlockLocation block : rawBlocks) { + long rawOffset = block.getOffset(); + block.setOffset(rawOffset - startIndex); + } + return rawBlocks; + } + + /** + * the hash of the path p inside iniside + * the filesystem + * @param p the path in the harfilesystem + * @return the hash code of the path. + */ + public static int getHarHash(Path p) { + return (p.toString().hashCode() & 0x7fffffff); + } + + static class Store { + public Store() { + begin = end = startHash = endHash = 0; + } + public Store(long begin, long end, int startHash, int endHash) { + this.begin = begin; + this.end = end; + this.startHash = startHash; + this.endHash = endHash; + } + public long begin; + public long end; + public int startHash; + public int endHash; + } + + // make sure that this harPath is relative to the har filesystem + // this only works for relative paths. This returns the line matching + // the file in the index. 
Returns a null if there is not matching + // filename in the index file. + private String fileStatusInIndex(Path harPath) throws IOException { + // read the index file + int hashCode = getHarHash(harPath); + // get the master index to find the pos + // in the index file + FSDataInputStream in = fs.open(masterIndex); + FileStatus masterStat = fs.getFileStatus(masterIndex); + LineReader lin = new LineReader(in, getConf()); + Text line = new Text(); + long read = lin.readLine(line); + //ignore the first line. this is the header of the index files + String[] readStr = null; + List stores = new ArrayList(); + while(read < masterStat.getLen()) { + int b = lin.readLine(line); + read += b; + readStr = line.toString().split(" "); + int startHash = Integer.parseInt(readStr[0]); + int endHash = Integer.parseInt(readStr[1]); + if (startHash <= hashCode && hashCode <= endHash) { + stores.add(new Store(Long.parseLong(readStr[2]), + Long.parseLong(readStr[3]), startHash, + endHash)); + } + line.clear(); + } + try { + lin.close(); + } catch(IOException io){ + // do nothing just a read. + } + FSDataInputStream aIn = fs.open(archiveIndex); + LineReader aLin = new LineReader(aIn, getConf()); + String retStr = null; + // now start reading the real index file + read = 0; + for (Store s: stores) { + aIn.seek(s.begin); + while (read + s.begin < s.end) { + int tmp = aLin.readLine(line); + read += tmp; + String lineFeed = line.toString(); + String[] parsed = lineFeed.split(" "); + if (harPath.compareTo(new Path(parsed[0])) == 0) { + // bingo! + retStr = lineFeed; + break; + } + line.clear(); + } + if (retStr != null) + break; + } + try { + aIn.close(); + } catch(IOException io) { + //do nothing + } + return retStr; + } + + // a single line parser for hadoop archives status + // stored in a single line in the index files + // the format is of the form + // filename "dir"/"file" partFileName startIndex length + // + private static class HarStatus { + boolean isDir; + String name; + List children; + String partName; + long startIndex; + long length; + public HarStatus(String harString) { + String[] splits = harString.split(" "); + this.name = splits[0]; + this.isDir = "dir".equals(splits[1]) ? true: false; + // this is equal to "none" if its a directory + this.partName = splits[2]; + this.startIndex = Long.parseLong(splits[3]); + this.length = Long.parseLong(splits[4]); + if (isDir) { + children = new ArrayList(); + for (int i = 5; i < splits.length; i++) { + children.add(splits[i]); + } + } + } + public boolean isDir() { + return isDir; + } + + public String getName() { + return name; + } + + public List getChildren() { + return children; + } + public String getFileName() { + return name; + } + public String getPartName() { + return partName; + } + public long getStartIndex() { + return startIndex; + } + public long getLength() { + return length; + } + } + + /** + * return the filestatus of files in har archive. + * The permission returned are that of the archive + * index files. The permissions are not persisted + * while creating a hadoop archive. + * @param f the path in har filesystem + * @return filestatus. + * @throws IOException + */ + @Override + public FileStatus getFileStatus(Path f) throws IOException { + FileStatus archiveStatus = fs.getFileStatus(archiveIndex); + // get the fs DataInputStream for the underlying file + // look up the index. 
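+    // The line returned by fileStatusInIndex below looks like, for example
+    // (illustrative values):
+    //   /2009/05/part-0 file part-0 0 1024
+    // i.e. the in-archive path, "dir" or "file", the part file name, and the
+    // start offset and length of the data inside that part file.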
+ Path p = makeQualified(f); + Path harPath = getPathInHar(p); + if (harPath == null) { + throw new IOException("Invalid file name: " + f + " in " + uri); + } + String readStr = fileStatusInIndex(harPath); + if (readStr == null) { + throw new FileNotFoundException("File: " + f + " does not exist in " + uri); + } + HarStatus hstatus = null; + hstatus = new HarStatus(readStr); + return new FileStatus(hstatus.isDir()?0:hstatus.getLength(), hstatus.isDir(), + (int)archiveStatus.getReplication(), archiveStatus.getBlockSize(), + archiveStatus.getModificationTime(), archiveStatus.getAccessTime(), + new FsPermission( + archiveStatus.getPermission()), archiveStatus.getOwner(), + archiveStatus.getGroup(), + makeRelative(this.uri.toString(), new Path(hstatus.name))); + } + + /** + * Returns a har input stream which fakes end of + * file. It reads the index files to get the part + * file name and the size and start of the file. + */ + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + // get the fs DataInputStream for the underlying file + // look up the index. + Path p = makeQualified(f); + Path harPath = getPathInHar(p); + if (harPath == null) { + throw new IOException("Invalid file name: " + f + " in " + uri); + } + String readStr = fileStatusInIndex(harPath); + if (readStr == null) { + throw new FileNotFoundException(f + ": not found in " + archivePath); + } + HarStatus hstatus = new HarStatus(readStr); + // we got it.. woo hooo!!! + if (hstatus.isDir()) { + throw new FileNotFoundException(f + " : not a file in " + + archivePath); + } + return new HarFSDataInputStream(fs, new Path(archivePath, + hstatus.getPartName()), + hstatus.getStartIndex(), hstatus.getLength(), bufferSize); + } + + /* + * create throws an exception in Har filesystem. + * The archive once created cannot be changed. + */ + public FSDataOutputStream create(Path f, int bufferSize) + throws IOException { + throw new IOException("Har: Create not allowed"); + } + + public FSDataOutputStream create(Path f, + FsPermission permission, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { + throw new IOException("Har: create not allowed."); + } + + @Override + public void close() throws IOException { + if (fs != null) { + try { + fs.close(); + } catch(IOException ie) { + //this might already be closed + // ignore + } + } + } + + /** + * Not implemented. + */ + @Override + public boolean setReplication(Path src, short replication) throws IOException{ + throw new IOException("Har: setreplication not allowed"); + } + + /** + * Not implemented. + */ + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + throw new IOException("Har: delete not allowed"); + } + + /** + * liststatus returns the children of a directory + * after looking up the index files. 
+ */ + @Override + public FileStatus[] listStatus(Path f) throws IOException { + //need to see if the file is an index in file + //get the filestatus of the archive directory + // we will create fake filestatuses to return + // to the client + List statuses = new ArrayList(); + FileStatus archiveStatus = fs.getFileStatus(archiveIndex); + Path tmpPath = makeQualified(f); + Path harPath = getPathInHar(tmpPath); + String readStr = fileStatusInIndex(harPath); + if (readStr == null) { + throw new FileNotFoundException("File " + f + " not found in " + archivePath); + } + HarStatus hstatus = new HarStatus(readStr); + if (!hstatus.isDir()) + statuses.add(new FileStatus(hstatus.getLength(), + hstatus.isDir(), + archiveStatus.getReplication(), archiveStatus.getBlockSize(), + archiveStatus.getModificationTime(), archiveStatus.getAccessTime(), + new FsPermission(archiveStatus.getPermission()), + archiveStatus.getOwner(), archiveStatus.getGroup(), + makeRelative(this.uri.toString(), new Path(hstatus.name)))); + else + for (String child: hstatus.children) { + FileStatus tmp = getFileStatus(new Path(tmpPath, child)); + statuses.add(tmp); + } + return statuses.toArray(new FileStatus[statuses.size()]); + } + + /** + * return the top level archive path. + */ + public Path getHomeDirectory() { + return new Path(uri.toString()); + } + + public void setWorkingDirectory(Path newDir) { + //does nothing. + } + + /** + * not implemented. + */ + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + throw new IOException("Har: mkdirs not allowed"); + } + + /** + * not implemented. + */ + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws + IOException { + throw new IOException("Har: copyfromlocalfile not allowed"); + } + + /** + * copies the file in the har filesystem to a local file. + */ + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf()); + } + + /** + * not implemented. + */ + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + throw new IOException("Har: startLocalOutput not allowed"); + } + + /** + * not implemented. + */ + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + throw new IOException("Har: completeLocalOutput not allowed"); + } + + /** + * not implemented. + */ + public void setOwner(Path p, String username, String groupname) + throws IOException { + throw new IOException("Har: setowner not allowed"); + } + + /** + * Not implemented. + */ + public void setPermission(Path p, FsPermission permisssion) + throws IOException { + throw new IOException("Har: setPermission not allowed"); + } + + /** + * Hadoop archives input stream. This input stream fakes EOF + * since archive files are part of bigger part files. + */ + private static class HarFSDataInputStream extends FSDataInputStream { + /** + * Create an input stream that fakes all the reads/positions/seeking. + */ + private static class HarFsInputStream extends FSInputStream { + private long position, start, end; + //The underlying data input stream that the + // underlying filesystem will return. 
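+      // Only the window [start, start + length) of that stream is exposed,
+      // so callers see an apparent EOF at the end of the archived file
+      // rather than at the end of the part file.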
+ private FSDataInputStream underLyingStream; + //one byte buffer + private byte[] oneBytebuff = new byte[1]; + HarFsInputStream(FileSystem fs, Path path, long start, + long length, int bufferSize) throws IOException { + underLyingStream = fs.open(path, bufferSize); + underLyingStream.seek(start); + // the start of this file in the part file + this.start = start; + // the position pointer in the part file + this.position = start; + // the end pointer in the part file + this.end = start + length; + } + + public synchronized int available() throws IOException { + long remaining = end - underLyingStream.getPos(); + if (remaining > (long)Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + return (int) remaining; + } + + public synchronized void close() throws IOException { + underLyingStream.close(); + super.close(); + } + + //not implemented + @Override + public void mark(int readLimit) { + // do nothing + } + + /** + * reset is not implemented + */ + public void reset() throws IOException { + throw new IOException("reset not implemented."); + } + + public synchronized int read() throws IOException { + int ret = read(oneBytebuff, 0, 1); + return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff); + } + + public synchronized int read(byte[] b) throws IOException { + int ret = read(b, 0, b.length); + if (ret != -1) { + position += ret; + } + return ret; + } + + /** + * + */ + public synchronized int read(byte[] b, int offset, int len) + throws IOException { + int newlen = len; + int ret = -1; + if (position + len > end) { + newlen = (int) (end - position); + } + // end case + if (newlen == 0) + return ret; + ret = underLyingStream.read(b, offset, newlen); + position += ret; + return ret; + } + + public synchronized long skip(long n) throws IOException { + long tmpN = n; + if (tmpN > 0) { + if (position + tmpN > end) { + tmpN = end - position; + } + underLyingStream.seek(tmpN + position); + position += tmpN; + return tmpN; + } + return (tmpN < 0)? -1 : 0; + } + + public synchronized long getPos() throws IOException { + return (position - start); + } + + public synchronized void seek(long pos) throws IOException { + if (pos < 0 || (start + pos > end)) { + throw new IOException("Failed to seek: EOF"); + } + position = start + pos; + underLyingStream.seek(position); + } + + public boolean seekToNewSource(long targetPos) throws IOException { + //do not need to implement this + // hdfs in itself does seektonewsource + // while reading. + return false; + } + + /** + * implementing position readable. + */ + public int read(long pos, byte[] b, int offset, int length) + throws IOException { + int nlength = length; + if (start + nlength + pos > end) { + nlength = (int) (end - (start + pos)); + } + return underLyingStream.read(pos + start , b, offset, nlength); + } + + /** + * position readable again. + */ + public void readFully(long pos, byte[] b, int offset, int length) + throws IOException { + if (start + length + pos > end) { + throw new IOException("Not enough bytes to read."); + } + underLyingStream.readFully(pos + start, b, offset, length); + } + + public void readFully(long pos, byte[] b) throws IOException { + readFully(pos, b, 0, b.length); + } + + } + + /** + * constructors for har input stream. 
+ * @param fs the underlying filesystem + * @param p The path in the underlying filesystem + * @param start the start position in the part file + * @param length the length of valid data in the part file + * @param bufsize the buffer size + * @throws IOException + */ + public HarFSDataInputStream(FileSystem fs, Path p, long start, + long length, int bufsize) throws IOException { + super(new HarFsInputStream(fs, p, start, length, bufsize)); + } + + /** + * constructor for har input stream. + * @param fs the underlying filesystem + * @param p the path in the underlying file system + * @param start the start position in the part file + * @param length the length of valid data in the part file. + * @throws IOException + */ + public HarFSDataInputStream(FileSystem fs, Path p, long start, long length) + throws IOException { + super(new HarFsInputStream(fs, p, start, length, 0)); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/LengthFileChecksum.java b/src/java/org/apache/hadoop/fs/LengthFileChecksum.java new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/java/org/apache/hadoop/fs/LocalDirAllocator.java b/src/java/org/apache/hadoop/fs/LocalDirAllocator.java new file mode 100644 index 00000000000..5d04d280da3 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/LocalDirAllocator.java @@ -0,0 +1,418 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.util.*; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.util.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.DiskChecker.DiskErrorException; +import org.apache.hadoop.conf.Configuration; + +/** An implementation of a round-robin scheme for disk allocation for creating + * files. The way it works is that it is kept track what disk was last + * allocated for a file write. For the current request, the next disk from + * the set of disks would be allocated if the free space on the disk is + * sufficient enough to accomodate the file that is being considered for + * creation. If the space requirements cannot be met, the next disk in order + * would be tried and so on till a disk is found with sufficient capacity. + * Once a disk with sufficient space is identified, a check is done to make + * sure that the disk is writable. Also, there is an API provided that doesn't + * take the space requirements into consideration but just checks whether the + * disk under consideration is writable (this should be used for cases where + * the file size is not known apriori). An API is provided to read a path that + * was created earlier. That API works by doing a scan of all the disks for the + * input pathname. 
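+ * <p>
+ * A minimal usage sketch (the context name shown is the one used by
+ * map/reduce; the relative path and size are illustrative only):
+ * <pre>{@code
+ * Configuration conf = new Configuration();
+ * LocalDirAllocator allocator = new LocalDirAllocator("mapred.local.dir");
+ * Path scratch = allocator.getLocalPathForWrite("jobcache/spill0.out", 1 << 20, conf);
+ * }</pre>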
+ * This implementation also provides the functionality of having multiple + * allocators per JVM (one for each unique functionality or context, like + * mapred, dfs-client, etc.). It ensures that there is only one instance of + * an allocator per context per JVM. + * Note: + * 1. The contexts referred above are actually the configuration items defined + * in the Configuration class like "mapred.local.dir" (for which we want to + * control the dir allocations). The context-strings are exactly those + * configuration items. + * 2. This implementation does not take into consideration cases where + * a disk becomes read-only or goes out of space while a file is being written + * to (disks are shared between multiple processes, and so the latter situation + * is probable). + * 3. In the class implementation, "Disk" is referred to as "Dir", which + * actually points to the configured directory on the Disk which will be the + * parent for all file write/read allocations. + */ +public class LocalDirAllocator { + + //A Map from the config item names like "mapred.local.dir", + //"dfs.client.buffer.dir" to the instance of the AllocatorPerContext. This + //is a static object to make sure there exists exactly one instance per JVM + private static Map contexts = + new TreeMap(); + private String contextCfgItemName; + + /**Create an allocator object + * @param contextCfgItemName + */ + public LocalDirAllocator(String contextCfgItemName) { + this.contextCfgItemName = contextCfgItemName; + } + + /** This method must be used to obtain the dir allocation context for a + * particular value of the context name. The context name must be an item + * defined in the Configuration object for which we want to control the + * dir allocations (e.g., mapred.local.dir). The method will + * create a context for that name if it doesn't already exist. + */ + private AllocatorPerContext obtainContext(String contextCfgItemName) { + synchronized (contexts) { + AllocatorPerContext l = contexts.get(contextCfgItemName); + if (l == null) { + contexts.put(contextCfgItemName, + (l = new AllocatorPerContext(contextCfgItemName))); + } + return l; + } + } + + /** Get a path from the local FS. This method should be used if the size of + * the file is not known apriori. We go round-robin over the set of disks + * (via the configured dirs) and return the first complete path where + * we could create the parent directory of the passed path. + * @param pathStr the requested path (this will be created on the first + * available disk) + * @param conf the Configuration object + * @return the complete path to the file on a local disk + * @throws IOException + */ + public Path getLocalPathForWrite(String pathStr, + Configuration conf) throws IOException { + return getLocalPathForWrite(pathStr, -1, conf); + } + + /** Get a path from the local FS. Pass size as -1 if not known apriori. 
We + * round-robin over the set of disks (via the configured dirs) and return + * the first complete path which has enough space + * @param pathStr the requested path (this will be created on the first + * available disk) + * @param size the size of the file that is going to be written + * @param conf the Configuration object + * @return the complete path to the file on a local disk + * @throws IOException + */ + public Path getLocalPathForWrite(String pathStr, long size, + Configuration conf) throws IOException { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.getLocalPathForWrite(pathStr, size, conf); + } + + /** Get a path from the local FS for reading. We search through all the + * configured dirs for the file's existence and return the complete + * path to the file when we find one + * @param pathStr the requested file (this will be searched) + * @param conf the Configuration object + * @return the complete path to the file on a local disk + * @throws IOException + */ + public Path getLocalPathToRead(String pathStr, + Configuration conf) throws IOException { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.getLocalPathToRead(pathStr, conf); + } + + /** Creates a temporary file in the local FS. Pass size as -1 if not known + * apriori. We round-robin over the set of disks (via the configured dirs) + * and select the first complete path which has enough space. A file is + * created on this directory. The file is guaranteed to go away when the + * JVM exits. + * @param pathStr prefix for the temporary file + * @param size the size of the file that is going to be written + * @param conf the Configuration object + * @return a unique temporary file + * @throws IOException + */ + public File createTmpFileForWrite(String pathStr, long size, + Configuration conf) throws IOException { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.createTmpFileForWrite(pathStr, size, conf); + } + + /** Method to check whether a context is valid + * @param contextCfgItemName + * @return true/false + */ + public static boolean isContextValid(String contextCfgItemName) { + synchronized (contexts) { + return contexts.containsKey(contextCfgItemName); + } + } + + /** We search through all the configured dirs for the file's existence + * and return true when we find + * @param pathStr the requested file (this will be searched) + * @param conf the Configuration object + * @return true if files exist. false otherwise + * @throws IOException + */ + public boolean ifExists(String pathStr,Configuration conf) { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.ifExists(pathStr, conf); + } + + /** + * Get the current directory index for the given configuration item. + * @return the current directory index for the given configuration item. 
+ */ + int getCurrentDirectoryIndex() { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.getCurrentDirectoryIndex(); + } + + private static class AllocatorPerContext { + + private final Log LOG = + LogFactory.getLog(AllocatorPerContext.class); + + private int dirNumLastAccessed; + private Random dirIndexRandomizer = new Random(); + private FileSystem localFS; + private DF[] dirDF; + private String contextCfgItemName; + private String[] localDirs; + private String savedLocalDirs = ""; + + public AllocatorPerContext(String contextCfgItemName) { + this.contextCfgItemName = contextCfgItemName; + } + + /** This method gets called everytime before any read/write to make sure + * that any change to localDirs is reflected immediately. + */ + private void confChanged(Configuration conf) throws IOException { + String newLocalDirs = conf.get(contextCfgItemName); + if (!newLocalDirs.equals(savedLocalDirs)) { + localDirs = conf.getStrings(contextCfgItemName); + localFS = FileSystem.getLocal(conf); + int numDirs = localDirs.length; + ArrayList dirs = new ArrayList(numDirs); + ArrayList dfList = new ArrayList(numDirs); + for (int i = 0; i < numDirs; i++) { + try { + // filter problematic directories + Path tmpDir = new Path(localDirs[i]); + if(localFS.mkdirs(tmpDir)|| localFS.exists(tmpDir)) { + try { + DiskChecker.checkDir(new File(localDirs[i])); + dirs.add(localDirs[i]); + dfList.add(new DF(new File(localDirs[i]), 30000)); + } catch (DiskErrorException de) { + LOG.warn( localDirs[i] + "is not writable\n" + + StringUtils.stringifyException(de)); + } + } else { + LOG.warn( "Failed to create " + localDirs[i]); + } + } catch (IOException ie) { + LOG.warn( "Failed to create " + localDirs[i] + ": " + + ie.getMessage() + "\n" + StringUtils.stringifyException(ie)); + } //ignore + } + localDirs = dirs.toArray(new String[dirs.size()]); + dirDF = dfList.toArray(new DF[dirs.size()]); + savedLocalDirs = newLocalDirs; + + // randomize the first disk picked in the round-robin selection + dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size()); + } + } + + private Path createPath(String path) throws IOException { + Path file = new Path(new Path(localDirs[dirNumLastAccessed]), + path); + //check whether we are able to create a directory here. If the disk + //happens to be RDONLY we will fail + try { + DiskChecker.checkDir(new File(file.getParent().toUri().getPath())); + return file; + } catch (DiskErrorException d) { + LOG.warn(StringUtils.stringifyException(d)); + return null; + } + } + + /** + * Get the current directory index. + * @return the current directory index. + */ + int getCurrentDirectoryIndex() { + return dirNumLastAccessed; + } + + /** Get a path from the local FS. This method should be used if the size of + * the file is not known a priori. + * + * It will use roulette selection, picking directories + * with probability proportional to their available space. + */ + public synchronized Path getLocalPathForWrite(String path, + Configuration conf) throws IOException { + return getLocalPathForWrite(path, -1, conf); + } + + /** Get a path from the local FS. If size is known, we go + * round-robin over the set of disks (via the configured dirs) and return + * the first complete path which has enough space. + * + * If size is not known, use roulette selection -- pick directories + * with probability proportional to their available space. 
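+ * <p>
+ * For example (illustrative numbers), with three configured directories that
+ * have 10 GB, 30 GB and 60 GB available, an unknown-size request picks them
+ * with probability 0.1, 0.3 and 0.6 respectively.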
+ */ + public synchronized Path getLocalPathForWrite(String pathStr, long size, + Configuration conf) throws IOException { + confChanged(conf); + int numDirs = localDirs.length; + int numDirsSearched = 0; + //remove the leading slash from the path (to make sure that the uri + //resolution results in a valid path on the dir being checked) + if (pathStr.startsWith("/")) { + pathStr = pathStr.substring(1); + } + Path returnPath = null; + + if(size == -1) { //do roulette selection: pick dir with probability + //proportional to available size + long[] availableOnDisk = new long[dirDF.length]; + long totalAvailable = 0; + + //build the "roulette wheel" + for(int i =0; i < dirDF.length; ++i) { + availableOnDisk[i] = dirDF[i].getAvailable(); + totalAvailable += availableOnDisk[i]; + } + + // Keep rolling the wheel till we get a valid path + Random r = new java.util.Random(); + while (numDirsSearched < numDirs && returnPath == null) { + long randomPosition = Math.abs(r.nextLong()) % totalAvailable; + int dir = 0; + while (randomPosition > availableOnDisk[dir]) { + randomPosition -= availableOnDisk[dir]; + dir++; + } + dirNumLastAccessed = dir; + returnPath = createPath(pathStr); + if (returnPath == null) { + totalAvailable -= availableOnDisk[dir]; + availableOnDisk[dir] = 0; // skip this disk + numDirsSearched++; + } + } + } else { + while (numDirsSearched < numDirs && returnPath == null) { + long capacity = dirDF[dirNumLastAccessed].getAvailable(); + if (capacity > size) { + returnPath = createPath(pathStr); + } + dirNumLastAccessed++; + dirNumLastAccessed = dirNumLastAccessed % numDirs; + numDirsSearched++; + } + } + if (returnPath != null) { + return returnPath; + } + + //no path found + throw new DiskErrorException("Could not find any valid local " + + "directory for " + pathStr); + } + + /** Creates a file on the local FS. Pass size as -1 if not known apriori. We + * round-robin over the set of disks (via the configured dirs) and return + * a file on the first path which has enough space. The file is guaranteed + * to go away when the JVM exits. + */ + public File createTmpFileForWrite(String pathStr, long size, + Configuration conf) throws IOException { + + // find an appropriate directory + Path path = getLocalPathForWrite(pathStr, size, conf); + File dir = new File(path.getParent().toUri().getPath()); + String prefix = path.getName(); + + // create a temp file on this directory + File result = File.createTempFile(prefix, null, dir); + result.deleteOnExit(); + return result; + } + + /** Get a path from the local FS for reading. 
We search through all the + * configured dirs for the file's existence and return the complete + * path to the file when we find one + */ + public synchronized Path getLocalPathToRead(String pathStr, + Configuration conf) throws IOException { + confChanged(conf); + int numDirs = localDirs.length; + int numDirsSearched = 0; + //remove the leading slash from the path (to make sure that the uri + //resolution results in a valid path on the dir being checked) + if (pathStr.startsWith("/")) { + pathStr = pathStr.substring(1); + } + while (numDirsSearched < numDirs) { + Path file = new Path(localDirs[numDirsSearched], pathStr); + if (localFS.exists(file)) { + return file; + } + numDirsSearched++; + } + + //no path found + throw new DiskErrorException ("Could not find " + pathStr +" in any of" + + " the configured local directories"); + } + + /** We search through all the configured dirs for the file's existence + * and return true when we find one + */ + public synchronized boolean ifExists(String pathStr,Configuration conf) { + try { + int numDirs = localDirs.length; + int numDirsSearched = 0; + //remove the leading slash from the path (to make sure that the uri + //resolution results in a valid path on the dir being checked) + if (pathStr.startsWith("/")) { + pathStr = pathStr.substring(1); + } + while (numDirsSearched < numDirs) { + Path file = new Path(localDirs[numDirsSearched], pathStr); + if (localFS.exists(file)) { + return true; + } + numDirsSearched++; + } + } catch (IOException e) { + // IGNORE and try again + } + return false; + } + } +} diff --git a/src/java/org/apache/hadoop/fs/LocalFileSystem.java b/src/java/org/apache/hadoop/fs/LocalFileSystem.java new file mode 100644 index 00000000000..199c773f5e4 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/LocalFileSystem.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.net.URI; +import java.util.*; + +/**************************************************************** + * Implement the FileSystem API for the checksumed local filesystem. + * + *****************************************************************/ +public class LocalFileSystem extends ChecksumFileSystem { + static final URI NAME = URI.create("file:///"); + static private Random rand = new Random(); + FileSystem rfs; + + public LocalFileSystem() { + this(new RawLocalFileSystem()); + } + + public FileSystem getRaw() { + return rfs; + } + + public LocalFileSystem(FileSystem rawLocalFileSystem) { + super(rawLocalFileSystem); + rfs = rawLocalFileSystem; + } + + /** Convert a path to a File. 
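+   * For example, {@code file:///tmp/data.txt} (illustrative path) maps to the
+   * local file {@code /tmp/data.txt}.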
*/ + public File pathToFile(Path path) { + return ((RawLocalFileSystem)fs).pathToFile(path); + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, this, dst, delSrc, getConf()); + } + + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, this, dst, delSrc, getConf()); + } + + /** + * Moves files to a bad file directory on the same device, so that their + * storage will not be reused. + */ + public boolean reportChecksumFailure(Path p, FSDataInputStream in, + long inPos, + FSDataInputStream sums, long sumsPos) { + try { + // canonicalize f + File f = ((RawLocalFileSystem)fs).pathToFile(p).getCanonicalFile(); + + // find highest writable parent dir of f on the same device + String device = new DF(f, getConf()).getMount(); + File parent = f.getParentFile(); + File dir = null; + while (parent!=null && parent.canWrite() && parent.toString().startsWith(device)) { + dir = parent; + parent = parent.getParentFile(); + } + + if (dir==null) { + throw new IOException( + "not able to find the highest writable parent dir"); + } + + // move the file there + File badDir = new File(dir, "bad_files"); + if (!badDir.mkdirs()) { + if (!badDir.isDirectory()) { + throw new IOException("Mkdirs failed to create " + badDir.toString()); + } + } + String suffix = "." + rand.nextInt(); + File badFile = new File(badDir, f.getName()+suffix); + LOG.warn("Moving bad file " + f + " to " + badFile); + in.close(); // close it first + boolean b = f.renameTo(badFile); // rename it + if (!b) { + LOG.warn("Ignoring failure of renameTo"); + } + // move checksum file too + File checkFile = ((RawLocalFileSystem)fs).pathToFile(getChecksumFile(p)); + b = checkFile.renameTo(new File(badDir, checkFile.getName()+suffix)); + if (!b) { + LOG.warn("Ignoring failure of renameTo"); + } + } catch (IOException e) { + LOG.warn("Error moving bad file " + p + ": " + e); + } + return false; + } +} diff --git a/src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java b/src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java new file mode 100644 index 00000000000..c20b3d31d5d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.io.WritableUtils; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.znerd.xmlenc.XMLOutputter; + +/** MD5 of MD5 of CRC32. 
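+ * <p>
+ * Conceptually, the per-chunk CRC32s of a file are hashed with MD5 one block
+ * at a time, and those per-block MD5s are hashed again with MD5 to give the
+ * file checksum. The serialized form written below is bytesPerCRC (int),
+ * crcPerBlock (long) and the 16-byte MD5 digest, i.e. {@link #LENGTH} bytes.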
*/ +public class MD5MD5CRC32FileChecksum extends FileChecksum { + public static final int LENGTH = MD5Hash.MD5_LEN + + (Integer.SIZE + Long.SIZE)/Byte.SIZE; + + private int bytesPerCRC; + private long crcPerBlock; + private MD5Hash md5; + + /** Same as this(0, 0, null) */ + public MD5MD5CRC32FileChecksum() { + this(0, 0, null); + } + + /** Create a MD5FileChecksum */ + public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { + this.bytesPerCRC = bytesPerCRC; + this.crcPerBlock = crcPerBlock; + this.md5 = md5; + } + + /** {@inheritDoc} */ + public String getAlgorithmName() { + return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32"; + } + + /** {@inheritDoc} */ + public int getLength() {return LENGTH;} + + /** {@inheritDoc} */ + public byte[] getBytes() { + return WritableUtils.toByteArray(this); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + bytesPerCRC = in.readInt(); + crcPerBlock = in.readLong(); + md5 = MD5Hash.read(in); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + out.writeInt(bytesPerCRC); + out.writeLong(crcPerBlock); + md5.write(out); + } + + /** Write that object to xml output. */ + public static void write(XMLOutputter xml, MD5MD5CRC32FileChecksum that + ) throws IOException { + xml.startTag(MD5MD5CRC32FileChecksum.class.getName()); + if (that != null) { + xml.attribute("bytesPerCRC", "" + that.bytesPerCRC); + xml.attribute("crcPerBlock", "" + that.crcPerBlock); + xml.attribute("md5", "" + that.md5); + } + xml.endTag(); + } + + /** Return the object represented in the attributes. */ + public static MD5MD5CRC32FileChecksum valueOf(Attributes attrs + ) throws SAXException { + final String bytesPerCRC = attrs.getValue("bytesPerCRC"); + final String crcPerBlock = attrs.getValue("crcPerBlock"); + final String md5 = attrs.getValue("md5"); + if (bytesPerCRC == null || crcPerBlock == null || md5 == null) { + return null; + } + + try { + return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC), + Integer.valueOf(crcPerBlock), new MD5Hash(md5)); + } catch(Exception e) { + throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC + + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e); + } + } + + /** {@inheritDoc} */ + public String toString() { + return getAlgorithmName() + ":" + md5; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/fs/Path.java b/src/java/org/apache/hadoop/fs/Path.java new file mode 100644 index 00000000000..cf96bf24515 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Path.java @@ -0,0 +1,298 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import java.net.*; +import java.io.*; + +import org.apache.hadoop.conf.Configuration; + +/** Names a file or directory in a {@link FileSystem}. + * Path strings use slash as the directory separator. A path string is + * absolute if it begins with a slash. + */ +public class Path implements Comparable { + + /** The directory separator, a slash. */ + public static final String SEPARATOR = "/"; + public static final char SEPARATOR_CHAR = '/'; + + public static final String CUR_DIR = "."; + + static final boolean WINDOWS + = System.getProperty("os.name").startsWith("Windows"); + + private URI uri; // a hierarchical uri + + /** Resolve a child path against a parent path. */ + public Path(String parent, String child) { + this(new Path(parent), new Path(child)); + } + + /** Resolve a child path against a parent path. */ + public Path(Path parent, String child) { + this(parent, new Path(child)); + } + + /** Resolve a child path against a parent path. */ + public Path(String parent, Path child) { + this(new Path(parent), child); + } + + /** Resolve a child path against a parent path. */ + public Path(Path parent, Path child) { + // Add a slash to parent's path so resolution is compatible with URI's + URI parentUri = parent.uri; + String parentPath = parentUri.getPath(); + if (!(parentPath.equals("/") || parentPath.equals(""))) + try { + parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), + parentUri.getPath()+"/", null, null); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + URI resolved = parentUri.resolve(child.uri); + initialize(resolved.getScheme(), resolved.getAuthority(), + normalizePath(resolved.getPath())); + } + + private void checkPathArg( String path ) { + // disallow construction of a Path from an empty string + if ( path == null ) { + throw new IllegalArgumentException( + "Can not create a Path from a null string"); + } + if( path.length() == 0 ) { + throw new IllegalArgumentException( + "Can not create a Path from an empty string"); + } + } + + /** Construct a path from a String. Path strings are URIs, but with + * unescaped elements and some additional normalization. */ + public Path(String pathString) { + checkPathArg( pathString ); + + // We can't use 'new URI(String)' directly, since it assumes things are + // escaped, which we don't require of Paths. + + // add a slash in front of paths with Windows drive letters + if (hasWindowsDrive(pathString, false)) + pathString = "/"+pathString; + + // parse uri components + String scheme = null; + String authority = null; + + int start = 0; + + // parse uri scheme, if any + int colon = pathString.indexOf(':'); + int slash = pathString.indexOf('/'); + if ((colon != -1) && + ((slash == -1) || (colon < slash))) { // has a scheme + scheme = pathString.substring(0, colon); + start = colon+1; + } + + // parse uri authority, if any + if (pathString.startsWith("//", start) && + (pathString.length()-start > 2)) { // has authority + int nextSlash = pathString.indexOf('/', start+2); + int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); + authority = pathString.substring(start+2, authEnd); + start = authEnd; + } + + // uri path is the rest of the string -- query & fragment not supported + String path = pathString.substring(start, pathString.length()); + + initialize(scheme, authority, path); + } + + /** Construct a Path from components. 
*/ + public Path(String scheme, String authority, String path) { + checkPathArg( path ); + initialize(scheme, authority, path); + } + + private void initialize(String scheme, String authority, String path) { + try { + this.uri = new URI(scheme, authority, normalizePath(path), null, null) + .normalize(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + private String normalizePath(String path) { + // remove double slashes & backslashes + path = path.replace("//", "/"); + path = path.replace("\\", "/"); + + // trim trailing slash from non-root path (ignoring windows drive) + int minLength = hasWindowsDrive(path, true) ? 4 : 1; + if (path.length() > minLength && path.endsWith("/")) { + path = path.substring(0, path.length()-1); + } + + return path; + } + + private boolean hasWindowsDrive(String path, boolean slashed) { + if (!WINDOWS) return false; + int start = slashed ? 1 : 0; + return + path.length() >= start+2 && + (slashed ? path.charAt(0) == '/' : true) && + path.charAt(start+1) == ':' && + ((path.charAt(start) >= 'A' && path.charAt(start) <= 'Z') || + (path.charAt(start) >= 'a' && path.charAt(start) <= 'z')); + } + + + /** Convert this to a URI. */ + public URI toUri() { return uri; } + + /** Return the FileSystem that owns this Path. */ + public FileSystem getFileSystem(Configuration conf) throws IOException { + return FileSystem.get(this.toUri(), conf); + } + + /** True if the directory of this path is absolute. */ + public boolean isAbsolute() { + int start = hasWindowsDrive(uri.getPath(), true) ? 3 : 0; + return uri.getPath().startsWith(SEPARATOR, start); + } + + /** Returns the final component of this path.*/ + public String getName() { + String path = uri.getPath(); + int slash = path.lastIndexOf(SEPARATOR); + return path.substring(slash+1); + } + + /** Returns the parent of a path or null if at root. */ + public Path getParent() { + String path = uri.getPath(); + int lastSlash = path.lastIndexOf('/'); + int start = hasWindowsDrive(path, true) ? 3 : 0; + if ((path.length() == start) || // empty path + (lastSlash == start && path.length() == start+1)) { // at root + return null; + } + String parent; + if (lastSlash==-1) { + parent = CUR_DIR; + } else { + int end = hasWindowsDrive(path, true) ? 3 : 0; + parent = path.substring(0, lastSlash==end?end+1:lastSlash); + } + return new Path(uri.getScheme(), uri.getAuthority(), parent); + } + + /** Adds a suffix to the final name in the path.*/ + public Path suffix(String suffix) { + return new Path(getParent(), getName()+suffix); + } + + public String toString() { + // we can't use uri.toString(), which escapes everything, because we want + // illegal characters unescaped in the string, for glob processing, etc. 
+ StringBuffer buffer = new StringBuffer(); + if (uri.getScheme() != null) { + buffer.append(uri.getScheme()); + buffer.append(":"); + } + if (uri.getAuthority() != null) { + buffer.append("//"); + buffer.append(uri.getAuthority()); + } + if (uri.getPath() != null) { + String path = uri.getPath(); + if (path.indexOf('/')==0 && + hasWindowsDrive(path, true) && // has windows drive + uri.getScheme() == null && // but no scheme + uri.getAuthority() == null) // or authority + path = path.substring(1); // remove slash before drive + buffer.append(path); + } + return buffer.toString(); + } + + public boolean equals(Object o) { + if (!(o instanceof Path)) { + return false; + } + Path that = (Path)o; + return this.uri.equals(that.uri); + } + + public int hashCode() { + return uri.hashCode(); + } + + public int compareTo(Object o) { + Path that = (Path)o; + return this.uri.compareTo(that.uri); + } + + /** Return the number of elements in this path. */ + public int depth() { + String path = uri.getPath(); + int depth = 0; + int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; + while (slash != -1) { + depth++; + slash = path.indexOf(SEPARATOR, slash+1); + } + return depth; + } + + /** Returns a qualified path object. */ + public Path makeQualified(FileSystem fs) { + Path path = this; + if (!isAbsolute()) { + path = new Path(fs.getWorkingDirectory(), this); + } + + URI pathUri = path.toUri(); + URI fsUri = fs.getUri(); + + String scheme = pathUri.getScheme(); + String authority = pathUri.getAuthority(); + + if (scheme != null && + (authority != null || fsUri.getAuthority() == null)) + return path; + + if (scheme == null) { + scheme = fsUri.getScheme(); + } + + if (authority == null) { + authority = fsUri.getAuthority(); + if (authority == null) { + authority = ""; + } + } + + return new Path(scheme+":"+"//"+authority + pathUri.getPath()); + } +} diff --git a/src/java/org/apache/hadoop/fs/PathFilter.java b/src/java/org/apache/hadoop/fs/PathFilter.java new file mode 100644 index 00000000000..bcb7658943a --- /dev/null +++ b/src/java/org/apache/hadoop/fs/PathFilter.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +public interface PathFilter { + /** + * Tests whether or not the specified abstract pathname should be + * included in a pathname list. 
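A short sketch of how the resolution, parent and qualification rules of Path behave in practice (all paths below are arbitrary examples):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathExamples {
  public static void main(String[] args) throws Exception {
    Path p = new Path("/user/alice", "data/part-00000");
    System.out.println(p);               // /user/alice/data/part-00000
    System.out.println(p.getName());     // part-00000
    System.out.println(p.getParent());   // /user/alice/data
    System.out.println(p.depth());       // 4
    System.out.println(p.isAbsolute());  // true

    // A relative path is resolved against the FileSystem's working directory and
    // picks up the scheme/authority of that FileSystem via makeQualified().
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    System.out.println(new Path("tmp/out").makeQualified(fs)); // e.g. file:/<working dir>/tmp/out
  }
}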
+ * + * @param path The abstract pathname to be tested + * @return true if and only if pathname + * should be included + */ + boolean accept(Path path); +} + + diff --git a/src/java/org/apache/hadoop/fs/PositionedReadable.java b/src/java/org/apache/hadoop/fs/PositionedReadable.java new file mode 100644 index 00000000000..d5af64e53e0 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/PositionedReadable.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; +import org.apache.hadoop.fs.*; + +/** Stream that permits positional reading. */ +public interface PositionedReadable { + /** + * Read upto the specified number of bytes, from a given + * position within a file, and return the number of bytes read. This does not + * change the current offset of a file, and is thread-safe. + */ + public int read(long position, byte[] buffer, int offset, int length) + throws IOException; + + /** + * Read the specified number of bytes, from a given + * position within a file. This does not + * change the current offset of a file, and is thread-safe. + */ + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException; + + /** + * Read number of bytes equalt to the length of the buffer, from a given + * position within a file. This does not + * change the current offset of a file, and is thread-safe. + */ + public void readFully(long position, byte[] buffer) throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/src/java/org/apache/hadoop/fs/RawLocalFileSystem.java new file mode 100644 index 00000000000..4587136e8af --- /dev/null +++ b/src/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -0,0 +1,496 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
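PathFilter instances are typically handed to listing calls; a minimal sketch of an anonymous filter that keeps only ".log" files, assuming the listStatus(Path, PathFilter) overload that FileSystem provides elsewhere in this patch and a made-up directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class LogFileLister {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    PathFilter logsOnly = new PathFilter() {
      public boolean accept(Path path) {
        return path.getName().endsWith(".log");
      }
    };
    FileStatus[] logs = fs.listStatus(new Path("/var/tmp"), logsOnly); // hypothetical directory
    if (logs != null) {
      for (FileStatus stat : logs) {
        System.out.println(stat.getPath());
      }
    }
  }
}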
+ */ + +package org.apache.hadoop.fs; + +import java.io.BufferedOutputStream; +import java.io.DataOutput; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.nio.ByteBuffer; +import java.util.StringTokenizer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.StringUtils; + +/**************************************************************** + * Implement the FileSystem API for the raw local filesystem. + * + *****************************************************************/ +public class RawLocalFileSystem extends FileSystem { + static final URI NAME = URI.create("file:///"); + private Path workingDir; + + public RawLocalFileSystem() { + workingDir = new Path(System.getProperty("user.dir")).makeQualified(this); + } + + /** Convert a path to a File. */ + public File pathToFile(Path path) { + checkPath(path); + if (!path.isAbsolute()) { + path = new Path(getWorkingDirectory(), path); + } + return new File(path.toUri().getPath()); + } + + public URI getUri() { return NAME; } + + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + setConf(conf); + } + + class TrackingFileInputStream extends FileInputStream { + public TrackingFileInputStream(File f) throws IOException { + super(f); + } + + public int read() throws IOException { + int result = super.read(); + if (result != -1) { + statistics.incrementBytesRead(1); + } + return result; + } + + public int read(byte[] data) throws IOException { + int result = super.read(data); + if (result != -1) { + statistics.incrementBytesRead(result); + } + return result; + } + + public int read(byte[] data, int offset, int length) throws IOException { + int result = super.read(data, offset, length); + if (result != -1) { + statistics.incrementBytesRead(result); + } + return result; + } + } + + /******************************************************* + * For open()'s FSInputStream + *******************************************************/ + class LocalFSFileInputStream extends FSInputStream { + FileInputStream fis; + private long position; + + public LocalFSFileInputStream(Path f) throws IOException { + this.fis = new TrackingFileInputStream(pathToFile(f)); + } + + public void seek(long pos) throws IOException { + fis.getChannel().position(pos); + this.position = pos; + } + + public long getPos() throws IOException { + return this.position; + } + + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + /* + * Just forward to the fis + */ + public int available() throws IOException { return fis.available(); } + public void close() throws IOException { fis.close(); } + public boolean markSupport() { return false; } + + public int read() throws IOException { + try { + int value = fis.read(); + if (value >= 0) { + this.position++; + } + return value; + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + public int read(byte[] b, int off, int len) throws IOException { + try { + int value = fis.read(b, off, len); + if (value > 0) { + this.position += value; + } + return value; + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + 
public int read(long position, byte[] b, int off, int len) + throws IOException { + ByteBuffer bb = ByteBuffer.wrap(b, off, len); + try { + return fis.getChannel().read(bb, position); + } catch (IOException e) { + throw new FSError(e); + } + } + + public long skip(long n) throws IOException { + long value = fis.skip(n); + if (value > 0) { + this.position += value; + } + return value; + } + } + + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + if (!exists(f)) { + throw new FileNotFoundException(f.toString()); + } + return new FSDataInputStream(new BufferedFSInputStream( + new LocalFSFileInputStream(f), bufferSize)); + } + + /********************************************************* + * For create()'s FSOutputStream. + *********************************************************/ + class LocalFSFileOutputStream extends OutputStream implements Syncable { + FileOutputStream fos; + + private LocalFSFileOutputStream(Path f, boolean append) throws IOException { + this.fos = new FileOutputStream(pathToFile(f), append); + } + + /* + * Just forward to the fos + */ + public void close() throws IOException { fos.close(); } + public void flush() throws IOException { fos.flush(); } + public void write(byte[] b, int off, int len) throws IOException { + try { + fos.write(b, off, len); + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + public void write(int b) throws IOException { + try { + fos.write(b); + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + /** {@inheritDoc} */ + public void sync() throws IOException { + fos.getFD().sync(); + } + } + + /** {@inheritDoc} */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + if (!exists(f)) { + throw new FileNotFoundException("File " + f + " not found."); + } + if (getFileStatus(f).isDir()) { + throw new IOException("Cannot append to a diretory (=" + f + " )."); + } + return new FSDataOutputStream(new BufferedOutputStream( + new LocalFSFileOutputStream(f, true), bufferSize), statistics); + } + + /** {@inheritDoc} */ + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) + throws IOException { + if (exists(f) && !overwrite) { + throw new IOException("File already exists:"+f); + } + Path parent = f.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent.toString()); + } + return new FSDataOutputStream(new BufferedOutputStream( + new LocalFSFileOutputStream(f, false), bufferSize), statistics); + } + + /** {@inheritDoc} */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + FSDataOutputStream out = create(f, + overwrite, bufferSize, replication, blockSize, progress); + setPermission(f, permission); + return out; + } + + public boolean rename(Path src, Path dst) throws IOException { + if (pathToFile(src).renameTo(pathToFile(dst))) { + return true; + } + return FileUtil.copy(this, src, this, dst, true, getConf()); + } + + public boolean delete(Path p, boolean recursive) throws IOException { + File f = pathToFile(p); + if (f.isFile()) { + return f.delete(); + } else if ((!recursive) && f.isDirectory() && + (f.listFiles().length != 0)) { + throw new IOException("Directory " + 
f.toString() + " is not empty"); + } + return FileUtil.fullyDelete(f); + } + + public FileStatus[] listStatus(Path f) throws IOException { + File localf = pathToFile(f); + FileStatus[] results; + + if (!localf.exists()) { + return null; + } + if (localf.isFile()) { + return new FileStatus[] { + new RawLocalFileStatus(localf, getDefaultBlockSize(), this) }; + } + + String[] names = localf.list(); + if (names == null) { + return null; + } + results = new FileStatus[names.length]; + for (int i = 0; i < names.length; i++) { + results[i] = getFileStatus(new Path(f, names[i])); + } + return results; + } + + /** + * Creates the specified directory hierarchy. Does not + * treat existence as an error. + */ + public boolean mkdirs(Path f) throws IOException { + Path parent = f.getParent(); + File p2f = pathToFile(f); + return (parent == null || mkdirs(parent)) && + (p2f.mkdir() || p2f.isDirectory()); + } + + /** {@inheritDoc} */ + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + boolean b = mkdirs(f); + setPermission(f, permission); + return b; + } + + @Override + public Path getHomeDirectory() { + return new Path(System.getProperty("user.home")).makeQualified(this); + } + + /** + * Set the working directory to the given directory. + */ + @Override + public void setWorkingDirectory(Path newDir) { + workingDir = newDir; + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + /** {@inheritDoc} */ + @Override + public FsStatus getStatus(Path p) throws IOException { + File partition = pathToFile(p == null ? new Path("/") : p); + //File provides getUsableSpace() and getFreeSpace() + //File provides no API to obtain used space, assume used = total - free + return new FsStatus(partition.getTotalSpace(), + partition.getTotalSpace() - partition.getFreeSpace(), + partition.getFreeSpace()); + } + + // In the case of the local filesystem, we can just rename the file. + public void moveFromLocalFile(Path src, Path dst) throws IOException { + rename(src, dst); + } + + // We can write output directly to the final location + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return fsOutputFile; + } + + // It's in the right place - nothing to do. + public void completeLocalOutput(Path fsWorkingFile, Path tmpLocalFile) + throws IOException { + } + + public void close() throws IOException { + super.close(); + } + + public String toString() { + return "LocalFS"; + } + + public FileStatus getFileStatus(Path f) throws IOException { + File path = pathToFile(f); + if (path.exists()) { + return new RawLocalFileStatus(pathToFile(f), getDefaultBlockSize(), this); + } else { + throw new FileNotFoundException( "File " + f + " does not exist."); + } + } + + static class RawLocalFileStatus extends FileStatus { + /* We can add extra fields here. It breaks at least CopyFiles.FilePair(). + * We recognize if the information is already loaded by check if + * onwer.equals(""). 
+ */ + private boolean isPermissionLoaded() { + return !super.getOwner().equals(""); + } + + RawLocalFileStatus(File f, long defaultBlockSize, FileSystem fs) { + super(f.length(), f.isDirectory(), 1, defaultBlockSize, + f.lastModified(), new Path(f.getPath()).makeQualified(fs)); + } + + @Override + public FsPermission getPermission() { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + return super.getPermission(); + } + + @Override + public String getOwner() { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + return super.getOwner(); + } + + @Override + public String getGroup() { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + return super.getGroup(); + } + + /// loads permissions, owner, and group from `ls -ld` + private void loadPermissionInfo() { + IOException e = null; + try { + StringTokenizer t = new StringTokenizer( + execCommand(new File(getPath().toUri()), + Shell.getGET_PERMISSION_COMMAND())); + //expected format + //-rw------- 1 username groupname ... + String permission = t.nextToken(); + if (permission.length() > 10) { //files with ACLs might have a '+' + permission = permission.substring(0, 10); + } + setPermission(FsPermission.valueOf(permission)); + t.nextToken(); + setOwner(t.nextToken()); + setGroup(t.nextToken()); + } catch (Shell.ExitCodeException ioe) { + if (ioe.getExitCode() != 1) { + e = ioe; + } else { + setPermission(null); + setOwner(null); + setGroup(null); + } + } catch (IOException ioe) { + e = ioe; + } finally { + if (e != null) { + throw new RuntimeException("Error while running command to get " + + "file permissions : " + + StringUtils.stringifyException(e)); + } + } + } + + @Override + public void write(DataOutput out) throws IOException { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + super.write(out); + } + } + + /** + * Use the command chown to set owner. + */ + @Override + public void setOwner(Path p, String username, String groupname + ) throws IOException { + if (username == null && groupname == null) { + throw new IOException("username == null && groupname == null"); + } + + if (username == null) { + execCommand(pathToFile(p), Shell.SET_GROUP_COMMAND, groupname); + } else { + //OWNER[:[GROUP]] + String s = username + (groupname == null? "": ":" + groupname); + execCommand(pathToFile(p), Shell.SET_OWNER_COMMAND, s); + } + } + + /** + * Use the command chmod to set permission. + */ + @Override + public void setPermission(Path p, FsPermission permission + ) throws IOException { + execCommand(pathToFile(p), Shell.SET_PERMISSION_COMMAND, + String.format("%05o", permission.toShort())); + } + + private static String execCommand(File f, String... cmd) throws IOException { + String[] args = new String[cmd.length + 1]; + System.arraycopy(cmd, 0, args, 0, cmd.length); + args[cmd.length] = f.getCanonicalPath(); + String output = Shell.execCommand(args); + return output; + } +} diff --git a/src/java/org/apache/hadoop/fs/Seekable.java b/src/java/org/apache/hadoop/fs/Seekable.java new file mode 100644 index 00000000000..20e75088514 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Seekable.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/** Stream that permits seeking. */ +public interface Seekable { + /** + * Seek to the given offset from the start of the file. + * The next read() will be from that location. Can't + * seek past the end of the file. + */ + void seek(long pos) throws IOException; + + /** + * Return the current offset from the start of the file + */ + long getPos() throws IOException; + + /** + * Seeks a different copy of the data. Returns true if + * found a new source, false otherwise. + */ + boolean seekToNewSource(long targetPos) throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/Syncable.java b/src/java/org/apache/hadoop/fs/Syncable.java new file mode 100644 index 00000000000..650d224e3e9 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Syncable.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +/** This interface declare the sync() operation. */ +public interface Syncable { + /** + * Synchronize all buffer with the underlying devices. + * @throws IOException + */ + public void sync() throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/Trash.java b/src/java/org/apache/hadoop/fs/Trash.java new file mode 100644 index 00000000000..5b062a1ece5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Trash.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
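The Seekable contract is easiest to contrast with the positional reads of PositionedReadable side by side; a minimal sketch using FSDataInputStream, which implements both, against a made-up local file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekVersusPread {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FSDataInputStream in = fs.open(new Path("/tmp/sample.bin")); // hypothetical file
    byte[] buf = new byte[16];

    in.readFully(1024L, buf);          // positional read: thread-safe, offset untouched
    System.out.println(in.getPos());   // still 0

    in.seek(1024L);                    // Seekable: moves the stream offset
    in.read(buf, 0, buf.length);
    System.out.println(in.getPos());   // now 1024 plus the bytes just read
    in.close();
  }
}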
+ */ +package org.apache.hadoop.fs; + +import java.text.*; +import java.io.*; +import java.util.Date; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.permission.*; +import org.apache.hadoop.util.StringUtils; + +/** Provides a trash feature. Files are moved to a user's trash + * directory, a subdirectory of their home directory named ".Trash". Files are + * initially moved to a current sub-directory of the trash directory. + * Within that sub-directory their original path is preserved. Periodically + * one may checkpoint the current trash and remove older checkpoints. (This + * design permits trash management without enumeration of the full trash + * content, without date support in the filesystem, and without clock + * synchronization.) + */ +public class Trash extends Configured { + private static final Log LOG = + LogFactory.getLog(Trash.class); + + private static final Path CURRENT = new Path("Current"); + private static final Path TRASH = new Path(".Trash/"); + private static final Path HOMES = new Path("/user/"); + + private static final FsPermission PERMISSION = + new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); + + private static final DateFormat CHECKPOINT = new SimpleDateFormat("yyMMddHHmm"); + private static final int MSECS_PER_MINUTE = 60*1000; + + private final FileSystem fs; + private final Path trash; + private final Path current; + private final long interval; + + /** Construct a trash can accessor. + * @param conf a Configuration + */ + public Trash(Configuration conf) throws IOException { + this(FileSystem.get(conf), conf); + } + + /** + * Construct a trash can accessor for the FileSystem provided. + */ + public Trash(FileSystem fs, Configuration conf) throws IOException { + super(conf); + this.fs = fs; + this.trash = new Path(fs.getHomeDirectory(), TRASH); + this.current = new Path(trash, CURRENT); + this.interval = conf.getLong("fs.trash.interval", 60) * MSECS_PER_MINUTE; + } + + private Trash(Path home, Configuration conf) throws IOException { + super(conf); + this.fs = home.getFileSystem(conf); + this.trash = new Path(home, TRASH); + this.current = new Path(trash, CURRENT); + this.interval = conf.getLong("fs.trash.interval", 60) * MSECS_PER_MINUTE; + } + + private Path makeTrashRelativePath(Path basePath, Path rmFilePath) { + return new Path(basePath + rmFilePath.toUri().getPath()); + } + + /** Move a file or directory to the current trash directory. 
+ * @return false if the item is already in the trash or trash is disabled + */ + public boolean moveToTrash(Path path) throws IOException { + if (interval == 0) + return false; + + if (!path.isAbsolute()) // make path absolute + path = new Path(fs.getWorkingDirectory(), path); + + if (!fs.exists(path)) // check that path exists + throw new FileNotFoundException(path.toString()); + + String qpath = path.makeQualified(fs).toString(); + + if (qpath.startsWith(trash.toString())) { + return false; // already in trash + } + + if (trash.getParent().toString().startsWith(qpath)) { + throw new IOException("Cannot move \"" + path + + "\" to the trash, as it contains the trash"); + } + + Path trashPath = makeTrashRelativePath(current, path); + Path baseTrashPath = makeTrashRelativePath(current, path.getParent()); + + IOException cause = null; + + // try twice, in case checkpoint between the mkdirs() & rename() + for (int i = 0; i < 2; i++) { + try { + if (!fs.mkdirs(baseTrashPath, PERMISSION)) { // create current + LOG.warn("Can't create trash directory: "+baseTrashPath); + return false; + } + } catch (IOException e) { + LOG.warn("Can't create trash directory: "+baseTrashPath); + return false; + } + try { + // + // if the target path in Trash already exists, then append with + // a number. Start from 1. + // + String orig = trashPath.toString(); + for (int j = 1; fs.exists(trashPath); j++) { + trashPath = new Path(orig + "." + j); + } + if (fs.rename(path, trashPath)) // move to current trash + return true; + } catch (IOException e) { + cause = e; + } + } + throw (IOException) + new IOException("Failed to move to trash: "+path).initCause(cause); + } + + /** Create a trash checkpoint. */ + public void checkpoint() throws IOException { + if (!fs.exists(current)) // no trash, no checkpoint + return; + + Path checkpoint; + synchronized (CHECKPOINT) { + checkpoint = new Path(trash, CHECKPOINT.format(new Date())); + } + + if (fs.rename(current, checkpoint)) { + LOG.info("Created trash checkpoint: "+checkpoint.toUri().getPath()); + } else { + throw new IOException("Failed to checkpoint trash: "+checkpoint); + } + } + + /** Delete old checkpoints. */ + public void expunge() throws IOException { + FileStatus[] dirs = fs.listStatus(trash); // scan trash sub-directories + if( dirs == null){ + return; + } + long now = System.currentTimeMillis(); + for (int i = 0; i < dirs.length; i++) { + Path path = dirs[i].getPath(); + String dir = path.toUri().getPath(); + String name = path.getName(); + if (name.equals(CURRENT.getName())) // skip current + continue; + + long time; + try { + synchronized (CHECKPOINT) { + time = CHECKPOINT.parse(name).getTime(); + } + } catch (ParseException e) { + LOG.warn("Unexpected item in trash: "+dir+". Ignoring."); + continue; + } + + if ((now - interval) > time) { + if (fs.delete(path, true)) { + LOG.info("Deleted trash checkpoint: "+dir); + } else { + LOG.warn("Couldn't delete checkpoint: "+dir+" Ignoring."); + } + } + } + } + + // + // get the current working directory + // + Path getCurrentTrashDir() { + return current; + } + + /** Return a {@link Runnable} that periodically empties the trash of all + * users, intended to be run by the superuser. Only one checkpoint is kept + * at a time. 
+ */ + public Runnable getEmptier() throws IOException { + return new Emptier(getConf()); + } + + private class Emptier implements Runnable { + + private Configuration conf; + private long interval; + + Emptier(Configuration conf) throws IOException { + this.conf = conf; + this.interval = conf.getLong("fs.trash.interval", 0) * MSECS_PER_MINUTE; + } + + public void run() { + if (interval == 0) + return; // trash disabled + + long now = System.currentTimeMillis(); + long end; + while (true) { + end = ceiling(now, interval); + try { // sleep for interval + Thread.sleep(end - now); + } catch (InterruptedException e) { + break; // exit on interrupt + } + + try { + now = System.currentTimeMillis(); + if (now >= end) { + + FileStatus[] homes = null; + try { + homes = fs.listStatus(HOMES); // list all home dirs + } catch (IOException e) { + LOG.warn("Trash can't list homes: "+e+" Sleeping."); + continue; + } + + if (homes == null) + continue; + + for (FileStatus home : homes) { // dump each trash + if (!home.isDir()) + continue; + try { + Trash trash = new Trash(home.getPath(), conf); + trash.expunge(); + trash.checkpoint(); + } catch (IOException e) { + LOG.warn("Trash caught: "+e+". Skipping "+home.getPath()+"."); + } + } + } + } catch (Exception e) { + LOG.warn("RuntimeException during Trash.Emptier.run() " + + StringUtils.stringifyException(e)); + } + } + try { + fs.close(); + } catch(IOException e) { + LOG.warn("Trash cannot close FileSystem. " + + StringUtils.stringifyException(e)); + } + } + + private long ceiling(long time, long interval) { + return floor(time, interval) + interval; + } + private long floor(long time, long interval) { + return (time / interval) * interval; + } + + } + + /** Run an emptier.*/ + public static void main(String[] args) throws Exception { + new Trash(new Configuration()).getEmptier().run(); + } + +} diff --git a/src/java/org/apache/hadoop/fs/ftp/FTPException.java b/src/java/org/apache/hadoop/fs/ftp/FTPException.java new file mode 100644 index 00000000000..c76cb57f3c8 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ftp/FTPException.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.ftp; + +/** + * A class to wrap a {@link Throwable} into a Runtime Exception. 
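A minimal sketch of driving the Trash API above directly, assuming trash is enabled via fs.trash.interval (in minutes) and that the path being removed actually exists:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class TrashExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setLong("fs.trash.interval", 60);   // minutes between checkpoints; 0 disables trash
    FileSystem fs = FileSystem.get(conf);

    Trash trash = new Trash(fs, conf);
    if (trash.moveToTrash(new Path("/user/alice/old-data"))) {  // hypothetical path
      System.out.println("moved into .Trash/Current");
    }
    trash.checkpoint();   // roll Current into a timestamped checkpoint
    trash.expunge();      // drop checkpoints older than fs.trash.interval
  }
}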
+ */ +public class FTPException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public FTPException(String message) { + super(message); + } + + public FTPException(Throwable t) { + super(t); + } + + public FTPException(String message, Throwable t) { + super(message, t); + } +} diff --git a/src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java b/src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java new file mode 100644 index 00000000000..ee91f1c899f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java @@ -0,0 +1,576 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.ftp; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.net.ftp.FTP; +import org.apache.commons.net.ftp.FTPClient; +import org.apache.commons.net.ftp.FTPFile; +import org.apache.commons.net.ftp.FTPReply; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +/** + *
+ * A {@link FileSystem} backed by an FTP client provided by Apache Commons Net.
+ */ +public class FTPFileSystem extends FileSystem { + + public static final Log LOG = LogFactory + .getLog(FTPFileSystem.class); + + public static final int DEFAULT_BUFFER_SIZE = 1024 * 1024; + + public static final int DEFAULT_BLOCK_SIZE = 4 * 1024; + + private URI uri; + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { // get + super.initialize(uri, conf); + // get host information from uri (overrides info in conf) + String host = uri.getHost(); + host = (host == null) ? conf.get("fs.ftp.host", null) : host; + if (host == null) { + throw new IOException("Invalid host specified"); + } + conf.set("fs.ftp.host", host); + + // get port information from uri, (overrides info in conf) + int port = uri.getPort(); + port = (port == -1) ? FTP.DEFAULT_PORT : port; + conf.setInt("fs.ftp.host.port", port); + + // get user/password information from URI (overrides info in conf) + String userAndPassword = uri.getUserInfo(); + if (userAndPassword == null) { + userAndPassword = (conf.get("fs.ftp.user." + host, null) + ":" + conf + .get("fs.ftp.password." + host, null)); + if (userAndPassword == null) { + throw new IOException("Invalid user/passsword specified"); + } + } + String[] userPasswdInfo = userAndPassword.split(":"); + conf.set("fs.ftp.user." + host, userPasswdInfo[0]); + if (userPasswdInfo.length > 1) { + conf.set("fs.ftp.password." + host, userPasswdInfo[1]); + } else { + conf.set("fs.ftp.password." + host, null); + } + setConf(conf); + this.uri = uri; + } + + /** + * Connect to the FTP server using configuration parameters * + * + * @return An FTPClient instance + * @throws IOException + */ + private FTPClient connect() throws IOException { + FTPClient client = null; + Configuration conf = getConf(); + String host = conf.get("fs.ftp.host"); + int port = conf.getInt("fs.ftp.host.port", FTP.DEFAULT_PORT); + String user = conf.get("fs.ftp.user." + host); + String password = conf.get("fs.ftp.password." + host); + client = new FTPClient(); + client.connect(host, port); + int reply = client.getReplyCode(); + if (!FTPReply.isPositiveCompletion(reply)) { + throw new IOException("Server - " + host + + " refused connection on port - " + port); + } else if (client.login(user, password)) { + client.setFileTransferMode(FTP.BLOCK_TRANSFER_MODE); + client.setFileType(FTP.BINARY_FILE_TYPE); + client.setBufferSize(DEFAULT_BUFFER_SIZE); + } else { + throw new IOException("Login failed on server - " + host + ", port - " + + port); + } + + return client; + } + + /** + * Logout and disconnect the given FTPClient. * + * + * @param client + * @throws IOException + */ + private void disconnect(FTPClient client) throws IOException { + if (client != null) { + if (!client.isConnected()) { + throw new FTPException("Client not connected"); + } + boolean logoutSuccess = client.logout(); + client.disconnect(); + if (!logoutSuccess) { + LOG.warn("Logout failed while disconnecting, error code - " + + client.getReplyCode()); + } + } + } + + /** + * Resolve against given working directory. 
* + * + * @param workDir + * @param path + * @return + */ + private Path makeAbsolute(Path workDir, Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workDir, path); + } + + @Override + public FSDataInputStream open(Path file, int bufferSize) throws IOException { + FTPClient client = connect(); + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + FileStatus fileStat = getFileStatus(client, absolute); + if (fileStat.isDir()) { + disconnect(client); + throw new IOException("Path " + file + " is a directory."); + } + client.allocate(bufferSize); + Path parent = absolute.getParent(); + // Change to parent directory on the + // server. Only then can we read the + // file + // on the server by opening up an InputStream. As a side effect the working + // directory on the server is changed to the parent directory of the file. + // The FTP client connection is closed when close() is called on the + // FSDataInputStream. + client.changeWorkingDirectory(parent.toUri().getPath()); + InputStream is = client.retrieveFileStream(file.getName()); + FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, + client, statistics)); + if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { + // The ftpClient is an inconsistent state. Must close the stream + // which in turn will logout and disconnect from FTP server + fis.close(); + throw new IOException("Unable to open file: " + file + ", Aborting"); + } + return fis; + } + + /** + * A stream obtained via this call must be closed before using other APIs of + * this class or else the invocation will block. + */ + @Override + public FSDataOutputStream create(Path file, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + final FTPClient client = connect(); + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + if (exists(client, file)) { + if (overwrite) { + delete(client, file); + } else { + disconnect(client); + throw new IOException("File already exists: " + file); + } + } + Path parent = absolute.getParent(); + if (parent == null || !mkdirs(client, parent, FsPermission.getDefault())) { + parent = (parent == null) ? new Path("/") : parent; + disconnect(client); + throw new IOException("create(): Mkdirs failed to create: " + parent); + } + client.allocate(bufferSize); + // Change to parent directory on the server. Only then can we write to the + // file on the server by opening up an OutputStream. As a side effect the + // working directory on the server is changed to the parent directory of the + // file. The FTP client connection is closed when close() is called on the + // FSDataOutputStream. + client.changeWorkingDirectory(parent.toUri().getPath()); + FSDataOutputStream fos = new FSDataOutputStream(client.storeFileStream(file + .getName()), statistics) { + @Override + public void close() throws IOException { + super.close(); + if (!client.isConnected()) { + throw new FTPException("Client not connected"); + } + boolean cmdCompleted = client.completePendingCommand(); + disconnect(client); + if (!cmdCompleted) { + throw new FTPException("Could not complete transfer, Reply Code - " + + client.getReplyCode()); + } + } + }; + if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { + // The ftpClient is an inconsistent state. 
Must close the stream + // which in turn will logout and disconnect from FTP server + fos.close(); + throw new IOException("Unable to create file: " + file + ", Aborting"); + } + return fos; + } + + /** This optional operation is not yet supported. */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean exists(FTPClient client, Path file) { + try { + return getFileStatus(client, file) != null; + } catch (FileNotFoundException fnfe) { + return false; + } catch (IOException ioe) { + throw new FTPException("Failed to get file status", ioe); + } + } + + @Override + public boolean delete(Path file, boolean recursive) throws IOException { + FTPClient client = connect(); + try { + boolean success = delete(client, file, recursive); + return success; + } finally { + disconnect(client); + } + } + + /** @deprecated Use delete(Path, boolean) instead */ + @Deprecated + private boolean delete(FTPClient client, Path file) throws IOException { + return delete(client, file, false); + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean delete(FTPClient client, Path file, boolean recursive) + throws IOException { + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + String pathName = absolute.toUri().getPath(); + FileStatus fileStat = getFileStatus(client, absolute); + if (!fileStat.isDir()) { + return client.deleteFile(pathName); + } + FileStatus[] dirEntries = listStatus(client, absolute); + if (dirEntries != null && dirEntries.length > 0 && !(recursive)) { + throw new IOException("Directory: " + file + " is not empty."); + } + if (dirEntries != null) { + for (int i = 0; i < dirEntries.length; i++) { + delete(client, new Path(absolute, dirEntries[i].getPath()), recursive); + } + } + return client.removeDirectory(pathName); + } + + private FsAction getFsAction(int accessGroup, FTPFile ftpFile) { + FsAction action = FsAction.NONE; + if (ftpFile.hasPermission(accessGroup, FTPFile.READ_PERMISSION)) { + action.or(FsAction.READ); + } + if (ftpFile.hasPermission(accessGroup, FTPFile.WRITE_PERMISSION)) { + action.or(FsAction.WRITE); + } + if (ftpFile.hasPermission(accessGroup, FTPFile.EXECUTE_PERMISSION)) { + action.or(FsAction.EXECUTE); + } + return action; + } + + private FsPermission getPermissions(FTPFile ftpFile) { + FsAction user, group, others; + user = getFsAction(FTPFile.USER_ACCESS, ftpFile); + group = getFsAction(FTPFile.GROUP_ACCESS, ftpFile); + others = getFsAction(FTPFile.WORLD_ACCESS, ftpFile); + return new FsPermission(user, group, others); + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public FileStatus[] listStatus(Path file) throws IOException { + FTPClient client = connect(); + try { + FileStatus[] stats = listStatus(client, file); + return stats; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. 
Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private FileStatus[] listStatus(FTPClient client, Path file) + throws IOException { + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + FileStatus fileStat = getFileStatus(client, absolute); + if (!fileStat.isDir()) { + return new FileStatus[] { fileStat }; + } + FTPFile[] ftpFiles = client.listFiles(absolute.toUri().getPath()); + FileStatus[] fileStats = new FileStatus[ftpFiles.length]; + for (int i = 0; i < ftpFiles.length; i++) { + fileStats[i] = getFileStatus(ftpFiles[i], absolute); + } + return fileStats; + } + + @Override + public FileStatus getFileStatus(Path file) throws IOException { + FTPClient client = connect(); + try { + FileStatus status = getFileStatus(client, file); + return status; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private FileStatus getFileStatus(FTPClient client, Path file) + throws IOException { + FileStatus fileStat = null; + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + Path parentPath = absolute.getParent(); + if (parentPath == null) { // root dir + long length = -1; // Length of root dir on server not known + boolean isDir = true; + int blockReplication = 1; + long blockSize = DEFAULT_BLOCK_SIZE; // Block Size not known. + long modTime = -1; // Modification time of root dir not known. + Path root = new Path("/"); + return new FileStatus(length, isDir, blockReplication, blockSize, + modTime, root.makeQualified(this)); + } + String pathName = parentPath.toUri().getPath(); + FTPFile[] ftpFiles = client.listFiles(pathName); + if (ftpFiles != null) { + for (FTPFile ftpFile : ftpFiles) { + if (ftpFile.getName().equals(file.getName())) { // file found in dir + fileStat = getFileStatus(ftpFile, parentPath); + break; + } + } + if (fileStat == null) { + throw new FileNotFoundException("File " + file + " does not exist."); + } + } else { + throw new FileNotFoundException("File " + file + " does not exist."); + } + return fileStat; + } + + /** + * Convert the file information in FTPFile to a {@link FileStatus} object. * + * + * @param ftpFile + * @param parentPath + * @return FileStatus + */ + private FileStatus getFileStatus(FTPFile ftpFile, Path parentPath) { + long length = ftpFile.getSize(); + boolean isDir = ftpFile.isDirectory(); + int blockReplication = 1; + // Using default block size since there is no way in FTP client to know of + // block sizes on server. The assumption could be less than ideal. 
+ long blockSize = DEFAULT_BLOCK_SIZE; + long modTime = ftpFile.getTimestamp().getTimeInMillis(); + long accessTime = 0; + FsPermission permission = getPermissions(ftpFile); + String user = ftpFile.getUser(); + String group = ftpFile.getGroup(); + Path filePath = new Path(parentPath, ftpFile.getName()); + return new FileStatus(length, isDir, blockReplication, blockSize, modTime, + accessTime, permission, user, group, filePath.makeQualified(this)); + } + + @Override + public boolean mkdirs(Path file, FsPermission permission) throws IOException { + FTPClient client = connect(); + try { + boolean success = mkdirs(client, file, permission); + return success; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean mkdirs(FTPClient client, Path file, FsPermission permission) + throws IOException { + boolean created = true; + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + String pathName = absolute.getName(); + if (!exists(client, absolute)) { + Path parent = absolute.getParent(); + created = (parent == null || mkdirs(client, parent, FsPermission + .getDefault())); + if (created) { + String parentDir = parent.toUri().getPath(); + client.changeWorkingDirectory(parentDir); + created = created & client.makeDirectory(pathName); + } + } else if (isFile(client, absolute)) { + throw new IOException(String.format( + "Can't make directory for path %s since it is a file.", absolute)); + } + return created; + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean isFile(FTPClient client, Path file) { + try { + return !getFileStatus(client, file).isDir(); + } catch (FileNotFoundException e) { + return false; // file does not exist + } catch (IOException ioe) { + throw new FTPException("File check failed", ioe); + } + } + + /* + * Assuming that parent of both source and destination is the same. Is the + * assumption correct or it is suppose to work like 'move' ? + */ + @Override + public boolean rename(Path src, Path dst) throws IOException { + FTPClient client = connect(); + try { + boolean success = rename(client, src, dst); + return success; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. 
+ * + * @param client + * @param src + * @param dst + * @return + * @throws IOException + */ + private boolean rename(FTPClient client, Path src, Path dst) + throws IOException { + Path workDir = new Path(client.printWorkingDirectory()); + Path absoluteSrc = makeAbsolute(workDir, src); + Path absoluteDst = makeAbsolute(workDir, dst); + if (!exists(client, absoluteSrc)) { + throw new IOException("Source path " + src + " does not exist"); + } + if (exists(client, absoluteDst)) { + throw new IOException("Destination path " + dst + + " already exist, cannot rename!"); + } + String parentSrc = absoluteSrc.getParent().toUri().toString(); + String parentDst = absoluteDst.getParent().toUri().toString(); + String from = src.getName(); + String to = dst.getName(); + if (!parentSrc.equals(parentDst)) { + throw new IOException("Cannot rename parent(source): " + parentSrc + + ", parent(destination): " + parentDst); + } + client.changeWorkingDirectory(parentSrc); + boolean renamed = client.rename(from, to); + return renamed; + } + + @Override + public Path getWorkingDirectory() { + // Return home directory always since we do not maintain state. + return getHomeDirectory(); + } + + @Override + public Path getHomeDirectory() { + FTPClient client = null; + try { + client = connect(); + Path homeDir = new Path(client.printWorkingDirectory()); + return homeDir; + } catch (IOException ioe) { + throw new FTPException("Failed to get home directory", ioe); + } finally { + try { + disconnect(client); + } catch (IOException ioe) { + throw new FTPException("Failed to disconnect", ioe); + } + } + } + + @Override + public void setWorkingDirectory(Path newDir) { + // we do not maintain the working directory state + } +} diff --git a/src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java b/src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java new file mode 100644 index 00000000000..f1b78955ae2 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
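A minimal sketch of the fs.ftp.* keys that initialize() and connect() read, with made-up host, credentials and file; fs.ftp.impl is set explicitly here in case this tree's core-default.xml does not already map the ftp scheme:

import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FtpListing {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // scheme-to-class mapping, possibly already supplied by core-default.xml
    conf.set("fs.ftp.impl", "org.apache.hadoop.fs.ftp.FTPFileSystem");
    conf.set("fs.ftp.host", "ftp.example.com");           // made-up server
    conf.set("fs.ftp.user.ftp.example.com", "alice");     // made-up credentials
    conf.set("fs.ftp.password.ftp.example.com", "secret");

    FileSystem ftp = FileSystem.get(URI.create("ftp://ftp.example.com/"), conf);
    InputStream in = ftp.open(new Path("/pub/data.csv")); // made-up file
    // ... consume the stream; it must be closed before issuing further calls,
    // since each FileSystem operation opens and closes its own FTP connection.
    in.close();
    ftp.close();
  }
}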
+ */ +package org.apache.hadoop.fs.ftp; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.net.ftp.FTPClient; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem; + +public class FTPInputStream extends FSInputStream { + + InputStream wrappedStream; + FTPClient client; + FileSystem.Statistics stats; + boolean closed; + long pos; + + public FTPInputStream(InputStream stream, FTPClient client, + FileSystem.Statistics stats) { + if (stream == null) { + throw new IllegalArgumentException("Null InputStream"); + } + if (client == null || !client.isConnected()) { + throw new IllegalArgumentException("FTP client null or not connected"); + } + this.wrappedStream = stream; + this.client = client; + this.stats = stats; + this.pos = 0; + this.closed = false; + } + + public long getPos() throws IOException { + return pos; + } + + // We don't support seek. + public void seek(long pos) throws IOException { + throw new IOException("Seek not supported"); + } + + public boolean seekToNewSource(long targetPos) throws IOException { + throw new IOException("Seek not supported"); + } + + public synchronized int read() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + int byteRead = wrappedStream.read(); + if (byteRead >= 0) { + pos++; + } + if (stats != null & byteRead >= 0) { + stats.incrementBytesRead(1); + } + return byteRead; + } + + public synchronized int read(byte buf[], int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + int result = wrappedStream.read(buf, off, len); + if (result > 0) { + pos += result; + } + if (stats != null & result > 0) { + stats.incrementBytesRead(result); + } + + return result; + } + + public synchronized void close() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + super.close(); + closed = true; + if (!client.isConnected()) { + throw new FTPException("Client not connected"); + } + + boolean cmdCompleted = client.completePendingCommand(); + client.logout(); + client.disconnect(); + if (!cmdCompleted) { + throw new FTPException("Could not complete transfer, Reply Code - " + + client.getReplyCode()); + } + } + + // Not supported. + + public boolean markSupported() { + return false; + } + + public void mark(int readLimit) { + // Do nothing + } + + public void reset() throws IOException { + throw new IOException("Mark not supported"); + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/IFSImpl.java b/src/java/org/apache/hadoop/fs/kfs/IFSImpl.java new file mode 100644 index 00000000000..f2a773663ea --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/IFSImpl.java @@ -0,0 +1,60 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * We need to provide the ability to the code in fs/kfs without really + * having a KFS deployment. In particular, the glue code that wraps + * around calls to KfsAccess object. 
This is accomplished by defining a + * filesystem implementation interface: + * -- for testing purposes, a dummy implementation of this interface + * will suffice; as long as the dummy implementation is close enough + * to doing what KFS does, we are good. + * -- for deployment purposes with KFS, this interface is + * implemented by the KfsImpl object. + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Progressable; + +interface IFSImpl { + public boolean exists(String path) throws IOException; + public boolean isDirectory(String path) throws IOException; + public boolean isFile(String path) throws IOException; + public String[] readdir(String path) throws IOException; + public FileStatus[] readdirplus(Path path) throws IOException; + + public int mkdirs(String path) throws IOException; + public int rename(String source, String dest) throws IOException; + + public int rmdir(String path) throws IOException; + public int remove(String path) throws IOException; + public long filesize(String path) throws IOException; + public short getReplication(String path) throws IOException; + public short setReplication(String path, short replication) throws IOException; + public String[][] getDataLocation(String path, long start, long len) throws IOException; + + public long getModificationTime(String path) throws IOException; + public FSDataOutputStream create(String path, short replication, int bufferSize, Progressable progress) throws IOException; + public FSDataInputStream open(String path, int bufferSize) throws IOException; + public FSDataOutputStream append(String path, int bufferSize, Progressable progress) throws IOException; + +}; diff --git a/src/java/org/apache/hadoop/fs/kfs/KFSImpl.java b/src/java/org/apache/hadoop/fs/kfs/KFSImpl.java new file mode 100644 index 00000000000..bc66ec2570a --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/KFSImpl.java @@ -0,0 +1,151 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Provide the implementation of KFS which turn into calls to KfsAccess. 
+ */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; + +import org.kosmix.kosmosfs.access.KfsAccess; +import org.kosmix.kosmosfs.access.KfsFileAttr; +import org.apache.hadoop.util.Progressable; + +class KFSImpl implements IFSImpl { + private KfsAccess kfsAccess = null; + private FileSystem.Statistics statistics; + + @Deprecated + public KFSImpl(String metaServerHost, int metaServerPort + ) throws IOException { + this(metaServerHost, metaServerPort, null); + } + + public KFSImpl(String metaServerHost, int metaServerPort, + FileSystem.Statistics stats) throws IOException { + kfsAccess = new KfsAccess(metaServerHost, metaServerPort); + statistics = stats; + } + + public boolean exists(String path) throws IOException { + return kfsAccess.kfs_exists(path); + } + + public boolean isDirectory(String path) throws IOException { + return kfsAccess.kfs_isDirectory(path); + } + + public boolean isFile(String path) throws IOException { + return kfsAccess.kfs_isFile(path); + } + + public String[] readdir(String path) throws IOException { + return kfsAccess.kfs_readdir(path); + } + + public FileStatus[] readdirplus(Path path) throws IOException { + String srep = path.toUri().getPath(); + KfsFileAttr[] fattr = kfsAccess.kfs_readdirplus(srep); + if (fattr == null) + return null; + int numEntries = 0; + for (int i = 0; i < fattr.length; i++) { + if ((fattr[i].filename.compareTo(".") == 0) || (fattr[i].filename.compareTo("..") == 0)) + continue; + numEntries++; + } + FileStatus[] fstatus = new FileStatus[numEntries]; + int j = 0; + for (int i = 0; i < fattr.length; i++) { + if ((fattr[i].filename.compareTo(".") == 0) || (fattr[i].filename.compareTo("..") == 0)) + continue; + Path fn = new Path(path, fattr[i].filename); + + if (fattr[i].isDirectory) + fstatus[j] = new FileStatus(0, true, 1, 0, fattr[i].modificationTime, fn); + else + fstatus[j] = new FileStatus(fattr[i].filesize, fattr[i].isDirectory, + fattr[i].replication, + (long) + (1 << 26), + fattr[i].modificationTime, + fn); + + j++; + } + return fstatus; + } + + + public int mkdirs(String path) throws IOException { + return kfsAccess.kfs_mkdirs(path); + } + + public int rename(String source, String dest) throws IOException { + return kfsAccess.kfs_rename(source, dest); + } + + public int rmdir(String path) throws IOException { + return kfsAccess.kfs_rmdir(path); + } + + public int remove(String path) throws IOException { + return kfsAccess.kfs_remove(path); + } + + public long filesize(String path) throws IOException { + return kfsAccess.kfs_filesize(path); + } + + public short getReplication(String path) throws IOException { + return kfsAccess.kfs_getReplication(path); + } + + public short setReplication(String path, short replication) throws IOException { + return kfsAccess.kfs_setReplication(path, replication); + } + + public String[][] getDataLocation(String path, long start, long len) throws IOException { + return kfsAccess.kfs_getDataLocation(path, start, len); + } + + public long getModificationTime(String path) throws IOException { + return kfsAccess.kfs_getModificationTime(path); + } + + public FSDataInputStream open(String path, int bufferSize) throws IOException { + return new FSDataInputStream(new KFSInputStream(kfsAccess, path, + statistics)); + } + + public FSDataOutputStream create(String path, short replication, 
int bufferSize, Progressable progress) throws IOException { + return new FSDataOutputStream(new KFSOutputStream(kfsAccess, path, replication, false, progress), + statistics); + } + + public FSDataOutputStream append(String path, int bufferSize, Progressable progress) throws IOException { + // when opening for append, # of replicas is ignored + return new FSDataOutputStream(new KFSOutputStream(kfsAccess, path, (short) 1, true, progress), + statistics); + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java b/src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java new file mode 100644 index 00000000000..bb2c32c31bd --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java @@ -0,0 +1,130 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Implements the Hadoop FSInputStream interfaces to allow applications to read + * files in Kosmos File System (KFS). + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; +import java.nio.ByteBuffer; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FSInputStream; + +import org.kosmix.kosmosfs.access.KfsAccess; +import org.kosmix.kosmosfs.access.KfsInputChannel; + +class KFSInputStream extends FSInputStream { + + private KfsInputChannel kfsChannel; + private FileSystem.Statistics statistics; + private long fsize; + + @Deprecated + public KFSInputStream(KfsAccess kfsAccess, String path) { + this(kfsAccess, path, null); + } + + public KFSInputStream(KfsAccess kfsAccess, String path, + FileSystem.Statistics stats) { + this.statistics = stats; + this.kfsChannel = kfsAccess.kfs_open(path); + if (this.kfsChannel != null) + this.fsize = kfsAccess.kfs_filesize(path); + else + this.fsize = 0; + } + + public long getPos() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + return kfsChannel.tell(); + } + + public synchronized int available() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + return (int) (this.fsize - getPos()); + } + + public synchronized void seek(long targetPos) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + kfsChannel.seek(targetPos); + } + + public synchronized boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + public synchronized int read() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + byte b[] = new byte[1]; + int res = read(b, 0, 1); + if (res == 1) { + if (statistics != null) { + statistics.incrementBytesRead(1); + } + return b[0] & 0xff; + } + return -1; + } + + public synchronized int read(byte b[], int off, int len) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + int res; + + res = kfsChannel.read(ByteBuffer.wrap(b, off, len)); + // Use -1 to signify EOF + if (res == 0) + return -1; + if (statistics != null) { + statistics.incrementBytesRead(res); + } + 
return res; + } + + public synchronized void close() throws IOException { + if (kfsChannel == null) { + return; + } + + kfsChannel.close(); + kfsChannel = null; + } + + public boolean markSupported() { + return false; + } + + public void mark(int readLimit) { + // Do nothing + } + + public void reset() throws IOException { + throw new IOException("Mark not supported"); + } + +} diff --git a/src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java b/src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java new file mode 100644 index 00000000000..e55f4205d8f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java @@ -0,0 +1,97 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Implements the Hadoop FSOutputStream interfaces to allow applications to write to + * files in Kosmos File System (KFS). + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.nio.ByteBuffer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.util.Progressable; + +import org.kosmix.kosmosfs.access.KfsAccess; +import org.kosmix.kosmosfs.access.KfsOutputChannel; + +class KFSOutputStream extends OutputStream { + + private String path; + private KfsOutputChannel kfsChannel; + private Progressable progressReporter; + + public KFSOutputStream(KfsAccess kfsAccess, String path, short replication, + boolean append, Progressable prog) { + this.path = path; + + if ((append) && (kfsAccess.kfs_isFile(path))) + this.kfsChannel = kfsAccess.kfs_append(path); + else + this.kfsChannel = kfsAccess.kfs_create(path, replication); + this.progressReporter = prog; + } + + public long getPos() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + return kfsChannel.tell(); + } + + public void write(int v) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + byte[] b = new byte[1]; + + b[0] = (byte) v; + write(b, 0, 1); + } + + public void write(byte b[], int off, int len) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + + // touch the progress before going into KFS since the call can block + progressReporter.progress(); + kfsChannel.write(ByteBuffer.wrap(b, off, len)); + } + + public void flush() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + // touch the progress before going into KFS since the call can block + progressReporter.progress(); + kfsChannel.sync(); + } + + public synchronized void close() throws IOException { + if (kfsChannel == null) { + return; + } + flush(); + kfsChannel.close(); + kfsChannel = null; + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java b/src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java new file mode 100644 index 00000000000..57b27a2a0e9 --- /dev/null +++ 
b/src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java @@ -0,0 +1,340 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Implements the Hadoop FS interfaces to allow applications to store + *files in Kosmos File System (KFS). + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +/** + * A FileSystem backed by KFS. + * + */ + +public class KosmosFileSystem extends FileSystem { + + private FileSystem localFs; + private IFSImpl kfsImpl = null; + private URI uri; + private Path workingDir = new Path("/"); + + public KosmosFileSystem() { + + } + + KosmosFileSystem(IFSImpl fsimpl) { + this.kfsImpl = fsimpl; + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + try { + if (kfsImpl == null) { + if (uri.getHost() == null) { + kfsImpl = new KFSImpl(conf.get("fs.kfs.metaServerHost", ""), + conf.getInt("fs.kfs.metaServerPort", -1), + statistics); + } else { + kfsImpl = new KFSImpl(uri.getHost(), uri.getPort(), statistics); + } + } + + this.localFs = FileSystem.getLocal(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + this.workingDir = new Path("/user", System.getProperty("user.name") + ).makeQualified(this); + setConf(conf); + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Unable to initialize KFS"); + System.exit(-1); + } + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + @Override + public void setWorkingDirectory(Path dir) { + workingDir = makeAbsolute(dir); + } + + private Path makeAbsolute(Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workingDir, path); + } + + @Override + public boolean mkdirs(Path path, FsPermission permission + ) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + int res; + + // System.out.println("Calling mkdirs on: " + srep); + + res = kfsImpl.mkdirs(srep); + + return res == 0; + } + + @Override + @Deprecated + public boolean isDirectory(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + // System.out.println("Calling isdir on: " + srep); + + return kfsImpl.isDirectory(srep); + } + + @Override + @Deprecated + public boolean isFile(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = 
absolute.toUri().getPath(); + return kfsImpl.isFile(srep); + } + + @Override + public FileStatus[] listStatus(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + if (kfsImpl.isFile(srep)) + return new FileStatus[] { getFileStatus(path) } ; + + return kfsImpl.readdirplus(absolute); + } + + @Override + public FileStatus getFileStatus(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + if (!kfsImpl.exists(srep)) { + throw new FileNotFoundException("File " + path + " does not exist."); + } + if (kfsImpl.isDirectory(srep)) { + // System.out.println("Status of path: " + path + " is dir"); + return new FileStatus(0, true, 1, 0, kfsImpl.getModificationTime(srep), + path.makeQualified(this)); + } else { + // System.out.println("Status of path: " + path + " is file"); + return new FileStatus(kfsImpl.filesize(srep), false, + kfsImpl.getReplication(srep), + getDefaultBlockSize(), + kfsImpl.getModificationTime(srep), + path.makeQualified(this)); + } + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + Path parent = f.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + + Path absolute = makeAbsolute(f); + String srep = absolute.toUri().getPath(); + + return kfsImpl.append(srep, bufferSize, progress); + } + + @Override + public FSDataOutputStream create(Path file, FsPermission permission, + boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) + throws IOException { + + if (exists(file)) { + if (overwrite) { + delete(file, true); + } else { + throw new IOException("File already exists: " + file); + } + } + + Path parent = file.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + + Path absolute = makeAbsolute(file); + String srep = absolute.toUri().getPath(); + + return kfsImpl.create(srep, replication, bufferSize, progress); + } + + @Override + public FSDataInputStream open(Path path, int bufferSize) throws IOException { + if (!exists(path)) + throw new IOException("File does not exist: " + path); + + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + return kfsImpl.open(srep, bufferSize); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + Path absoluteS = makeAbsolute(src); + String srepS = absoluteS.toUri().getPath(); + Path absoluteD = makeAbsolute(dst); + String srepD = absoluteD.toUri().getPath(); + + // System.out.println("Calling rename on: " + srepS + " -> " + srepD); + + return kfsImpl.rename(srepS, srepD) == 0; + } + + // recursively delete the directory and its contents + @Override + public boolean delete(Path path, boolean recursive) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + if (kfsImpl.isFile(srep)) + return kfsImpl.remove(srep) == 0; + + FileStatus[] dirEntries = listStatus(absolute); + if ((!recursive) && (dirEntries != null) && + (dirEntries.length != 0)) { + throw new IOException("Directory " + path.toString() + + " is not empty."); + } + if (dirEntries != null) { + for (int i = 0; i < dirEntries.length; i++) { + delete(new Path(absolute, dirEntries[i].getPath()), recursive); + } + } + return kfsImpl.rmdir(srep) == 0; + } + + @Override + public short getDefaultReplication() { + 
return 3; + } + + @Override + public boolean setReplication(Path path, short replication) + throws IOException { + + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + int res = kfsImpl.setReplication(srep, replication); + return res >= 0; + } + + // 64MB is the KFS block size + + @Override + public long getDefaultBlockSize() { + return 1 << 26; + } + + @Deprecated + public void lock(Path path, boolean shared) throws IOException { + + } + + @Deprecated + public void release(Path path) throws IOException { + + } + + /** + * Return null if the file doesn't exist; otherwise, get the + * locations of the various chunks of the file file from KFS. + */ + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + + if (file == null) { + return null; + } + String srep = makeAbsolute(file.getPath()).toUri().getPath(); + String[][] hints = kfsImpl.getDataLocation(srep, start, len); + if (hints == null) { + return null; + } + BlockLocation[] result = new BlockLocation[hints.length]; + long blockSize = getDefaultBlockSize(); + long length = len; + long blockStart = start; + for(int i=0; i < result.length; ++i) { + result[i] = new BlockLocation(null, hints[i], blockStart, + length < blockSize ? length : blockSize); + blockStart += blockSize; + length -= blockSize; + } + return result; + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException { + FileUtil.copy(localFs, src, this, dst, delSrc, getConf()); + } + + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException { + FileUtil.copy(this, src, localFs, dst, delSrc, getConf()); + } + + @Override + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return tmpLocalFile; + } + + @Override + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + moveFromLocalFile(tmpLocalFile, fsOutputFile); + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/package.html b/src/java/org/apache/hadoop/fs/kfs/package.html new file mode 100644 index 00000000000..365b60b4fa5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/package.html @@ -0,0 +1,98 @@ + + + + + + +

A client for the Kosmos filesystem (KFS)

+ +

Introduction

+ +This page describes how to use the Kosmos Filesystem +(KFS) as a backing +store with Hadoop. It assumes that you have downloaded the +KFS software and installed the necessary binaries as outlined in the KFS +documentation. + +

Steps

+ +
    +
  • In the Hadoop conf directory edit core-site.xml + and add the following (a short client-side usage sketch follows these steps):
    +<property>
    +  <name>fs.kfs.impl</name>
    +  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
    +  <description>The FileSystem for kfs: uris.</description>
    +</property>
    +            
    + +
  • In the Hadoop conf directory edit core-site.xml, + adding the following (with appropriate values for + <server> and <port>): +
    +<property>
    +  <name>fs.default.name</name>
    +  <value>kfs://<server:port></value> 
    +</property>
    +
    +<property>
    +  <name>fs.kfs.metaServerHost</name>
    +  <value><server></value>
    +  <description>The location of the KFS meta server.</description>
    +</property>
    +
    +<property>
    +  <name>fs.kfs.metaServerPort</name>
    +  <value><port></value>
    +  <description>The location of the meta server's port.</description>
    +</property>
    +
    +
    +
  • Copy KFS's kfs-0.1.jar to Hadoop's lib directory. This step + enables Hadoop to load the KFS-specific modules. Note + that kfs-0.1.jar was built when you compiled the KFS source + code. This jar file contains code that calls KFS's client + library code via JNI; the native code is in KFS's + libkfsClient.so library.
  • When the Hadoop map/reduce trackers start up, those +processes (on local as well as remote nodes) will need to load +KFS's libkfsClient.so library. To simplify this, it is advisable to +store libkfsClient.so in an NFS-accessible directory (similar to where +Hadoop binaries/scripts are stored); then modify Hadoop's +conf/hadoop-env.sh by adding the following line, providing a suitable +value for <path>:
    +export LD_LIBRARY_PATH=<path>
    +
    + + +
  • Start only the map/reduce trackers +
    + example: execute Hadoop's bin/start-mapred.sh
+
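To make the steps above concrete, here is a minimal client-side sketch (not part of the patch itself), assuming the properties shown earlier are in place in core-site.xml. The class name, meta server host/port, and file path are placeholders; any client that goes through the generic FileSystem API, as the map/reduce trackers do, resolves kfs:// paths to KosmosFileSystem in the same way.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical example class, not shipped with Hadoop or KFS.
public class KfsReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();   // picks up core-site.xml from the conf dir
    // Placeholder URI: substitute your meta server host/port and a real file path.
    Path file = new Path("kfs://kfshost:20000/user/demo/part-00000");
    FileSystem fs = file.getFileSystem(conf);   // resolved to KosmosFileSystem via fs.kfs.impl
    FSDataInputStream in = fs.open(file, 4096);
    byte[] buf = new byte[4096];
    int n;
    while ((n = in.read(buf)) > 0) {
      System.out.write(buf, 0, n);              // copy the file contents to stdout
    }
    in.close();
  }
}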
+ +If the map/reduce job trackers start up, all file-I/O is done to KFS. + + + diff --git a/src/java/org/apache/hadoop/fs/package.html b/src/java/org/apache/hadoop/fs/package.html new file mode 100644 index 00000000000..71bfdc8a40f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/package.html @@ -0,0 +1,23 @@ + + + + + +An abstract file system API. + + diff --git a/src/java/org/apache/hadoop/fs/permission/AccessControlException.java b/src/java/org/apache/hadoop/fs/permission/AccessControlException.java new file mode 100644 index 00000000000..49880f9dcdd --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/AccessControlException.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +import java.io.IOException; + +/** + * An exception class for access control related issues. + * @deprecated Use {@link org.apache.hadoop.security.AccessControlException} + * instead. + */ +@Deprecated +public class AccessControlException extends IOException { + //Required by {@link java.io.Serializable}. + private static final long serialVersionUID = 1L; + + /** + * Default constructor is needed for unwrapping from + * {@link org.apache.hadoop.ipc.RemoteException}. + */ + public AccessControlException() { + super("Permission denied."); + } + + /** + * Constructs an {@link AccessControlException} + * with the specified detail message. + * @param s the detail message. + */ + public AccessControlException(String s) { + super(s); + } + + /** + * Constructs a new exception with the specified cause and a detail + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public AccessControlException(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/apache/hadoop/fs/permission/FsAction.java b/src/java/org/apache/hadoop/fs/permission/FsAction.java new file mode 100644 index 00000000000..5aafd21b33a --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/FsAction.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +/** + * File system actions, e.g. read, write, etc. + */ +public enum FsAction { + // POSIX style + NONE("---"), + EXECUTE("--x"), + WRITE("-w-"), + WRITE_EXECUTE("-wx"), + READ("r--"), + READ_EXECUTE("r-x"), + READ_WRITE("rw-"), + ALL("rwx"); + + /** Retain reference to value array. */ + private final static FsAction[] vals = values(); + + /** Symbolic representation */ + public final String SYMBOL; + + private FsAction(String s) { + SYMBOL = s; + } + + /** + * Return true if this action implies that action. + * @param that + */ + public boolean implies(FsAction that) { + if (that != null) { + return (ordinal() & that.ordinal()) == that.ordinal(); + } + return false; + } + + /** AND operation. */ + public FsAction and(FsAction that) { + return vals[ordinal() & that.ordinal()]; + } + /** OR operation. */ + public FsAction or(FsAction that) { + return vals[ordinal() | that.ordinal()]; + } + /** NOT operation. */ + public FsAction not() { + return vals[7 - ordinal()]; + } +} diff --git a/src/java/org/apache/hadoop/fs/permission/FsPermission.java b/src/java/org/apache/hadoop/fs/permission/FsPermission.java new file mode 100644 index 00000000000..e92d35bceac --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/FsPermission.java @@ -0,0 +1,232 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.*; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * A class for file/directory permissions. + */ +public class FsPermission implements Writable { + static final WritableFactory FACTORY = new WritableFactory() { + public Writable newInstance() { return new FsPermission(); } + }; + static { // register a ctor + WritableFactories.setFactory(FsPermission.class, FACTORY); + } + + /** Create an immutable {@link FsPermission} object. 
*/ + public static FsPermission createImmutable(short permission) { + return new FsPermission(permission) { + public FsPermission applyUMask(FsPermission umask) { + throw new UnsupportedOperationException(); + } + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + //POSIX permission style + private FsAction useraction = null; + private FsAction groupaction = null; + private FsAction otheraction = null; + private boolean stickyBit = false; + + private FsPermission() {} + + /** + * Construct by the given {@link FsAction}. + * @param u user action + * @param g group action + * @param o other action + */ + public FsPermission(FsAction u, FsAction g, FsAction o) { + this(u, g, o, false); + } + + public FsPermission(FsAction u, FsAction g, FsAction o, boolean sb) { + set(u, g, o, sb); + } + + /** + * Construct by the given mode. + * @param mode + * @see #toShort() + */ + public FsPermission(short mode) { fromShort(mode); } + + /** + * Copy constructor + * + * @param other other permission + */ + public FsPermission(FsPermission other) { + this.useraction = other.useraction; + this.groupaction = other.groupaction; + this.otheraction = other.otheraction; + } + + /** Return user {@link FsAction}. */ + public FsAction getUserAction() {return useraction;} + + /** Return group {@link FsAction}. */ + public FsAction getGroupAction() {return groupaction;} + + /** Return other {@link FsAction}. */ + public FsAction getOtherAction() {return otheraction;} + + private void set(FsAction u, FsAction g, FsAction o, boolean sb) { + useraction = u; + groupaction = g; + otheraction = o; + stickyBit = sb; + } + + public void fromShort(short n) { + FsAction[] v = FsAction.values(); + + set(v[(n >>> 6) & 7], v[(n >>> 3) & 7], v[n & 7], (((n >>> 9) & 1) == 1) ); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + out.writeShort(toShort()); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + fromShort(in.readShort()); + } + + /** + * Create and initialize a {@link FsPermission} from {@link DataInput}. + */ + public static FsPermission read(DataInput in) throws IOException { + FsPermission p = new FsPermission(); + p.readFields(in); + return p; + } + + /** + * Encode the object to a short. + */ + public short toShort() { + int s = (stickyBit ? 1 << 9 : 0) | + (useraction.ordinal() << 6) | + (groupaction.ordinal() << 3) | + otheraction.ordinal(); + + return (short)s; + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj instanceof FsPermission) { + FsPermission that = (FsPermission)obj; + return this.useraction == that.useraction + && this.groupaction == that.groupaction + && this.otheraction == that.otheraction + && this.stickyBit == that.stickyBit; + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() {return toShort();} + + /** {@inheritDoc} */ + public String toString() { + String str = useraction.SYMBOL + groupaction.SYMBOL + otheraction.SYMBOL; + if(stickyBit) { + StringBuilder str2 = new StringBuilder(str); + str2.replace(str2.length() - 1, str2.length(), + otheraction.implies(FsAction.EXECUTE) ? 
"t" : "T"); + str = str2.toString(); + } + + return str; + } + + /** Apply a umask to this permission and return a new one */ + public FsPermission applyUMask(FsPermission umask) { + return new FsPermission(useraction.and(umask.useraction.not()), + groupaction.and(umask.groupaction.not()), + otheraction.and(umask.otheraction.not())); + } + + /** umask property label */ + public static final String UMASK_LABEL = "dfs.umask"; + public static final int DEFAULT_UMASK = 0022; + + /** Get the user file creation mask (umask) */ + public static FsPermission getUMask(Configuration conf) { + int umask = DEFAULT_UMASK; + if (conf != null) { + umask = conf.getInt(UMASK_LABEL, DEFAULT_UMASK); + } + return new FsPermission((short)umask); + } + + public boolean getStickyBit() { + return stickyBit; + } + + /** Set the user file creation mask (umask) */ + public static void setUMask(Configuration conf, FsPermission umask) { + conf.setInt(UMASK_LABEL, umask.toShort()); + } + + /** Get the default permission. */ + public static FsPermission getDefault() { + return new FsPermission((short)00777); + } + + /** + * Create a FsPermission from a Unix symbolic permission string + * @param unixSymbolicPermission e.g. "-rw-rw-rw-" + */ + public static FsPermission valueOf(String unixSymbolicPermission) { + if (unixSymbolicPermission == null) { + return null; + } + else if (unixSymbolicPermission.length() != 10) { + throw new IllegalArgumentException("length != 10(unixSymbolicPermission=" + + unixSymbolicPermission + ")"); + } + + int n = 0; + for(int i = 1; i < unixSymbolicPermission.length(); i++) { + n = n << 1; + char c = unixSymbolicPermission.charAt(i); + n += (c == '-' || c == 'T' || c == 'S') ? 0: 1; + } + + // Add sticky bit value if set + if(unixSymbolicPermission.charAt(9) == 't' || + unixSymbolicPermission.charAt(9) == 'T') + n += 01000; + + return new FsPermission((short)n); + } +} diff --git a/src/java/org/apache/hadoop/fs/permission/PermissionStatus.java b/src/java/org/apache/hadoop/fs/permission/PermissionStatus.java new file mode 100644 index 00000000000..4f36abbe625 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/PermissionStatus.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +import org.apache.hadoop.io.*; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Store permission related information. 
+ */ +public class PermissionStatus implements Writable { + static final WritableFactory FACTORY = new WritableFactory() { + public Writable newInstance() { return new PermissionStatus(); } + }; + static { // register a ctor + WritableFactories.setFactory(PermissionStatus.class, FACTORY); + } + + /** Create an immutable {@link PermissionStatus} object. */ + public static PermissionStatus createImmutable( + String user, String group, FsPermission permission) { + return new PermissionStatus(user, group, permission) { + public PermissionStatus applyUMask(FsPermission umask) { + throw new UnsupportedOperationException(); + } + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + private String username; + private String groupname; + private FsPermission permission; + + private PermissionStatus() {} + + /** Constructor */ + public PermissionStatus(String user, String group, FsPermission permission) { + username = user; + groupname = group; + this.permission = permission; + } + + /** Return user name */ + public String getUserName() {return username;} + + /** Return group name */ + public String getGroupName() {return groupname;} + + /** Return permission */ + public FsPermission getPermission() {return permission;} + + /** + * Apply umask. + * @see FsPermission#applyUMask(FsPermission) + */ + public PermissionStatus applyUMask(FsPermission umask) { + permission = permission.applyUMask(umask); + return this; + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + username = Text.readString(in); + groupname = Text.readString(in); + permission = FsPermission.read(in); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + write(out, username, groupname, permission); + } + + /** + * Create and initialize a {@link PermissionStatus} from {@link DataInput}. + */ + public static PermissionStatus read(DataInput in) throws IOException { + PermissionStatus p = new PermissionStatus(); + p.readFields(in); + return p; + } + + /** + * Serialize a {@link PermissionStatus} from its base components. + */ + public static void write(DataOutput out, + String username, + String groupname, + FsPermission permission) throws IOException { + Text.writeString(out, username); + Text.writeString(out, groupname); + permission.write(out); + } + + /** {@inheritDoc} */ + public String toString() { + return username + ":" + groupname + ":" + permission; + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/Block.java b/src/java/org/apache/hadoop/fs/s3/Block.java new file mode 100644 index 00000000000..e24ad264038 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/Block.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3; + +/** + * Holds metadata about a block of data being stored in a {@link FileSystemStore}. + */ +public class Block { + private long id; + + private long length; + + public Block(long id, long length) { + this.id = id; + this.length = length; + } + + public long getId() { + return id; + } + + public long getLength() { + return length; + } + + @Override + public String toString() { + return "Block[" + id + ", " + length + "]"; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/FileSystemStore.java b/src/java/org/apache/hadoop/fs/s3/FileSystemStore.java new file mode 100644 index 00000000000..a46472a8150 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/FileSystemStore.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +/** + * A facility for storing and retrieving {@link INode}s and {@link Block}s. + */ +public interface FileSystemStore { + + void initialize(URI uri, Configuration conf) throws IOException; + String getVersion() throws IOException; + + void storeINode(Path path, INode inode) throws IOException; + void storeBlock(Block block, File file) throws IOException; + + boolean inodeExists(Path path) throws IOException; + boolean blockExists(long blockId) throws IOException; + + INode retrieveINode(Path path) throws IOException; + File retrieveBlock(Block block, long byteRangeStart) throws IOException; + + void deleteINode(Path path) throws IOException; + void deleteBlock(Block block) throws IOException; + + Set listSubPaths(Path path) throws IOException; + Set listDeepSubPaths(Path path) throws IOException; + + /** + * Delete everything. Used for testing. + * @throws IOException + */ + void purge() throws IOException; + + /** + * Diagnostic method to dump all INodes to the console. + * @throws IOException + */ + void dump() throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/s3/INode.java b/src/java/org/apache/hadoop/fs/s3/INode.java new file mode 100644 index 00000000000..ec7f67c266c --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/INode.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Holds file metadata including type (regular file, or directory), + * and the list of blocks that are pointers to the data. + */ +public class INode { + + enum FileType { + DIRECTORY, FILE + } + + public static final FileType[] FILE_TYPES = { + FileType.DIRECTORY, + FileType.FILE + }; + + public static final INode DIRECTORY_INODE = new INode(FileType.DIRECTORY, null); + + private FileType fileType; + private Block[] blocks; + + public INode(FileType fileType, Block[] blocks) { + this.fileType = fileType; + if (isDirectory() && blocks != null) { + throw new IllegalArgumentException("A directory cannot contain blocks."); + } + this.blocks = blocks; + } + + public Block[] getBlocks() { + return blocks; + } + + public FileType getFileType() { + return fileType; + } + + public boolean isDirectory() { + return fileType == FileType.DIRECTORY; + } + + public boolean isFile() { + return fileType == FileType.FILE; + } + + public long getSerializedLength() { + return 1L + (blocks == null ? 0 : 4 + blocks.length * 16); + } + + + public InputStream serialize() throws IOException { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(bytes); + out.writeByte(fileType.ordinal()); + if (isFile()) { + out.writeInt(blocks.length); + for (int i = 0; i < blocks.length; i++) { + out.writeLong(blocks[i].getId()); + out.writeLong(blocks[i].getLength()); + } + } + out.close(); + return new ByteArrayInputStream(bytes.toByteArray()); + } + + public static INode deserialize(InputStream in) throws IOException { + if (in == null) { + return null; + } + DataInputStream dataIn = new DataInputStream(in); + FileType fileType = INode.FILE_TYPES[dataIn.readByte()]; + switch (fileType) { + case DIRECTORY: + in.close(); + return INode.DIRECTORY_INODE; + case FILE: + int numBlocks = dataIn.readInt(); + Block[] blocks = new Block[numBlocks]; + for (int i = 0; i < numBlocks; i++) { + long id = dataIn.readLong(); + long length = dataIn.readLong(); + blocks[i] = new Block(id, length); + } + in.close(); + return new INode(fileType, blocks); + default: + throw new IllegalArgumentException("Cannot deserialize inode."); + } + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java b/src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java new file mode 100644 index 00000000000..b5131d62449 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java @@ -0,0 +1,390 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3.INode.FileType; +import org.jets3t.service.S3Service; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.security.AWSCredentials; + +class Jets3tFileSystemStore implements FileSystemStore { + + private static final String FILE_SYSTEM_NAME = "fs"; + private static final String FILE_SYSTEM_VALUE = "Hadoop"; + + private static final String FILE_SYSTEM_TYPE_NAME = "fs-type"; + private static final String FILE_SYSTEM_TYPE_VALUE = "block"; + + private static final String FILE_SYSTEM_VERSION_NAME = "fs-version"; + private static final String FILE_SYSTEM_VERSION_VALUE = "1"; + + private static final Map METADATA = + new HashMap(); + + static { + METADATA.put(FILE_SYSTEM_NAME, FILE_SYSTEM_VALUE); + METADATA.put(FILE_SYSTEM_TYPE_NAME, FILE_SYSTEM_TYPE_VALUE); + METADATA.put(FILE_SYSTEM_VERSION_NAME, FILE_SYSTEM_VERSION_VALUE); + } + + private static final String PATH_DELIMITER = Path.SEPARATOR; + private static final String BLOCK_PREFIX = "block_"; + + private Configuration conf; + + private S3Service s3Service; + + private S3Bucket bucket; + + private int bufferSize; + + private static final Log LOG = + LogFactory.getLog(Jets3tFileSystemStore.class.getName()); + + public void initialize(URI uri, Configuration conf) throws IOException { + + this.conf = conf; + + S3Credentials s3Credentials = new S3Credentials(); + s3Credentials.initialize(uri, conf); + try { + AWSCredentials awsCredentials = + new AWSCredentials(s3Credentials.getAccessKey(), + s3Credentials.getSecretAccessKey()); + this.s3Service = new RestS3Service(awsCredentials); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + bucket = new S3Bucket(uri.getHost()); + + this.bufferSize = conf.getInt("io.file.buffer.size", 4096); + } + + public String getVersion() throws IOException { + return FILE_SYSTEM_VERSION_VALUE; + } + + private void delete(String key) throws IOException { + try { + s3Service.deleteObject(bucket, key); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void deleteINode(Path path) throws IOException { + delete(pathToKey(path)); + } + + public void deleteBlock(Block block) throws IOException { + 
delete(blockToKey(block)); + } + + public boolean inodeExists(Path path) throws IOException { + InputStream in = get(pathToKey(path), true); + if (in == null) { + return false; + } + in.close(); + return true; + } + + public boolean blockExists(long blockId) throws IOException { + InputStream in = get(blockToKey(blockId), false); + if (in == null) { + return false; + } + in.close(); + return true; + } + + private InputStream get(String key, boolean checkMetadata) + throws IOException { + + try { + S3Object object = s3Service.getObject(bucket, key); + if (checkMetadata) { + checkMetadata(object); + } + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private InputStream get(String key, long byteRangeStart) throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key, null, null, null, + null, byteRangeStart, null); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private void checkMetadata(S3Object object) throws S3FileSystemException, + S3ServiceException { + + String name = (String) object.getMetadata(FILE_SYSTEM_NAME); + if (!FILE_SYSTEM_VALUE.equals(name)) { + throw new S3FileSystemException("Not a Hadoop S3 file."); + } + String type = (String) object.getMetadata(FILE_SYSTEM_TYPE_NAME); + if (!FILE_SYSTEM_TYPE_VALUE.equals(type)) { + throw new S3FileSystemException("Not a block file."); + } + String dataVersion = (String) object.getMetadata(FILE_SYSTEM_VERSION_NAME); + if (!FILE_SYSTEM_VERSION_VALUE.equals(dataVersion)) { + throw new VersionMismatchException(FILE_SYSTEM_VERSION_VALUE, + dataVersion); + } + } + + public INode retrieveINode(Path path) throws IOException { + return INode.deserialize(get(pathToKey(path), true)); + } + + public File retrieveBlock(Block block, long byteRangeStart) + throws IOException { + File fileBlock = null; + InputStream in = null; + OutputStream out = null; + try { + fileBlock = newBackupFile(); + in = get(blockToKey(block), byteRangeStart); + out = new BufferedOutputStream(new FileOutputStream(fileBlock)); + byte[] buf = new byte[bufferSize]; + int numRead; + while ((numRead = in.read(buf)) >= 0) { + out.write(buf, 0, numRead); + } + return fileBlock; + } catch (IOException e) { + // close output stream to file then delete file + closeQuietly(out); + out = null; // to prevent a second close + if (fileBlock != null) { + boolean b = fileBlock.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + } + throw e; + } finally { + closeQuietly(out); + closeQuietly(in); + } + } + + private File newBackupFile() throws IOException { + File dir = new File(conf.get("fs.s3.buffer.dir")); + if (!dir.exists() && !dir.mkdirs()) { + throw new IOException("Cannot create S3 buffer directory: " + dir); + } + File result = File.createTempFile("input-", ".tmp", dir); + result.deleteOnExit(); + return result; + } + + public Set listSubPaths(Path path) throws IOException { + try { + String prefix = pathToKey(path); + if (!prefix.endsWith(PATH_DELIMITER)) { + prefix += PATH_DELIMITER; + } + S3Object[] objects = s3Service.listObjects(bucket, prefix, PATH_DELIMITER); + Set prefixes = new TreeSet(); + for (int i = 0; i < 
objects.length; i++) { + prefixes.add(keyToPath(objects[i].getKey())); + } + prefixes.remove(path); + return prefixes; + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public Set listDeepSubPaths(Path path) throws IOException { + try { + String prefix = pathToKey(path); + if (!prefix.endsWith(PATH_DELIMITER)) { + prefix += PATH_DELIMITER; + } + S3Object[] objects = s3Service.listObjects(bucket, prefix, null); + Set prefixes = new TreeSet(); + for (int i = 0; i < objects.length; i++) { + prefixes.add(keyToPath(objects[i].getKey())); + } + prefixes.remove(path); + return prefixes; + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private void put(String key, InputStream in, long length, boolean storeMetadata) + throws IOException { + + try { + S3Object object = new S3Object(key); + object.setDataInputStream(in); + object.setContentType("binary/octet-stream"); + object.setContentLength(length); + if (storeMetadata) { + object.addAllMetadata(METADATA); + } + s3Service.putObject(bucket, object); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void storeINode(Path path, INode inode) throws IOException { + put(pathToKey(path), inode.serialize(), inode.getSerializedLength(), true); + } + + public void storeBlock(Block block, File file) throws IOException { + BufferedInputStream in = null; + try { + in = new BufferedInputStream(new FileInputStream(file)); + put(blockToKey(block), in, block.getLength(), false); + } finally { + closeQuietly(in); + } + } + + private void closeQuietly(Closeable closeable) { + if (closeable != null) { + try { + closeable.close(); + } catch (IOException e) { + // ignore + } + } + } + + private String pathToKey(Path path) { + if (!path.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + path); + } + return path.toUri().getPath(); + } + + private Path keyToPath(String key) { + return new Path(key); + } + + private String blockToKey(long blockId) { + return BLOCK_PREFIX + blockId; + } + + private String blockToKey(Block block) { + return blockToKey(block.getId()); + } + + public void purge() throws IOException { + try { + S3Object[] objects = s3Service.listObjects(bucket); + for (int i = 0; i < objects.length; i++) { + s3Service.deleteObject(bucket, objects[i].getKey()); + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void dump() throws IOException { + StringBuilder sb = new StringBuilder("S3 Filesystem, "); + sb.append(bucket.getName()).append("\n"); + try { + S3Object[] objects = s3Service.listObjects(bucket, PATH_DELIMITER, null); + for (int i = 0; i < objects.length; i++) { + Path path = keyToPath(objects[i].getKey()); + sb.append(path).append("\n"); + INode m = retrieveINode(path); + sb.append("\t").append(m.getFileType()).append("\n"); + if (m.getFileType() == FileType.DIRECTORY) { + continue; + } + for (int j = 0; j < m.getBlocks().length; j++) { + sb.append("\t").append(m.getBlocks()[j]).append("\n"); + } + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + System.out.println(sb); 
+ } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/MigrationTool.java b/src/java/org/apache/hadoop/fs/s3/MigrationTool.java new file mode 100644 index 00000000000..cce31f0869f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/MigrationTool.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.jets3t.service.S3Service; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.security.AWSCredentials; + +/** + *

+ * <p>
+ * This class is a tool for migrating data from an older to a newer version
+ * of an S3 filesystem.
+ * </p>

+ *

+ * <p>
+ * All files in the filesystem are migrated by re-writing the block metadata
+ * - no datafiles are touched.
+ * </p>
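+ * <p>
+ * A minimal invocation sketch (the bucket name is a placeholder; this simply
+ * mirrors the ToolRunner entry point in main below):
+ * </p>
+ * <pre>
+ *   int res = ToolRunner.run(new MigrationTool(),
+ *                            new String[] { "s3://BUCKET" });
+ *   System.exit(res);
+ * </pre>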

+ */ +public class MigrationTool extends Configured implements Tool { + + private S3Service s3Service; + private S3Bucket bucket; + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MigrationTool(), args); + System.exit(res); + } + + public int run(String[] args) throws Exception { + + if (args.length == 0) { + System.err.println("Usage: MigrationTool "); + System.err.println("\t\tfilesystem to migrate"); + ToolRunner.printGenericCommandUsage(System.err); + return -1; + } + + URI uri = URI.create(args[0]); + + initialize(uri); + + FileSystemStore newStore = new Jets3tFileSystemStore(); + newStore.initialize(uri, getConf()); + + if (get("%2F") != null) { + System.err.println("Current version number is [unversioned]."); + System.err.println("Target version number is " + + newStore.getVersion() + "."); + Store oldStore = new UnversionedStore(); + migrate(oldStore, newStore); + return 0; + } else { + S3Object root = get("/"); + if (root != null) { + String version = (String) root.getMetadata("fs-version"); + if (version == null) { + System.err.println("Can't detect version - exiting."); + } else { + String newVersion = newStore.getVersion(); + System.err.println("Current version number is " + version + "."); + System.err.println("Target version number is " + newVersion + "."); + if (version.equals(newStore.getVersion())) { + System.err.println("No migration required."); + return 0; + } + // use version number to create Store + //Store oldStore = ... + //migrate(oldStore, newStore); + System.err.println("Not currently implemented."); + return 0; + } + } + System.err.println("Can't detect version - exiting."); + return 0; + } + + } + + public void initialize(URI uri) throws IOException { + + + + try { + String accessKey = null; + String secretAccessKey = null; + String userInfo = uri.getUserInfo(); + if (userInfo != null) { + int index = userInfo.indexOf(':'); + if (index != -1) { + accessKey = userInfo.substring(0, index); + secretAccessKey = userInfo.substring(index + 1); + } else { + accessKey = userInfo; + } + } + if (accessKey == null) { + accessKey = getConf().get("fs.s3.awsAccessKeyId"); + } + if (secretAccessKey == null) { + secretAccessKey = getConf().get("fs.s3.awsSecretAccessKey"); + } + if (accessKey == null && secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID and Secret Access Key " + + "must be specified as the username " + + "or password (respectively) of a s3 URL, " + + "or by setting the " + + "fs.s3.awsAccessKeyId or " + + "fs.s3.awsSecretAccessKey properties (respectively)."); + } else if (accessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID must be specified " + + "as the username of a s3 URL, or by setting the " + + "fs.s3.awsAccessKeyId property."); + } else if (secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Secret Access Key must be specified " + + "as the password of a s3 URL, or by setting the " + + "fs.s3.awsSecretAccessKey property."); + } + AWSCredentials awsCredentials = + new AWSCredentials(accessKey, secretAccessKey); + this.s3Service = new RestS3Service(awsCredentials); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + bucket = new S3Bucket(uri.getHost()); + } + + private void migrate(Store oldStore, FileSystemStore newStore) + throws IOException { + for (Path path : oldStore.listAllPaths()) { + INode inode = 
oldStore.retrieveINode(path); + oldStore.deleteINode(path); + newStore.storeINode(path, inode); + } + } + + private S3Object get(String key) { + try { + return s3Service.getObject(bucket, key); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + } + return null; + } + + interface Store { + + Set listAllPaths() throws IOException; + INode retrieveINode(Path path) throws IOException; + void deleteINode(Path path) throws IOException; + + } + + class UnversionedStore implements Store { + + public Set listAllPaths() throws IOException { + try { + String prefix = urlEncode(Path.SEPARATOR); + S3Object[] objects = s3Service.listObjects(bucket, prefix, null); + Set prefixes = new TreeSet(); + for (int i = 0; i < objects.length; i++) { + prefixes.add(keyToPath(objects[i].getKey())); + } + return prefixes; + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void deleteINode(Path path) throws IOException { + delete(pathToKey(path)); + } + + private void delete(String key) throws IOException { + try { + s3Service.deleteObject(bucket, key); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public INode retrieveINode(Path path) throws IOException { + return INode.deserialize(get(pathToKey(path))); + } + + private InputStream get(String key) throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private String pathToKey(Path path) { + if (!path.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + path); + } + return urlEncode(path.toUri().getPath()); + } + + private Path keyToPath(String key) { + return new Path(urlDecode(key)); + } + + private String urlEncode(String s) { + try { + return URLEncoder.encode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Should never happen since every implementation of the Java Platform + // is required to support UTF-8. + // See http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html + throw new IllegalStateException(e); + } + } + + private String urlDecode(String s) { + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Should never happen since every implementation of the Java Platform + // is required to support UTF-8. + // See http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html + throw new IllegalStateException(e); + } + } + + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3Credentials.java b/src/java/org/apache/hadoop/fs/s3/S3Credentials.java new file mode 100644 index 00000000000..039499e2a65 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3Credentials.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; + +/** + *

+ * <p>
+ * Extracts AWS credentials from the filesystem URI or configuration.
+ * </p>
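+ * <p>
+ * A minimal sketch of the two supported sources (ID, SECRET and BUCKET are
+ * placeholders; the property names follow the scheme-based pattern used by
+ * initialize below):
+ * </p>
+ * <pre>
+ *   // either embed the credentials in the filesystem URI ...
+ *   s3://ID:SECRET@BUCKET
+ *
+ *   // ... or supply them through the configuration
+ *   conf.set("fs.s3.awsAccessKeyId", "ID");
+ *   conf.set("fs.s3.awsSecretAccessKey", "SECRET");
+ * </pre>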

+ */ +public class S3Credentials { + + private String accessKey; + private String secretAccessKey; + + /** + * @throws IllegalArgumentException if credentials for S3 cannot be + * determined. + */ + public void initialize(URI uri, Configuration conf) { + if (uri.getHost() == null) { + throw new IllegalArgumentException("Invalid hostname in URI " + uri); + } + + String userInfo = uri.getUserInfo(); + if (userInfo != null) { + int index = userInfo.indexOf(':'); + if (index != -1) { + accessKey = userInfo.substring(0, index); + secretAccessKey = userInfo.substring(index + 1); + } else { + accessKey = userInfo; + } + } + + String scheme = uri.getScheme(); + String accessKeyProperty = String.format("fs.%s.awsAccessKeyId", scheme); + String secretAccessKeyProperty = + String.format("fs.%s.awsSecretAccessKey", scheme); + if (accessKey == null) { + accessKey = conf.get(accessKeyProperty); + } + if (secretAccessKey == null) { + secretAccessKey = conf.get(secretAccessKeyProperty); + } + if (accessKey == null && secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID and Secret Access " + + "Key must be specified as the " + + "username or password " + + "(respectively) of a " + scheme + + " URL, or by setting the " + + accessKeyProperty + " or " + + secretAccessKeyProperty + + " properties (respectively)."); + } else if (accessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID must be specified " + + "as the username of a " + scheme + + " URL, or by setting the " + + accessKeyProperty + " property."); + } else if (secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Secret Access Key must be " + + "specified as the password of a " + + scheme + " URL, or by setting the " + + secretAccessKeyProperty + + " property."); + } + + } + + public String getAccessKey() { + return accessKey; + } + + public String getSecretAccessKey() { + return secretAccessKey; + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3Exception.java b/src/java/org/apache/hadoop/fs/s3/S3Exception.java new file mode 100644 index 00000000000..7047676a6c7 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3Exception.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.IOException; + +/** + * Thrown if there is a problem communicating with Amazon S3. 
+ */ +public class S3Exception extends IOException { + + private static final long serialVersionUID = 1L; + + public S3Exception(Throwable t) { + super(t); + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3FileSystem.java b/src/java/org/apache/hadoop/fs/s3/S3FileSystem.java new file mode 100644 index 00000000000..b0013aa0a96 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3FileSystem.java @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3native.NativeS3FileSystem; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.util.Progressable; + +/** + *

+ * <p>
+ * A block-based {@link FileSystem} backed by Amazon S3.
+ * </p>
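+ * <p>
+ * A minimal usage sketch (bucket and credential values are placeholders):
+ * </p>
+ * <pre>
+ *   Configuration conf = new Configuration();
+ *   conf.set("fs.s3.awsAccessKeyId", "ID");
+ *   conf.set("fs.s3.awsSecretAccessKey", "SECRET");
+ *   FileSystem fs = FileSystem.get(URI.create("s3://BUCKET"), conf);
+ *   FSDataOutputStream out = fs.create(new Path("/dir1/file1"));
+ * </pre>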

+ * @see NativeS3FileSystem + */ +public class S3FileSystem extends FileSystem { + + private URI uri; + + private FileSystemStore store; + + private Path workingDir; + + public S3FileSystem() { + // set store in initialize() + } + + public S3FileSystem(FileSystemStore store) { + this.store = store; + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + if (store == null) { + store = createDefaultStore(conf); + } + store.initialize(uri, conf); + setConf(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + this.workingDir = + new Path("/user", System.getProperty("user.name")).makeQualified(this); + } + + private static FileSystemStore createDefaultStore(Configuration conf) { + FileSystemStore store = new Jets3tFileSystemStore(); + + RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep( + conf.getInt("fs.s3.maxRetries", 4), + conf.getLong("fs.s3.sleepTimeSeconds", 10), TimeUnit.SECONDS); + Map,RetryPolicy> exceptionToPolicyMap = + new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(IOException.class, basePolicy); + exceptionToPolicyMap.put(S3Exception.class, basePolicy); + + RetryPolicy methodPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + Map methodNameToPolicyMap = new HashMap(); + methodNameToPolicyMap.put("storeBlock", methodPolicy); + methodNameToPolicyMap.put("retrieveBlock", methodPolicy); + + return (FileSystemStore) RetryProxy.create(FileSystemStore.class, + store, methodNameToPolicyMap); + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + @Override + public void setWorkingDirectory(Path dir) { + workingDir = makeAbsolute(dir); + } + + private Path makeAbsolute(Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workingDir, path); + } + + /** + * @param permission Currently ignored. 
+ */ + @Override + public boolean mkdirs(Path path, FsPermission permission) throws IOException { + Path absolutePath = makeAbsolute(path); + List paths = new ArrayList(); + do { + paths.add(0, absolutePath); + absolutePath = absolutePath.getParent(); + } while (absolutePath != null); + + boolean result = true; + for (Path p : paths) { + result &= mkdir(p); + } + return result; + } + + private boolean mkdir(Path path) throws IOException { + Path absolutePath = makeAbsolute(path); + INode inode = store.retrieveINode(absolutePath); + if (inode == null) { + store.storeINode(absolutePath, INode.DIRECTORY_INODE); + } else if (inode.isFile()) { + throw new IOException(String.format( + "Can't make directory for path %s since it is a file.", + absolutePath)); + } + return true; + } + + @Override + public boolean isFile(Path path) throws IOException { + INode inode = store.retrieveINode(makeAbsolute(path)); + if (inode == null) { + return false; + } + return inode.isFile(); + } + + private INode checkFile(Path path) throws IOException { + INode inode = store.retrieveINode(makeAbsolute(path)); + if (inode == null) { + throw new IOException("No such file."); + } + if (inode.isDirectory()) { + throw new IOException("Path " + path + " is a directory."); + } + return inode; + } + + @Override + public FileStatus[] listStatus(Path f) throws IOException { + Path absolutePath = makeAbsolute(f); + INode inode = store.retrieveINode(absolutePath); + if (inode == null) { + return null; + } + if (inode.isFile()) { + return new FileStatus[] { + new S3FileStatus(f.makeQualified(this), inode) + }; + } + ArrayList ret = new ArrayList(); + for (Path p : store.listSubPaths(absolutePath)) { + ret.add(getFileStatus(p.makeQualified(this))); + } + return ret.toArray(new FileStatus[0]); + } + + /** This optional operation is not yet supported. */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + /** + * @param permission Currently ignored. 
+ */ + @Override + public FSDataOutputStream create(Path file, FsPermission permission, + boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) + throws IOException { + + INode inode = store.retrieveINode(makeAbsolute(file)); + if (inode != null) { + if (overwrite) { + delete(file, true); + } else { + throw new IOException("File already exists: " + file); + } + } else { + Path parent = file.getParent(); + if (parent != null) { + if (!mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent.toString()); + } + } + } + return new FSDataOutputStream + (new S3OutputStream(getConf(), store, makeAbsolute(file), + blockSize, progress, bufferSize), + statistics); + } + + @Override + public FSDataInputStream open(Path path, int bufferSize) throws IOException { + INode inode = checkFile(path); + return new FSDataInputStream(new S3InputStream(getConf(), store, inode, + statistics)); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + Path absoluteSrc = makeAbsolute(src); + INode srcINode = store.retrieveINode(absoluteSrc); + if (srcINode == null) { + // src path doesn't exist + return false; + } + Path absoluteDst = makeAbsolute(dst); + INode dstINode = store.retrieveINode(absoluteDst); + if (dstINode != null && dstINode.isDirectory()) { + absoluteDst = new Path(absoluteDst, absoluteSrc.getName()); + dstINode = store.retrieveINode(absoluteDst); + } + if (dstINode != null) { + // dst path already exists - can't overwrite + return false; + } + Path dstParent = absoluteDst.getParent(); + if (dstParent != null) { + INode dstParentINode = store.retrieveINode(dstParent); + if (dstParentINode == null || dstParentINode.isFile()) { + // dst parent doesn't exist or is a file + return false; + } + } + return renameRecursive(absoluteSrc, absoluteDst); + } + + private boolean renameRecursive(Path src, Path dst) throws IOException { + INode srcINode = store.retrieveINode(src); + store.storeINode(dst, srcINode); + store.deleteINode(src); + if (srcINode.isDirectory()) { + for (Path oldSrc : store.listDeepSubPaths(src)) { + INode inode = store.retrieveINode(oldSrc); + if (inode == null) { + return false; + } + String oldSrcPath = oldSrc.toUri().getPath(); + String srcPath = src.toUri().getPath(); + String dstPath = dst.toUri().getPath(); + Path newDst = new Path(oldSrcPath.replaceFirst(srcPath, dstPath)); + store.storeINode(newDst, inode); + store.deleteINode(oldSrc); + } + } + return true; + } + + public boolean delete(Path path, boolean recursive) throws IOException { + Path absolutePath = makeAbsolute(path); + INode inode = store.retrieveINode(absolutePath); + if (inode == null) { + return false; + } + if (inode.isFile()) { + store.deleteINode(absolutePath); + for (Block block: inode.getBlocks()) { + store.deleteBlock(block); + } + } else { + FileStatus[] contents = listStatus(absolutePath); + if (contents == null) { + return false; + } + if ((contents.length !=0) && (!recursive)) { + throw new IOException("Directory " + path.toString() + + " is not empty."); + } + for (FileStatus p:contents) { + if (!delete(p.getPath(), recursive)) { + return false; + } + } + store.deleteINode(absolutePath); + } + return true; + } + + /** + * FileStatus for S3 file systems. 
+ */ + @Override + public FileStatus getFileStatus(Path f) throws IOException { + INode inode = store.retrieveINode(makeAbsolute(f)); + if (inode == null) { + throw new FileNotFoundException(f + ": No such file or directory."); + } + return new S3FileStatus(f.makeQualified(this), inode); + } + + // diagnostic methods + + void dump() throws IOException { + store.dump(); + } + + void purge() throws IOException { + store.purge(); + } + + private static class S3FileStatus extends FileStatus { + + S3FileStatus(Path f, INode inode) throws IOException { + super(findLength(inode), inode.isDirectory(), 1, + findBlocksize(inode), 0, f); + } + + private static long findLength(INode inode) { + if (!inode.isDirectory()) { + long length = 0L; + for (Block block : inode.getBlocks()) { + length += block.getLength(); + } + return length; + } + return 0; + } + + private static long findBlocksize(INode inode) { + final Block[] ret = inode.getBlocks(); + return ret == null ? 0L : ret[0].getLength(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java b/src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java new file mode 100644 index 00000000000..f4a5141adbc --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3; + +import java.io.IOException; + +/** + * Thrown when there is a fatal exception while using {@link S3FileSystem}. + */ +public class S3FileSystemException extends IOException { + private static final long serialVersionUID = 1L; + + public S3FileSystemException(String message) { + super(message); + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3InputStream.java b/src/java/org/apache/hadoop/fs/s3/S3InputStream.java new file mode 100644 index 00000000000..db5eded7ad3 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3InputStream.java @@ -0,0 +1,211 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3; + +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem; + +class S3InputStream extends FSInputStream { + + private FileSystemStore store; + + private Block[] blocks; + + private boolean closed; + + private long fileLength; + + private long pos = 0; + + private File blockFile; + + private DataInputStream blockStream; + + private long blockEnd = -1; + + private FileSystem.Statistics stats; + + private static final Log LOG = + LogFactory.getLog(S3InputStream.class.getName()); + + + @Deprecated + public S3InputStream(Configuration conf, FileSystemStore store, + INode inode) { + this(conf, store, inode, null); + } + + public S3InputStream(Configuration conf, FileSystemStore store, + INode inode, FileSystem.Statistics stats) { + + this.store = store; + this.stats = stats; + this.blocks = inode.getBlocks(); + for (Block block : blocks) { + this.fileLength += block.getLength(); + } + } + + @Override + public synchronized long getPos() throws IOException { + return pos; + } + + @Override + public synchronized int available() throws IOException { + return (int) (fileLength - pos); + } + + @Override + public synchronized void seek(long targetPos) throws IOException { + if (targetPos > fileLength) { + throw new IOException("Cannot seek after EOF"); + } + pos = targetPos; + blockEnd = -1; + } + + @Override + public synchronized boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public synchronized int read() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + int result = -1; + if (pos < fileLength) { + if (pos > blockEnd) { + blockSeekTo(pos); + } + result = blockStream.read(); + if (result >= 0) { + pos++; + } + } + if (stats != null & result >= 0) { + stats.incrementBytesRead(1); + } + return result; + } + + @Override + public synchronized int read(byte buf[], int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + if (pos < fileLength) { + if (pos > blockEnd) { + blockSeekTo(pos); + } + int realLen = Math.min(len, (int) (blockEnd - pos + 1)); + int result = blockStream.read(buf, off, realLen); + if (result >= 0) { + pos += result; + } + if (stats != null && result > 0) { + stats.incrementBytesRead(result); + } + return result; + } + return -1; + } + + private synchronized void blockSeekTo(long target) throws IOException { + // + // Compute desired block + // + int targetBlock = -1; + long targetBlockStart = 0; + long targetBlockEnd = 0; + for (int i = 0; i < blocks.length; i++) { + long blockLength = blocks[i].getLength(); + targetBlockEnd = targetBlockStart + blockLength - 1; + + if (target >= targetBlockStart && target <= targetBlockEnd) { + targetBlock = i; + break; + } else { + targetBlockStart = targetBlockEnd + 1; + } + } + if (targetBlock < 0) { + throw new IOException( + "Impossible situation: could not find target position " + target); + } + long offsetIntoBlock = target - targetBlockStart; + + // read block blocks[targetBlock] from position offsetIntoBlock + + this.blockFile = store.retrieveBlock(blocks[targetBlock], offsetIntoBlock); + + this.pos = target; + this.blockEnd = targetBlockEnd; + this.blockStream = new DataInputStream(new 
FileInputStream(blockFile)); + + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + if (blockStream != null) { + blockStream.close(); + blockStream = null; + } + if (blockFile != null) { + boolean b = blockFile.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + } + super.close(); + closed = true; + } + + /** + * We don't support marks. + */ + @Override + public boolean markSupported() { + return false; + } + + @Override + public void mark(int readLimit) { + // Do nothing + } + + @Override + public void reset() throws IOException { + throw new IOException("Mark not supported"); + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3OutputStream.java b/src/java/org/apache/hadoop/fs/s3/S3OutputStream.java new file mode 100644 index 00000000000..f3fee2d5342 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3OutputStream.java @@ -0,0 +1,231 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3.INode.FileType; +import org.apache.hadoop.util.Progressable; + +class S3OutputStream extends OutputStream { + + private Configuration conf; + + private int bufferSize; + + private FileSystemStore store; + + private Path path; + + private long blockSize; + + private File backupFile; + + private OutputStream backupStream; + + private Random r = new Random(); + + private boolean closed; + + private int pos = 0; + + private long filePos = 0; + + private int bytesWrittenToBlock = 0; + + private byte[] outBuf; + + private List blocks = new ArrayList(); + + private Block nextBlock; + + private static final Log LOG = + LogFactory.getLog(S3OutputStream.class.getName()); + + + public S3OutputStream(Configuration conf, FileSystemStore store, + Path path, long blockSize, Progressable progress, + int buffersize) throws IOException { + + this.conf = conf; + this.store = store; + this.path = path; + this.blockSize = blockSize; + this.backupFile = newBackupFile(); + this.backupStream = new FileOutputStream(backupFile); + this.bufferSize = buffersize; + this.outBuf = new byte[bufferSize]; + + } + + private File newBackupFile() throws IOException { + File dir = new File(conf.get("fs.s3.buffer.dir")); + if (!dir.exists() && !dir.mkdirs()) { + throw new IOException("Cannot create S3 buffer directory: " + dir); + } + File result = File.createTempFile("output-", ".tmp", dir); + 
result.deleteOnExit(); + return result; + } + + public long getPos() throws IOException { + return filePos; + } + + @Override + public synchronized void write(int b) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if ((bytesWrittenToBlock + pos == blockSize) || (pos >= bufferSize)) { + flush(); + } + outBuf[pos++] = (byte) b; + filePos++; + } + + @Override + public synchronized void write(byte b[], int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + while (len > 0) { + int remaining = bufferSize - pos; + int toWrite = Math.min(remaining, len); + System.arraycopy(b, off, outBuf, pos, toWrite); + pos += toWrite; + off += toWrite; + len -= toWrite; + filePos += toWrite; + + if ((bytesWrittenToBlock + pos >= blockSize) || (pos == bufferSize)) { + flush(); + } + } + } + + @Override + public synchronized void flush() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if (bytesWrittenToBlock + pos >= blockSize) { + flushData((int) blockSize - bytesWrittenToBlock); + } + if (bytesWrittenToBlock == blockSize) { + endBlock(); + } + flushData(pos); + } + + private synchronized void flushData(int maxPos) throws IOException { + int workingPos = Math.min(pos, maxPos); + + if (workingPos > 0) { + // + // To the local block backup, write just the bytes + // + backupStream.write(outBuf, 0, workingPos); + + // + // Track position + // + bytesWrittenToBlock += workingPos; + System.arraycopy(outBuf, workingPos, outBuf, 0, pos - workingPos); + pos -= workingPos; + } + } + + private synchronized void endBlock() throws IOException { + // + // Done with local copy + // + backupStream.close(); + + // + // Send it to S3 + // + // TODO: Use passed in Progressable to report progress. + nextBlockOutputStream(); + store.storeBlock(nextBlock, backupFile); + internalClose(); + + // + // Delete local backup, start new one + // + boolean b = backupFile.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + backupFile = newBackupFile(); + backupStream = new FileOutputStream(backupFile); + bytesWrittenToBlock = 0; + } + + private synchronized void nextBlockOutputStream() throws IOException { + long blockId = r.nextLong(); + while (store.blockExists(blockId)) { + blockId = r.nextLong(); + } + nextBlock = new Block(blockId, bytesWrittenToBlock); + blocks.add(nextBlock); + bytesWrittenToBlock = 0; + } + + private synchronized void internalClose() throws IOException { + INode inode = new INode(FileType.FILE, blocks.toArray(new Block[blocks + .size()])); + store.storeINode(path, inode); + } + + @Override + public synchronized void close() throws IOException { + if (closed) { + return; + } + + flush(); + if (filePos == 0 || bytesWrittenToBlock != 0) { + endBlock(); + } + + backupStream.close(); + boolean b = backupFile.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + + super.close(); + + closed = true; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java b/src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java new file mode 100644 index 00000000000..22c6d67f777 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3; + +/** + * Thrown when Hadoop cannot read the version of the data stored + * in {@link S3FileSystem}. + */ +public class VersionMismatchException extends S3FileSystemException { + private static final long serialVersionUID = 1L; + + public VersionMismatchException(String clientVersion, String dataVersion) { + super("Version mismatch: client expects version " + clientVersion + + ", but data has version " + + (dataVersion == null ? "[unversioned]" : dataVersion)); + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/package.html b/src/java/org/apache/hadoop/fs/s3/package.html new file mode 100644 index 00000000000..dd601e104e5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/package.html @@ -0,0 +1,55 @@ + + + + + + +

+<p>
+A distributed, block-based implementation of {@link
+org.apache.hadoop.fs.FileSystem} that uses Amazon S3
+as a backing store.
+</p>

+ +

+<p>
+Files are stored in S3 as blocks (represented by
+{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length.
+Block metadata is stored in S3 as a small record (represented by
+{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded
+path string as a key. Inodes record the file type (regular file or
+directory) and the list of blocks.
+This design makes it easy to seek to any given position in a file by reading
+the inode data to compute which block to access, then using S3's support for
+HTTP Range headers to start streaming from the correct position.
+Renames are also efficient since only the inode is moved (by a DELETE
+followed by a PUT, since S3 does not support renames).
+</p>
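+<p>
+A sketch of how a read position is mapped onto a block under this design
+(the inode variable is assumed to hold a retrieved
+{@link org.apache.hadoop.fs.s3.INode}; the S3 input stream's block-seek
+logic follows the same pattern):
+</p>
+<pre>
+  long target = 12345;   // desired read position within the file
+  long start = 0;
+  for (Block block : inode.getBlocks()) {
+    long end = start + block.getLength() - 1;
+    if (end >= target) {
+      // the wanted byte is in this block; fetch it from S3 and start
+      // reading at offset (target - start) within the block
+      break;
+    }
+    start = end + 1;
+  }
+</pre>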

+

+<p>
+For a single file /dir1/file1 which takes two blocks of storage, the file
+structure in S3 would be something like this:
+</p>

+
+/
+/dir1
+/dir1/file1
+block-6415776850131549260
+block-3026438247347758425
+
+

+<p>
+Inodes start with a leading /, while blocks are prefixed with block-.
+</p>

+ + + diff --git a/src/java/org/apache/hadoop/fs/s3native/FileMetadata.java b/src/java/org/apache/hadoop/fs/s3native/FileMetadata.java new file mode 100644 index 00000000000..23797e81c0d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/FileMetadata.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +/** + *

+ * <p>
+ * Holds basic metadata for a file stored in a {@link NativeFileSystemStore}.
+ * </p>

+ */ +class FileMetadata { + private final String key; + private final long length; + private final long lastModified; + + public FileMetadata(String key, long length, long lastModified) { + this.key = key; + this.length = length; + this.lastModified = lastModified; + } + + public String getKey() { + return key; + } + + public long getLength() { + return length; + } + + public long getLastModified() { + return lastModified; + } + + @Override + public String toString() { + return "FileMetadata[" + key + ", " + length + ", " + lastModified + "]"; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java b/src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java new file mode 100644 index 00000000000..b24a8e06b7c --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +import static org.apache.hadoop.fs.s3native.NativeS3FileSystem.PATH_DELIMITER; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3.S3Credentials; +import org.apache.hadoop.fs.s3.S3Exception; +import org.jets3t.service.S3ObjectsChunk; +import org.jets3t.service.S3Service; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.security.AWSCredentials; + +class Jets3tNativeFileSystemStore implements NativeFileSystemStore { + + private S3Service s3Service; + private S3Bucket bucket; + + public void initialize(URI uri, Configuration conf) throws IOException { + S3Credentials s3Credentials = new S3Credentials(); + s3Credentials.initialize(uri, conf); + try { + AWSCredentials awsCredentials = + new AWSCredentials(s3Credentials.getAccessKey(), + s3Credentials.getSecretAccessKey()); + this.s3Service = new RestS3Service(awsCredentials); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + bucket = new S3Bucket(uri.getHost()); + } + + public void storeFile(String key, File file, byte[] md5Hash) + throws IOException { + + BufferedInputStream in = null; + try { + in = new BufferedInputStream(new FileInputStream(file)); + S3Object object = new S3Object(key); + object.setDataInputStream(in); + object.setContentType("binary/octet-stream"); + object.setContentLength(file.length()); + if (md5Hash != 
null) { + object.setMd5Hash(md5Hash); + } + s3Service.putObject(bucket, object); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + // ignore + } + } + } + } + + public void storeEmptyFile(String key) throws IOException { + try { + S3Object object = new S3Object(key); + object.setDataInputStream(new ByteArrayInputStream(new byte[0])); + object.setContentType("binary/octet-stream"); + object.setContentLength(0); + s3Service.putObject(bucket, object); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public FileMetadata retrieveMetadata(String key) throws IOException { + try { + S3Object object = s3Service.getObjectDetails(bucket, key); + return new FileMetadata(key, object.getContentLength(), + object.getLastModifiedDate().getTime()); + } catch (S3ServiceException e) { + // Following is brittle. Is there a better way? + if (e.getMessage().contains("ResponseCode=404")) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public InputStream retrieve(String key) throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public InputStream retrieve(String key, long byteRangeStart) + throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key, null, null, null, + null, byteRangeStart, null); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public PartialListing list(String prefix, int maxListingLength) + throws IOException { + return list(prefix, maxListingLength, null); + } + + public PartialListing list(String prefix, int maxListingLength, + String priorLastKey) throws IOException { + + return list(prefix, PATH_DELIMITER, maxListingLength, priorLastKey); + } + + public PartialListing listAll(String prefix, int maxListingLength, + String priorLastKey) throws IOException { + + return list(prefix, null, maxListingLength, priorLastKey); + } + + private PartialListing list(String prefix, String delimiter, + int maxListingLength, String priorLastKey) throws IOException { + try { + if (prefix.length() > 0 && !prefix.endsWith(PATH_DELIMITER)) { + prefix += PATH_DELIMITER; + } + S3ObjectsChunk chunk = s3Service.listObjectsChunked(bucket.getName(), + prefix, delimiter, maxListingLength, priorLastKey); + + FileMetadata[] fileMetadata = + new FileMetadata[chunk.getObjects().length]; + for (int i = 0; i < fileMetadata.length; i++) { + S3Object object = chunk.getObjects()[i]; + fileMetadata[i] = new FileMetadata(object.getKey(), + object.getContentLength(), object.getLastModifiedDate().getTime()); + } + return new PartialListing(chunk.getPriorLastKey(), fileMetadata, + chunk.getCommonPrefixes()); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + 
} + throw new S3Exception(e); + } + } + + public void delete(String key) throws IOException { + try { + s3Service.deleteObject(bucket, key); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void rename(String srcKey, String dstKey) throws IOException { + try { + s3Service.moveObject(bucket.getName(), srcKey, bucket.getName(), + new S3Object(dstKey), false); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void purge(String prefix) throws IOException { + try { + S3Object[] objects = s3Service.listObjects(bucket, prefix, null); + for (int i = 0; i < objects.length; i++) { + s3Service.deleteObject(bucket, objects[i].getKey()); + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void dump() throws IOException { + StringBuilder sb = new StringBuilder("S3 Native Filesystem, "); + sb.append(bucket.getName()).append("\n"); + try { + S3Object[] objects = s3Service.listObjects(bucket); + for (int i = 0; i < objects.length; i++) { + sb.append(objects[i].getKey()).append("\n"); + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + System.out.println(sb); + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java b/src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java new file mode 100644 index 00000000000..eb0a6824869 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; + +/** + *

+ * <p>
+ * An abstraction for a key-based {@link File} store.
+ * </p>
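+ * <p>
+ * A sketch of paging through a listing via this interface (the
+ * continuation-key accessor on {@link PartialListing} and the page size of
+ * 1000 are assumptions used for illustration):
+ * </p>
+ * <pre>
+ *   String priorLastKey = null;
+ *   do {
+ *     PartialListing listing = store.list("dir1/", 1000, priorLastKey);
+ *     // consume the files and common prefixes in this page ...
+ *     priorLastKey = listing.getPriorLastKey();
+ *   } while (priorLastKey != null);
+ * </pre>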

+ */ +interface NativeFileSystemStore { + + void initialize(URI uri, Configuration conf) throws IOException; + + void storeFile(String key, File file, byte[] md5Hash) throws IOException; + void storeEmptyFile(String key) throws IOException; + + FileMetadata retrieveMetadata(String key) throws IOException; + InputStream retrieve(String key) throws IOException; + InputStream retrieve(String key, long byteRangeStart) throws IOException; + + PartialListing list(String prefix, int maxListingLength) throws IOException; + PartialListing list(String prefix, int maxListingLength, String priorLastKey) + throws IOException; + PartialListing listAll(String prefix, int maxListingLength, + String priorLastKey) throws IOException; + + void delete(String key) throws IOException; + + void rename(String srcKey, String dstKey) throws IOException; + + /** + * Delete all keys with the given prefix. Used for testing. + * @throws IOException + */ + void purge(String prefix) throws IOException; + + /** + * Diagnostic method to dump state to the console. + * @throws IOException + */ + void dump() throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java new file mode 100644 index 00000000000..7ec60655dd9 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java @@ -0,0 +1,578 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3native; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BufferedFSInputStream; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3.S3Exception; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.util.Progressable; + +/** + *
<p>
+ * A {@link FileSystem} for reading and writing files stored on + * Amazon S3. + * Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + * stores files on S3 in their + * native form so they can be read by other S3 tools. + *
</p>
+ * @see org.apache.hadoop.fs.s3.S3FileSystem + */ +public class NativeS3FileSystem extends FileSystem { + + public static final Log LOG = + LogFactory.getLog(NativeS3FileSystem.class); + + private static final String FOLDER_SUFFIX = "_$folder$"; + private static final long MAX_S3_FILE_SIZE = 5 * 1024 * 1024 * 1024L; + static final String PATH_DELIMITER = Path.SEPARATOR; + private static final int S3_MAX_LISTING_LENGTH = 1000; + + private class NativeS3FsInputStream extends FSInputStream { + + private InputStream in; + private final String key; + private long pos = 0; + + public NativeS3FsInputStream(InputStream in, String key) { + this.in = in; + this.key = key; + } + + public synchronized int read() throws IOException { + int result = in.read(); + if (result != -1) { + pos++; + } + if (statistics != null && result != -1) { + statistics.incrementBytesRead(1); + } + return result; + } + public synchronized int read(byte[] b, int off, int len) + throws IOException { + + int result = in.read(b, off, len); + if (result > 0) { + pos += result; + } + if (statistics != null && result > 0) { + statistics.incrementBytesRead(result); + } + return result; + } + + public void close() throws IOException { + in.close(); + } + + public synchronized void seek(long pos) throws IOException { + in.close(); + in = store.retrieve(key, pos); + this.pos = pos; + } + public synchronized long getPos() throws IOException { + return pos; + } + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + } + + private class NativeS3FsOutputStream extends OutputStream { + + private Configuration conf; + private String key; + private File backupFile; + private OutputStream backupStream; + private MessageDigest digest; + private boolean closed; + + public NativeS3FsOutputStream(Configuration conf, + NativeFileSystemStore store, String key, Progressable progress, + int bufferSize) throws IOException { + this.conf = conf; + this.key = key; + this.backupFile = newBackupFile(); + try { + this.digest = MessageDigest.getInstance("MD5"); + this.backupStream = new BufferedOutputStream(new DigestOutputStream( + new FileOutputStream(backupFile), this.digest)); + } catch (NoSuchAlgorithmException e) { + LOG.warn("Cannot load MD5 digest algorithm," + + "skipping message integrity check.", e); + this.backupStream = new BufferedOutputStream( + new FileOutputStream(backupFile)); + } + } + + private File newBackupFile() throws IOException { + File dir = new File(conf.get("fs.s3.buffer.dir")); + if (!dir.mkdirs() && !dir.exists()) { + throw new IOException("Cannot create S3 buffer directory: " + dir); + } + File result = File.createTempFile("output-", ".tmp", dir); + result.deleteOnExit(); + return result; + } + + @Override + public void flush() throws IOException { + backupStream.flush(); + } + + @Override + public synchronized void close() throws IOException { + if (closed) { + return; + } + + backupStream.close(); + + try { + byte[] md5Hash = digest == null ? 
null : digest.digest(); + store.storeFile(key, backupFile, md5Hash); + } finally { + if (!backupFile.delete()) { + LOG.warn("Could not delete temporary s3n file: " + backupFile); + } + super.close(); + closed = true; + } + + } + + @Override + public void write(int b) throws IOException { + backupStream.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + backupStream.write(b, off, len); + } + + + } + + private URI uri; + private NativeFileSystemStore store; + private Path workingDir; + + public NativeS3FileSystem() { + // set store in initialize() + } + + public NativeS3FileSystem(NativeFileSystemStore store) { + this.store = store; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + if (store == null) { + store = createDefaultStore(conf); + } + store.initialize(uri, conf); + setConf(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + this.workingDir = + new Path("/user", System.getProperty("user.name")).makeQualified(this); + } + + private static NativeFileSystemStore createDefaultStore(Configuration conf) { + NativeFileSystemStore store = new Jets3tNativeFileSystemStore(); + + RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep( + conf.getInt("fs.s3.maxRetries", 4), + conf.getLong("fs.s3.sleepTimeSeconds", 10), TimeUnit.SECONDS); + Map, RetryPolicy> exceptionToPolicyMap = + new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(IOException.class, basePolicy); + exceptionToPolicyMap.put(S3Exception.class, basePolicy); + + RetryPolicy methodPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + Map methodNameToPolicyMap = + new HashMap(); + methodNameToPolicyMap.put("storeFile", methodPolicy); + + return (NativeFileSystemStore) + RetryProxy.create(NativeFileSystemStore.class, store, + methodNameToPolicyMap); + } + + private static String pathToKey(Path path) { + if (!path.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + path); + } + return path.toUri().getPath().substring(1); // remove initial slash + } + + private static Path keyToPath(String key) { + return new Path("/" + key); + } + + private Path makeAbsolute(Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workingDir, path); + } + + /** This optional operation is not yet supported. 
*/ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + + if (exists(f) && !overwrite) { + throw new IOException("File already exists:"+f); + } + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store, + key, progress, bufferSize), statistics); + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + FileStatus status; + try { + status = getFileStatus(f); + } catch (FileNotFoundException e) { + return false; + } + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + if (status.isDir()) { + FileStatus[] contents = listStatus(f); + if (!recursive && contents.length > 0) { + throw new IOException("Directory " + f.toString() + " is not empty."); + } + for (FileStatus p : contents) { + if (!delete(p.getPath(), recursive)) { + return false; + } + } + store.delete(key + FOLDER_SUFFIX); + } else { + store.delete(key); + } + return true; + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + + if (key.length() == 0) { // root always exists + return newDirectory(absolutePath); + } + + FileMetadata meta = store.retrieveMetadata(key); + if (meta != null) { + return newFile(meta, absolutePath); + } + if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) { + return newDirectory(absolutePath); + } + + PartialListing listing = store.list(key, 1); + if (listing.getFiles().length > 0 || + listing.getCommonPrefixes().length > 0) { + return newDirectory(absolutePath); + } + + throw new FileNotFoundException(absolutePath + + ": No such file or directory."); + + } + + @Override + public URI getUri() { + return uri; + } + + /** + *
<p>
+ * If f is a file, this method will make a single call to S3. + * If f is a directory, this method will make a maximum of + * (n / 1000) + 2 calls to S3, where n is the total number of + * files and directories contained directly in f. + *
</p>
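+ * <p>
+ * For example, under this bound a directory containing 3,000 files and
+ * directories directly beneath it requires at most
+ * (3000 / 1000) + 2 = 5 calls to S3.
+ * </p>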
+ */ + @Override + public FileStatus[] listStatus(Path f) throws IOException { + + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + + if (key.length() > 0) { + FileMetadata meta = store.retrieveMetadata(key); + if (meta != null) { + return new FileStatus[] { newFile(meta, absolutePath) }; + } + } + + URI pathUri = absolutePath.toUri(); + Set status = new TreeSet(); + String priorLastKey = null; + do { + PartialListing listing = store.list(key, S3_MAX_LISTING_LENGTH, + priorLastKey); + for (FileMetadata fileMetadata : listing.getFiles()) { + Path subpath = keyToPath(fileMetadata.getKey()); + String relativePath = pathUri.relativize(subpath.toUri()).getPath(); + if (relativePath.endsWith(FOLDER_SUFFIX)) { + status.add(newDirectory(new Path(absolutePath, + relativePath.substring(0, + relativePath.indexOf(FOLDER_SUFFIX))))); + } else { + status.add(newFile(fileMetadata, subpath)); + } + } + for (String commonPrefix : listing.getCommonPrefixes()) { + Path subpath = keyToPath(commonPrefix); + String relativePath = pathUri.relativize(subpath.toUri()).getPath(); + status.add(newDirectory(new Path(absolutePath, relativePath))); + } + priorLastKey = listing.getPriorLastKey(); + } while (priorLastKey != null); + + if (status.isEmpty() && + store.retrieveMetadata(key + FOLDER_SUFFIX) == null) { + return null; + } + + return status.toArray(new FileStatus[0]); + } + + private FileStatus newFile(FileMetadata meta, Path path) { + return new FileStatus(meta.getLength(), false, 1, MAX_S3_FILE_SIZE, + meta.getLastModified(), path.makeQualified(this)); + } + + private FileStatus newDirectory(Path path) { + return new FileStatus(0, true, 1, MAX_S3_FILE_SIZE, 0, + path.makeQualified(this)); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + Path absolutePath = makeAbsolute(f); + List paths = new ArrayList(); + do { + paths.add(0, absolutePath); + absolutePath = absolutePath.getParent(); + } while (absolutePath != null); + + boolean result = true; + for (Path path : paths) { + result &= mkdir(path); + } + return result; + } + + private boolean mkdir(Path f) throws IOException { + try { + FileStatus fileStatus = getFileStatus(f); + if (!fileStatus.isDir()) { + throw new IOException(String.format( + "Can't make directory for path %s since it is a file.", f)); + + } + } catch (FileNotFoundException e) { + String key = pathToKey(f) + FOLDER_SUFFIX; + store.storeEmptyFile(key); + } + return true; + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + if (!exists(f)) { + throw new FileNotFoundException(f.toString()); + } + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + return new FSDataInputStream(new BufferedFSInputStream( + new NativeS3FsInputStream(store.retrieve(key), key), bufferSize)); + } + + // rename() and delete() use this method to ensure that the parent directory + // of the source does not vanish. 
+ private void createParent(Path path) throws IOException { + Path parent = path.getParent(); + if (parent != null) { + String key = pathToKey(makeAbsolute(parent)); + if (key.length() > 0) { + store.storeEmptyFile(key + FOLDER_SUFFIX); + } + } + } + + private boolean existsAndIsFile(Path f) throws IOException { + + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + + if (key.length() == 0) { + return false; + } + + FileMetadata meta = store.retrieveMetadata(key); + if (meta != null) { + // S3 object with given key exists, so this is a file + return true; + } + + if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) { + // Signifies empty directory + return false; + } + + PartialListing listing = store.list(key, 1, null); + if (listing.getFiles().length > 0 || + listing.getCommonPrefixes().length > 0) { + // Non-empty directory + return false; + } + + throw new FileNotFoundException(absolutePath + + ": No such file or directory"); +} + + + @Override + public boolean rename(Path src, Path dst) throws IOException { + + String srcKey = pathToKey(makeAbsolute(src)); + + if (srcKey.length() == 0) { + // Cannot rename root of file system + return false; + } + + // Figure out the final destination + String dstKey; + try { + boolean dstIsFile = existsAndIsFile(dst); + if (dstIsFile) { + // Attempting to overwrite a file using rename() + return false; + } else { + // Move to within the existent directory + dstKey = pathToKey(makeAbsolute(new Path(dst, src.getName()))); + } + } catch (FileNotFoundException e) { + // dst doesn't exist, so we can proceed + dstKey = pathToKey(makeAbsolute(dst)); + try { + if (!getFileStatus(dst.getParent()).isDir()) { + return false; // parent dst is a file + } + } catch (FileNotFoundException ex) { + return false; // parent dst does not exist + } + } + + try { + boolean srcIsFile = existsAndIsFile(src); + if (srcIsFile) { + store.rename(srcKey, dstKey); + } else { + // Move the folder object + store.delete(srcKey + FOLDER_SUFFIX); + store.storeEmptyFile(dstKey + FOLDER_SUFFIX); + + // Move everything inside the folder + String priorLastKey = null; + do { + PartialListing listing = store.listAll(srcKey, S3_MAX_LISTING_LENGTH, + priorLastKey); + for (FileMetadata file : listing.getFiles()) { + store.rename(file.getKey(), dstKey + + file.getKey().substring(srcKey.length())); + } + priorLastKey = listing.getPriorLastKey(); + } while (priorLastKey != null); + } + + createParent(src); + return true; + + } catch (FileNotFoundException e) { + // Source file does not exist; + return false; + } + } + + + /** + * Set the working directory to the given directory. + */ + @Override + public void setWorkingDirectory(Path newDir) { + workingDir = newDir; + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/PartialListing.java b/src/java/org/apache/hadoop/fs/s3native/PartialListing.java new file mode 100644 index 00000000000..899758660d2 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/PartialListing.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +/** + *
<p>
+ * Holds information on a directory listing for a + * {@link NativeFileSystemStore}. + * This includes the {@link FileMetadata files} and directories + * (their names) contained in a directory. + *
</p>
+ *
<p>
+ * This listing may be returned in chunks, so a priorLastKey + * is provided so that the next chunk may be requested. + *
</p>
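+ * <p>
+ * A minimal paging sketch (the prefix and listing size are illustrative;
+ * it assumes an already-initialized {@link NativeFileSystemStore} named
+ * <code>store</code>):
+ * <pre>
+ *   String priorLastKey = null;
+ *   do {
+ *     PartialListing chunk = store.list("data/", 1000, priorLastKey);
+ *     for (FileMetadata file : chunk.getFiles()) {
+ *       System.out.println(file.getKey() + " " + file.getLength());
+ *     }
+ *     priorLastKey = chunk.getPriorLastKey();
+ *   } while (priorLastKey != null);
+ * </pre>
+ * </p>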
+ * @see NativeFileSystemStore#list(String, int, String) + */ +class PartialListing { + + private final String priorLastKey; + private final FileMetadata[] files; + private final String[] commonPrefixes; + + public PartialListing(String priorLastKey, FileMetadata[] files, + String[] commonPrefixes) { + this.priorLastKey = priorLastKey; + this.files = files; + this.commonPrefixes = commonPrefixes; + } + + public FileMetadata[] getFiles() { + return files; + } + + public String[] getCommonPrefixes() { + return commonPrefixes; + } + + public String getPriorLastKey() { + return priorLastKey; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/package.html b/src/java/org/apache/hadoop/fs/s3native/package.html new file mode 100644 index 00000000000..24b9b1df460 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/package.html @@ -0,0 +1,32 @@ + + + + + + +
<p>
+A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem} for reading and writing files on +Amazon S3. +Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem}, which is block-based, +this implementation stores +files on S3 in their native form for interoperability with other S3 tools. +
</p>
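+<p>
+A minimal usage sketch (it assumes this file system is registered under the
+<code>s3n:</code> scheme and that AWS credentials are already configured;
+the bucket and paths are illustrative):
+</p>
+<pre>
+  Configuration conf = new Configuration();
+  FileSystem fs = FileSystem.get(URI.create("s3n://mybucket/"), conf);
+
+  FSDataOutputStream out = fs.create(new Path("s3n://mybucket/data/part-0"));
+  out.write("hello".getBytes());
+  out.close();
+
+  FileStatus[] listing = fs.listStatus(new Path("s3n://mybucket/data"));
+</pre>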
+ + + diff --git a/src/java/org/apache/hadoop/fs/shell/Command.java b/src/java/org/apache/hadoop/fs/shell/Command.java new file mode 100644 index 00000000000..06883a2086f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/Command.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +import java.io.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ipc.RemoteException; + +/** + * An abstract class for the execution of a file system command + */ +abstract public class Command extends Configured { + protected String[] args; + + /** Constructor */ + protected Command(Configuration conf) { + super(conf); + } + + /** Return the command's name excluding the leading character - */ + abstract public String getCommandName(); + + /** + * Execute the command on the input path + * + * @param path the input path + * @throws IOException if any error occurs + */ + abstract protected void run(Path path) throws IOException; + + /** + * For each source path, execute the command + * + * @return 0 if it runs successfully; -1 if it fails + */ + public int runAll() { + int exitCode = 0; + for (String src : args) { + try { + Path srcPath = new Path(src); + FileSystem fs = srcPath.getFileSystem(getConf()); + FileStatus[] statuses = fs.globStatus(srcPath); + if (statuses == null) { + System.err.println("Can not find listing for " + src); + exitCode = -1; + } else { + for(FileStatus s : statuses) { + run(s.getPath()); + } + } + } catch (RemoteException re) { + exitCode = -1; + String content = re.getLocalizedMessage(); + int eol = content.indexOf('\n'); + if (eol>=0) { + content = content.substring(0, eol); + } + System.err.println(getCommandName() + ": " + content); + } catch (IOException e) { + exitCode = -1; + System.err.println(getCommandName() + ": " + e.getLocalizedMessage()); + } + } + return exitCode; + } +} diff --git a/src/java/org/apache/hadoop/fs/shell/CommandFormat.java b/src/java/org/apache/hadoop/fs/shell/CommandFormat.java new file mode 100644 index 00000000000..c1d84d3670c --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/CommandFormat.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Parse the args of a command and check the format of args. + */ +public class CommandFormat { + final String name; + final int minPar, maxPar; + final Map options = new HashMap(); + + /** constructor */ + public CommandFormat(String n, int min, int max, String ... possibleOpt) { + name = n; + minPar = min; + maxPar = max; + for(String opt : possibleOpt) + options.put(opt, Boolean.FALSE); + } + + /** Parse parameters starting from the given position + * + * @param args an array of input arguments + * @param pos the position at which starts to parse + * @return a list of parameters + */ + public List parse(String[] args, int pos) { + List parameters = new ArrayList(); + for(; pos < args.length; pos++) { + if (args[pos].charAt(0) == '-' && args[pos].length() > 1) { + String opt = args[pos].substring(1); + if (options.containsKey(opt)) + options.put(opt, Boolean.TRUE); + else + throw new IllegalArgumentException("Illegal option " + args[pos]); + } + else + parameters.add(args[pos]); + } + int psize = parameters.size(); + if (psize < minPar || psize > maxPar) + throw new IllegalArgumentException("Illegal number of arguments"); + return parameters; + } + + /** Return if the option is set or not + * + * @param option String representation of an option + * @return true is the option is set; false otherwise + */ + public boolean getOpt(String option) { + return options.get(option); + } +} diff --git a/src/java/org/apache/hadoop/fs/shell/CommandUtils.java b/src/java/org/apache/hadoop/fs/shell/CommandUtils.java new file mode 100644 index 00000000000..2a1317ee6c0 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/CommandUtils.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +final class CommandUtils { + static String formatDescription(String usage, String... 
desciptions) { + StringBuilder b = new StringBuilder(usage + ": " + desciptions[0]); + for(int i = 1; i < desciptions.length; i++) { + b.append("\n\t\t" + desciptions[i]); + } + return b.toString(); + } +} diff --git a/src/java/org/apache/hadoop/fs/shell/Count.java b/src/java/org/apache/hadoop/fs/shell/Count.java new file mode 100644 index 00000000000..abacb2a690d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/Count.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +import java.io.*; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Count the number of directories, files, bytes, quota, and remaining quota. + */ +public class Count extends Command { + public static final String NAME = "count"; + public static final String USAGE = "-" + NAME + "[-q] "; + public static final String DESCRIPTION = CommandUtils.formatDescription(USAGE, + "Count the number of directories, files and bytes under the paths", + "that match the specified file pattern. The output columns are:", + "DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME or", + "QUOTA REMAINING_QUATA SPACE_QUOTA REMAINING_SPACE_QUOTA ", + " DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME"); + + private boolean qOption; + + /** Constructor + * + * @param cmd the count command + * @param pos the starting index of the arguments + */ + public Count(String[] cmd, int pos, Configuration conf) { + super(conf); + CommandFormat c = new CommandFormat(NAME, 1, Integer.MAX_VALUE, "q"); + List parameters = c.parse(cmd, pos); + this.args = parameters.toArray(new String[parameters.size()]); + if (this.args.length == 0) { // default path is the current working directory + this.args = new String[] {"."}; + } + this.qOption = c.getOpt("q") ? 
true: false; + } + + /** Check if a command is the count command + * + * @param cmd A string representation of a command starting with "-" + * @return true if this is a count command; false otherwise + */ + public static boolean matches(String cmd) { + return ("-" + NAME).equals(cmd); + } + + @Override + public String getCommandName() { + return NAME; + } + + @Override + protected void run(Path path) throws IOException { + FileSystem fs = path.getFileSystem(getConf()); + System.out.println(fs.getContentSummary(path).toString(qOption) + path); + } +} diff --git a/src/java/org/apache/hadoop/http/FilterContainer.java b/src/java/org/apache/hadoop/http/FilterContainer.java new file mode 100644 index 00000000000..40557c08d7a --- /dev/null +++ b/src/java/org/apache/hadoop/http/FilterContainer.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +import java.util.Map; + +/** + * A container class for javax.servlet.Filter. + */ +public interface FilterContainer { + /** + * Add a filter to the container. + * @param name Filter name + * @param classname Filter class name + * @param parameters a map from parameter names to initial values + */ + void addFilter(String name, String classname, Map parameters); + /** + * Add a global filter to the container. + * @param name filter name + * @param classname filter class name + * @param parameters a map from parameter names to initial values + */ + void addGlobalFilter(String name, String classname, Map parameters); +} diff --git a/src/java/org/apache/hadoop/http/FilterInitializer.java b/src/java/org/apache/hadoop/http/FilterInitializer.java new file mode 100644 index 00000000000..3f4765e29be --- /dev/null +++ b/src/java/org/apache/hadoop/http/FilterInitializer.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +/** + * Initialize a javax.servlet.Filter. 
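+ * <p>
+ * A minimal sketch of a subclass (the initializer, filter class, and
+ * parameter map are illustrative):
+ * <pre>
+ *   public class MyFilterInitializer extends FilterInitializer {
+ *     void initFilter(FilterContainer container) {
+ *       Map params = new HashMap();
+ *       params.put("enabled", "true");
+ *       container.addFilter("my-filter", MyFilter.class.getName(), params);
+ *     }
+ *   }
+ * </pre>
+ * </p>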
+ */ +public abstract class FilterInitializer { + /** + * Initialize a Filter to a FilterContainer. + * @param container The filter container + */ + abstract void initFilter(FilterContainer container); +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/http/HttpServer.java b/src/java/org/apache/hadoop/http/HttpServer.java new file mode 100644 index 00000000000..a739ba69ace --- /dev/null +++ b/src/java/org/apache/hadoop/http/HttpServer.java @@ -0,0 +1,519 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +import java.io.IOException; +import java.io.PrintWriter; +import java.net.BindException; +import java.net.InetSocketAddress; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.nio.channels.ServerSocketChannel; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.log.LogLevel; +import org.apache.hadoop.metrics.MetricsServlet; +import org.apache.hadoop.util.ReflectionUtils; + +import org.mortbay.jetty.Connector; +import org.mortbay.jetty.Handler; +import org.mortbay.jetty.Server; +import org.mortbay.jetty.handler.ContextHandlerCollection; +import org.mortbay.jetty.nio.SelectChannelConnector; +import org.mortbay.jetty.security.SslSocketConnector; +import org.mortbay.jetty.servlet.Context; +import org.mortbay.jetty.servlet.DefaultServlet; +import org.mortbay.jetty.servlet.FilterHolder; +import org.mortbay.jetty.servlet.FilterMapping; +import org.mortbay.jetty.servlet.ServletHandler; +import org.mortbay.jetty.servlet.ServletHolder; +import org.mortbay.jetty.webapp.WebAppContext; +import org.mortbay.thread.QueuedThreadPool; +import org.mortbay.util.MultiException; + +/** + * Create a Jetty embedded server to answer http requests. The primary goal + * is to serve up status information for the server. 
+ * There are three contexts: + * "/logs/" -> points to the log directory + * "/static/" -> points to common static files (src/webapps/static) + * "/" -> the jsp server code from (src/webapps/) + */ +public class HttpServer implements FilterContainer { + public static final Log LOG = LogFactory.getLog(HttpServer.class); + + static final String FILTER_INITIALIZER_PROPERTY + = "hadoop.http.filter.initializers"; + + protected final Server webServer; + protected final Connector listener; + protected final WebAppContext webAppContext; + protected final boolean findPort; + protected final Map defaultContexts = + new HashMap(); + protected final List filterNames = new ArrayList(); + private static final int MAX_RETRIES = 10; + + /** Same as this(name, bindAddress, port, findPort, null); */ + public HttpServer(String name, String bindAddress, int port, boolean findPort + ) throws IOException { + this(name, bindAddress, port, findPort, new Configuration()); + } + + /** + * Create a status server on the given port. + * The jsp scripts are taken from src/webapps/. + * @param name The name of the server + * @param port The port to use on the server + * @param findPort whether the server should start at the given port and + * increment by 1 until it finds a free port. + * @param conf Configuration + */ + public HttpServer(String name, String bindAddress, int port, + boolean findPort, Configuration conf) throws IOException { + webServer = new Server(); + this.findPort = findPort; + + listener = createBaseListener(conf); + listener.setHost(bindAddress); + listener.setPort(port); + webServer.addConnector(listener); + + webServer.setThreadPool(new QueuedThreadPool()); + + final String appDir = getWebAppsPath(); + ContextHandlerCollection contexts = new ContextHandlerCollection(); + webServer.setHandler(contexts); + + webAppContext = new WebAppContext(); + webAppContext.setContextPath("/"); + webAppContext.setWar(appDir + "/" + name); + webServer.addHandler(webAppContext); + + addDefaultApps(contexts, appDir); + + final FilterInitializer[] initializers = getFilterInitializers(conf); + if (initializers != null) { + for(FilterInitializer c : initializers) { + c.initFilter(this); + } + } + addDefaultServlets(); + } + + /** + * Create a required listener for the Jetty instance listening on the port + * provided. This wrapper and all subclasses must create at least one + * listener. + */ + protected Connector createBaseListener(Configuration conf) + throws IOException { + SelectChannelConnector ret = new SelectChannelConnector(); + ret.setLowResourceMaxIdleTime(10000); + ret.setAcceptQueueSize(128); + ret.setResolveNames(false); + ret.setUseDirectBuffers(false); + return ret; + } + + /** Get an array of FilterConfiguration specified in the conf */ + private static FilterInitializer[] getFilterInitializers(Configuration conf) { + if (conf == null) { + return null; + } + + Class[] classes = conf.getClasses(FILTER_INITIALIZER_PROPERTY); + if (classes == null) { + return null; + } + + FilterInitializer[] initializers = new FilterInitializer[classes.length]; + for(int i = 0; i < classes.length; i++) { + initializers[i] = (FilterInitializer)ReflectionUtils.newInstance( + classes[i], conf); + } + return initializers; + } + + /** + * Add default apps. + * @param appDir The application directory + * @throws IOException + */ + protected void addDefaultApps(ContextHandlerCollection parent, + final String appDir) throws IOException { + // set up the context for "/logs/" if "hadoop.log.dir" property is defined. 
+ String logDir = System.getProperty("hadoop.log.dir"); + if (logDir != null) { + Context logContext = new Context(parent, "/logs"); + logContext.setResourceBase(logDir); + logContext.addServlet(DefaultServlet.class, "/"); + defaultContexts.put(logContext, true); + } + // set up the context for "/static/*" + Context staticContext = new Context(parent, "/static"); + staticContext.setResourceBase(appDir + "/static"); + staticContext.addServlet(DefaultServlet.class, "/*"); + defaultContexts.put(staticContext, true); + } + + /** + * Add default servlets. + */ + protected void addDefaultServlets() { + // set up default servlets + addServlet("stacks", "/stacks", StackServlet.class); + addServlet("logLevel", "/logLevel", LogLevel.Servlet.class); + addServlet("metrics", "/metrics", MetricsServlet.class); + } + + public void addContext(Context ctxt, boolean isFiltered) + throws IOException { + webServer.addHandler(ctxt); + defaultContexts.put(ctxt, isFiltered); + } + + /** + * Add a context + * @param pathSpec The path spec for the context + * @param dir The directory containing the context + * @param isFiltered if true, the servlet is added to the filter path mapping + * @throws IOException + */ + protected void addContext(String pathSpec, String dir, boolean isFiltered) throws IOException { + if (0 == webServer.getHandlers().length) { + throw new RuntimeException("Couldn't find handler"); + } + WebAppContext webAppCtx = new WebAppContext(); + webAppCtx.setContextPath(pathSpec); + webAppCtx.setWar(dir); + addContext(webAppCtx, true); + } + + /** + * Set a value in the webapp context. These values are available to the jsp + * pages as "application.getAttribute(name)". + * @param name The name of the attribute + * @param value The value of the attribute + */ + public void setAttribute(String name, Object value) { + webAppContext.setAttribute(name, value); + } + + /** + * Add a servlet in the server. + * @param name The name of the servlet (can be passed as null) + * @param pathSpec The path spec for the servlet + * @param clazz The servlet class + */ + public void addServlet(String name, String pathSpec, + Class clazz) { + addInternalServlet(name, pathSpec, clazz); + addFilterPathMapping(pathSpec, webAppContext); + } + + /** + * Add an internal servlet in the server. 
+ * @param name The name of the servlet (can be passed as null) + * @param pathSpec The path spec for the servlet + * @param clazz The servlet class + * @deprecated this is a temporary method + */ + @Deprecated + public void addInternalServlet(String name, String pathSpec, + Class clazz) { + ServletHolder holder = new ServletHolder(clazz); + if (name != null) { + holder.setName(name); + } + webAppContext.addServlet(holder, pathSpec); + } + + /** {@inheritDoc} */ + public void addFilter(String name, String classname, + Map parameters) { + + final String[] USER_FACING_URLS = { "*.html", "*.jsp" }; + defineFilter(webAppContext, name, classname, parameters, USER_FACING_URLS); + final String[] ALL_URLS = { "/*" }; + for (Map.Entry e : defaultContexts.entrySet()) { + if (e.getValue()) { + Context ctx = e.getKey(); + defineFilter(ctx, name, classname, parameters, ALL_URLS); + LOG.info("Added filter " + name + " (class=" + classname + + ") to context " + ctx.getDisplayName()); + } + } + filterNames.add(name); + } + + /** {@inheritDoc} */ + public void addGlobalFilter(String name, String classname, + Map parameters) { + final String[] ALL_URLS = { "/*" }; + defineFilter(webAppContext, name, classname, parameters, ALL_URLS); + for (Context ctx : defaultContexts.keySet()) { + defineFilter(ctx, name, classname, parameters, ALL_URLS); + } + LOG.info("Added global filter" + name + " (class=" + classname + ")"); + } + + /** + * Define a filter for a context and set up default url mappings. + */ + protected void defineFilter(Context ctx, String name, + String classname, Map parameters, String[] urls) { + + FilterHolder holder = new FilterHolder(); + holder.setName(name); + holder.setClassName(classname); + holder.setInitParameters(parameters); + FilterMapping fmap = new FilterMapping(); + fmap.setPathSpecs(urls); + fmap.setDispatches(Handler.ALL); + fmap.setFilterName(name); + ServletHandler handler = ctx.getServletHandler(); + handler.addFilter(holder, fmap); + } + + /** + * Add the path spec to the filter path mapping. + * @param pathSpec The path spec + * @param webAppCtx The WebApplicationContext to add to + */ + protected void addFilterPathMapping(String pathSpec, + Context webAppCtx) { + ServletHandler handler = webAppCtx.getServletHandler(); + for(String name : filterNames) { + FilterMapping fmap = new FilterMapping(); + fmap.setPathSpec(pathSpec); + fmap.setFilterName(name); + fmap.setDispatches(Handler.ALL); + handler.addFilterMapping(fmap); + } + } + + /** + * Get the value in the webapp context. + * @param name The name of the attribute + * @return The value of the attribute + */ + public Object getAttribute(String name) { + return webAppContext.getAttribute(name); + } + + /** + * Get the pathname to the webapps files. + * @return the pathname as a URL + * @throws IOException if 'webapps' directory cannot be found on CLASSPATH. + */ + protected String getWebAppsPath() throws IOException { + URL url = getClass().getClassLoader().getResource("webapps"); + if (url == null) + throw new IOException("webapps not found in CLASSPATH"); + return url.toString(); + } + + /** + * Get the port that the server is on + * @return the port + */ + public int getPort() { + return webServer.getConnectors()[0].getLocalPort(); + } + + /** + * Set the min, max number of worker threads (simultaneous connections). 
+ */ + public void setThreads(int min, int max) { + QueuedThreadPool pool = (QueuedThreadPool) webServer.getThreadPool() ; + pool.setMinThreads(min); + pool.setMaxThreads(max); + } + + /** + * Configure an ssl listener on the server. + * @param addr address to listen on + * @param keystore location of the keystore + * @param storPass password for the keystore + * @param keyPass password for the key + * @deprecated Use {@link #addSslListener(InetSocketAddress, Configuration, boolean)} + */ + @Deprecated + public void addSslListener(InetSocketAddress addr, String keystore, + String storPass, String keyPass) throws IOException { + if (webServer.isStarted()) { + throw new IOException("Failed to add ssl listener"); + } + SslSocketConnector sslListener = new SslSocketConnector(); + sslListener.setHost(addr.getHostName()); + sslListener.setPort(addr.getPort()); + sslListener.setKeystore(keystore); + sslListener.setPassword(storPass); + sslListener.setKeyPassword(keyPass); + webServer.addConnector(sslListener); + } + + /** + * Configure an ssl listener on the server. + * @param addr address to listen on + * @param sslConf conf to retrieve ssl options + * @param needClientAuth whether client authentication is required + */ + public void addSslListener(InetSocketAddress addr, Configuration sslConf, + boolean needClientAuth) throws IOException { + if (webServer.isStarted()) { + throw new IOException("Failed to add ssl listener"); + } + if (needClientAuth) { + // setting up SSL truststore for authenticating clients + System.setProperty("javax.net.ssl.trustStore", sslConf.get( + "ssl.server.truststore.location", "")); + System.setProperty("javax.net.ssl.trustStorePassword", sslConf.get( + "ssl.server.truststore.password", "")); + System.setProperty("javax.net.ssl.trustStoreType", sslConf.get( + "ssl.server.truststore.type", "jks")); + } + SslSocketConnector sslListener = new SslSocketConnector(); + sslListener.setHost(addr.getHostName()); + sslListener.setPort(addr.getPort()); + sslListener.setKeystore(sslConf.get("ssl.server.keystore.location")); + sslListener.setPassword(sslConf.get("ssl.server.keystore.password", "")); + sslListener.setKeyPassword(sslConf.get("ssl.server.keystore.keypassword", "")); + sslListener.setKeystoreType(sslConf.get("ssl.server.keystore.type", "jks")); + sslListener.setNeedClientAuth(needClientAuth); + webServer.addConnector(sslListener); + } + + /** + * Start the server. Does not wait for the server to start. + */ + public void start() throws IOException { + try { + int port = 0; + int oriPort = listener.getPort(); // The original requested port + while (true) { + try { + port = webServer.getConnectors()[0].getLocalPort(); + LOG.info("Port returned by webServer.getConnectors()[0]." + + "getLocalPort() before open() is "+ port + + ". 
Opening the listener on " + oriPort); + listener.open(); + port = listener.getLocalPort(); + LOG.info("listener.getLocalPort() returned " + listener.getLocalPort() + + " webServer.getConnectors()[0].getLocalPort() returned " + + webServer.getConnectors()[0].getLocalPort()); + //Workaround to handle the problem reported in HADOOP-4744 + if (port < 0) { + Thread.sleep(100); + int numRetries = 1; + while (port < 0) { + LOG.warn("listener.getLocalPort returned " + port); + if (numRetries++ > MAX_RETRIES) { + throw new Exception(" listener.getLocalPort is returning " + + "less than 0 even after " +numRetries+" resets"); + } + for (int i = 0; i < 2; i++) { + LOG.info("Retrying listener.getLocalPort()"); + port = listener.getLocalPort(); + if (port > 0) { + break; + } + Thread.sleep(200); + } + if (port > 0) { + break; + } + LOG.info("Bouncing the listener"); + listener.close(); + Thread.sleep(1000); + listener.setPort(oriPort == 0 ? 0 : (oriPort += 1)); + listener.open(); + Thread.sleep(100); + port = listener.getLocalPort(); + } + } //Workaround end + LOG.info("Jetty bound to port " + port); + webServer.start(); + break; + } catch (IOException ex) { + // if this is a bind exception, + // then try the next port number. + if (ex instanceof BindException) { + if (!findPort) { + throw (BindException) ex; + } + } else { + LOG.info("HttpServer.start() threw a non Bind IOException"); + throw ex; + } + } catch (MultiException ex) { + LOG.info("HttpServer.start() threw a MultiException"); + throw ex; + } + listener.setPort((oriPort += 1)); + } + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw new IOException("Problem starting http server", e); + } + } + + /** + * stop the server + */ + public void stop() throws Exception { + listener.close(); + webServer.stop(); + } + + public void join() throws InterruptedException { + webServer.join(); + } + + /** + * A very simple servlet to serve up a text representation of the current + * stack traces. It both returns the stacks to the caller and logs them. + * Currently the stack traces are done sequentially rather than exactly the + * same data. + */ + public static class StackServlet extends HttpServlet { + private static final long serialVersionUID = -6284183679759467039L; + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + PrintWriter out = new PrintWriter(response.getOutputStream()); + ReflectionUtils.printThreadInfo(out, ""); + out.close(); + ReflectionUtils.logThreadInfo(LOG, "jsp requested", 1); + } + } +} diff --git a/src/java/org/apache/hadoop/io/AbstractMapWritable.java b/src/java/org/apache/hadoop/io/AbstractMapWritable.java new file mode 100644 index 00000000000..5829d4f1111 --- /dev/null +++ b/src/java/org/apache/hadoop/io/AbstractMapWritable.java @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** + * Abstract base class for MapWritable and SortedMapWritable + * + * Unlike org.apache.nutch.crawl.MapWritable, this class allows creation of + * MapWritable<Writable, MapWritable> so the CLASS_TO_ID and ID_TO_CLASS + * maps travel with the class instead of being static. + * + * Class ids range from 1 to 127 so there can be at most 127 distinct classes + * in any specific map instance. + */ +public abstract class AbstractMapWritable implements Writable, Configurable { + private AtomicReference conf; + + /* Class to id mappings */ + private Map classToIdMap = new ConcurrentHashMap(); + + /* Id to Class mappings */ + private Map idToClassMap = new ConcurrentHashMap(); + + /* The number of new classes (those not established by the constructor) */ + private volatile byte newClasses = 0; + + /** @return the number of known classes */ + byte getNewClasses() { + return newClasses; + } + + /** + * Used to add "predefined" classes and by Writable to copy "new" classes. + */ + private synchronized void addToMap(Class clazz, byte id) { + if (classToIdMap.containsKey(clazz)) { + byte b = classToIdMap.get(clazz); + if (b != id) { + throw new IllegalArgumentException ("Class " + clazz.getName() + + " already registered but maps to " + b + " and not " + id); + } + } + if (idToClassMap.containsKey(id)) { + Class c = idToClassMap.get(id); + if (!c.equals(clazz)) { + throw new IllegalArgumentException("Id " + id + " exists but maps to " + + c.getName() + " and not " + clazz.getName()); + } + } + classToIdMap.put(clazz, id); + idToClassMap.put(id, clazz); + } + + /** Add a Class to the maps if it is not already present. */ + protected synchronized void addToMap(Class clazz) { + if (classToIdMap.containsKey(clazz)) { + return; + } + if (newClasses + 1 > Byte.MAX_VALUE) { + throw new IndexOutOfBoundsException("adding an additional class would" + + " exceed the maximum number allowed"); + } + byte id = ++newClasses; + addToMap(clazz, id); + } + + /** @return the Class class for the specified id */ + protected Class getClass(byte id) { + return idToClassMap.get(id); + } + + /** @return the id for the specified Class */ + protected byte getId(Class clazz) { + return classToIdMap.containsKey(clazz) ? classToIdMap.get(clazz) : -1; + } + + /** Used by child copy constructors. */ + protected synchronized void copy(Writable other) { + if (other != null) { + try { + DataOutputBuffer out = new DataOutputBuffer(); + other.write(out); + DataInputBuffer in = new DataInputBuffer(); + in.reset(out.getData(), out.getLength()); + readFields(in); + + } catch (IOException e) { + throw new IllegalArgumentException("map cannot be copied: " + + e.getMessage()); + } + + } else { + throw new IllegalArgumentException("source map cannot be null"); + } + } + + /** constructor. 
*/ + protected AbstractMapWritable() { + this.conf = new AtomicReference(); + + addToMap(ArrayWritable.class, + Byte.valueOf(Integer.valueOf(-127).byteValue())); + addToMap(BooleanWritable.class, + Byte.valueOf(Integer.valueOf(-126).byteValue())); + addToMap(BytesWritable.class, + Byte.valueOf(Integer.valueOf(-125).byteValue())); + addToMap(FloatWritable.class, + Byte.valueOf(Integer.valueOf(-124).byteValue())); + addToMap(IntWritable.class, + Byte.valueOf(Integer.valueOf(-123).byteValue())); + addToMap(LongWritable.class, + Byte.valueOf(Integer.valueOf(-122).byteValue())); + addToMap(MapWritable.class, + Byte.valueOf(Integer.valueOf(-121).byteValue())); + addToMap(MD5Hash.class, + Byte.valueOf(Integer.valueOf(-120).byteValue())); + addToMap(NullWritable.class, + Byte.valueOf(Integer.valueOf(-119).byteValue())); + addToMap(ObjectWritable.class, + Byte.valueOf(Integer.valueOf(-118).byteValue())); + addToMap(SortedMapWritable.class, + Byte.valueOf(Integer.valueOf(-117).byteValue())); + addToMap(Text.class, + Byte.valueOf(Integer.valueOf(-116).byteValue())); + addToMap(TwoDArrayWritable.class, + Byte.valueOf(Integer.valueOf(-115).byteValue())); + + // UTF8 is deprecated so we don't support it + + addToMap(VIntWritable.class, + Byte.valueOf(Integer.valueOf(-114).byteValue())); + addToMap(VLongWritable.class, + Byte.valueOf(Integer.valueOf(-113).byteValue())); + + } + + /** @return the conf */ + public Configuration getConf() { + return conf.get(); + } + + /** @param conf the conf to set */ + public void setConf(Configuration conf) { + this.conf.set(conf); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + + // First write out the size of the class table and any classes that are + // "unknown" classes + + out.writeByte(newClasses); + + for (byte i = 1; i <= newClasses; i++) { + out.writeByte(i); + out.writeUTF(getClass(i).getName()); + } + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + + // Get the number of "unknown" classes + + newClasses = in.readByte(); + + // Then read in the class names and add them to our tables + + for (int i = 0; i < newClasses; i++) { + byte id = in.readByte(); + String className = in.readUTF(); + try { + addToMap(Class.forName(className), id); + + } catch (ClassNotFoundException e) { + throw new IOException("can't find class: " + className + " because "+ + e.getMessage()); + } + } + } +} diff --git a/src/java/org/apache/hadoop/io/ArrayFile.java b/src/java/org/apache/hadoop/io/ArrayFile.java new file mode 100644 index 00000000000..dafb6ae600e --- /dev/null +++ b/src/java/org/apache/hadoop/io/ArrayFile.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io; + +import java.io.*; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.*; +import org.apache.hadoop.io.SequenceFile.CompressionType; + + +/** A dense file-based mapping from integers to values. */ +public class ArrayFile extends MapFile { + + protected ArrayFile() {} // no public ctor + + /** Write a new array file. */ + public static class Writer extends MapFile.Writer { + private LongWritable count = new LongWritable(0); + + /** Create the named file for values of the named class. */ + public Writer(Configuration conf, FileSystem fs, + String file, Class valClass) + throws IOException { + super(conf, fs, file, LongWritable.class, valClass); + } + + /** Create the named file for values of the named class. */ + public Writer(Configuration conf, FileSystem fs, + String file, Class valClass, + CompressionType compress, Progressable progress) + throws IOException { + super(conf, fs, file, LongWritable.class, valClass, compress, progress); + } + + /** Append a value to the file. */ + public synchronized void append(Writable value) throws IOException { + super.append(count, value); // add to map + count.set(count.get()+1); // increment count + } + } + + /** Provide access to an existing array file. */ + public static class Reader extends MapFile.Reader { + private LongWritable key = new LongWritable(); + + /** Construct an array reader for the named file.*/ + public Reader(FileSystem fs, String file, Configuration conf) throws IOException { + super(fs, file, conf); + } + + /** Positions the reader before its nth value. */ + public synchronized void seek(long n) throws IOException { + key.set(n); + seek(key); + } + + /** Read and return the next value in the file. */ + public synchronized Writable next(Writable value) throws IOException { + return next(key, value) ? value : null; + } + + /** Returns the key associated with the most recent call to {@link + * #seek(long)}, {@link #next(Writable)}, or {@link + * #get(long,Writable)}. */ + public synchronized long key() throws IOException { + return key.get(); + } + + /** Return the nth value in the file. */ + public synchronized Writable get(long n, Writable value) + throws IOException { + key.set(n); + return get(key, value); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/ArrayWritable.java b/src/java/org/apache/hadoop/io/ArrayWritable.java new file mode 100644 index 00000000000..9c6643548a0 --- /dev/null +++ b/src/java/org/apache/hadoop/io/ArrayWritable.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.lang.reflect.Array; + +/** + * A Writable for arrays containing instances of a class. 
The elements of this + * writable must all be instances of the same class. If this writable will be + * the input for a Reducer, you will need to create a subclass that sets the + * value to be of the proper type. + * + * For example: + * + * public class IntArrayWritable extends ArrayWritable { + * public IntArrayWritable() { + * super(IntWritable.class); + * } + * } + * + */ +public class ArrayWritable implements Writable { + private Class valueClass; + private Writable[] values; + + public ArrayWritable(Class valueClass) { + if (valueClass == null) { + throw new IllegalArgumentException("null valueClass"); + } + this.valueClass = valueClass; + } + + public ArrayWritable(Class valueClass, Writable[] values) { + this(valueClass); + this.values = values; + } + + public ArrayWritable(String[] strings) { + this(UTF8.class, new Writable[strings.length]); + for (int i = 0; i < strings.length; i++) { + values[i] = new UTF8(strings[i]); + } + } + + public Class getValueClass() { + return valueClass; + } + + public String[] toStrings() { + String[] strings = new String[values.length]; + for (int i = 0; i < values.length; i++) { + strings[i] = values[i].toString(); + } + return strings; + } + + public Object toArray() { + Object result = Array.newInstance(valueClass, values.length); + for (int i = 0; i < values.length; i++) { + Array.set(result, i, values[i]); + } + return result; + } + + public void set(Writable[] values) { this.values = values; } + + public Writable[] get() { return values; } + + public void readFields(DataInput in) throws IOException { + values = new Writable[in.readInt()]; // construct values + for (int i = 0; i < values.length; i++) { + Writable value = WritableFactories.newInstance(valueClass); + value.readFields(in); // read a value + values[i] = value; // store it in values + } + } + + public void write(DataOutput out) throws IOException { + out.writeInt(values.length); // write values + for (int i = 0; i < values.length; i++) { + values[i].write(out); + } + } + +} + diff --git a/src/java/org/apache/hadoop/io/BinaryComparable.java b/src/java/org/apache/hadoop/io/BinaryComparable.java new file mode 100644 index 00000000000..0fb0882e4f7 --- /dev/null +++ b/src/java/org/apache/hadoop/io/BinaryComparable.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** + * Interface supported by {@link org.apache.hadoop.io.WritableComparable} + * types supporting ordering/permutation by a representative set of bytes. + */ +public abstract class BinaryComparable implements Comparable { + + /** + * Return n st bytes 0..n-1 from {#getBytes()} are valid. + */ + public abstract int getLength(); + + /** + * Return representative byte array for this instance. 
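The ArrayWritable javadoc above names IntArrayWritable as the kind of subclass to create when the array is used as Reducer input; below is a minimal sketch of that subclass together with the set()/get() round trip (the demo class is illustrative only).

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// The subclass fixes the element type so the framework can create instances
// with the no-argument constructor during deserialization.
public class IntArrayWritable extends ArrayWritable {
  public IntArrayWritable() {
    super(IntWritable.class);
  }
}

class IntArrayWritableDemo {
  public static void main(String[] args) {
    IntWritable[] values = { new IntWritable(1), new IntWritable(2), new IntWritable(3) };
    IntArrayWritable array = new IntArrayWritable();
    array.set(values);                       // store the backing array
    for (Writable w : array.get()) {         // read the elements back
      System.out.println(((IntWritable) w).get());
    }
  }
}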
+ */ + public abstract byte[] getBytes(); + + /** + * Compare bytes from {#getBytes()}. + * @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int) + */ + public int compareTo(BinaryComparable other) { + if (this == other) + return 0; + return WritableComparator.compareBytes(getBytes(), 0, getLength(), + other.getBytes(), 0, other.getLength()); + } + + /** + * Compare bytes from {#getBytes()} to those provided. + */ + public int compareTo(byte[] other, int off, int len) { + return WritableComparator.compareBytes(getBytes(), 0, getLength(), + other, off, len); + } + + /** + * Return true if bytes from {#getBytes()} match. + */ + public boolean equals(Object other) { + if (!(other instanceof BinaryComparable)) + return false; + BinaryComparable that = (BinaryComparable)other; + if (this.getLength() != that.getLength()) + return false; + return this.compareTo(that) == 0; + } + + /** + * Return a hash of the bytes returned from {#getBytes()}. + * @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int) + */ + public int hashCode() { + return WritableComparator.hashBytes(getBytes(), getLength()); + } + +} diff --git a/src/java/org/apache/hadoop/io/BloomMapFile.java b/src/java/org/apache/hadoop/io/BloomMapFile.java new file mode 100644 index 00000000000..aa616a4565d --- /dev/null +++ b/src/java/org/apache/hadoop/io/BloomMapFile.java @@ -0,0 +1,259 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.bloom.DynamicBloomFilter; +import org.apache.hadoop.util.bloom.Filter; +import org.apache.hadoop.util.bloom.Key; +import org.apache.hadoop.util.hash.Hash; + +/** + * This class extends {@link MapFile} and provides very much the same + * functionality. However, it uses dynamic Bloom filters to provide + * quick membership test for keys, and it offers a fast version of + * {@link Reader#get(WritableComparable, Writable)} operation, especially in + * case of sparsely populated MapFile-s. 
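To make the BloomMapFile description above concrete, here is a hedged usage sketch assuming the local file system and an illustrative /tmp path: keys are appended in sorted order as with any MapFile, and misses are usually answered by the in-memory filter without touching the index.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.BloomMapFile;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class BloomMapFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);      // any FileSystem works
    String dir = "/tmp/bloom-demo";                 // illustrative path

    BloomMapFile.Writer writer =
        new BloomMapFile.Writer(conf, fs, dir, Text.class, IntWritable.class);
    writer.append(new Text("alpha"), new IntWritable(1));   // keys in sorted order
    writer.append(new Text("beta"), new IntWritable(2));
    writer.close();                                  // also writes the "bloom" file

    BloomMapFile.Reader reader = new BloomMapFile.Reader(fs, dir, conf);
    IntWritable value = new IntWritable();
    // A key that was never appended is usually rejected by the filter alone.
    if (reader.probablyHasKey(new Text("gamma"))) {
      reader.get(new Text("gamma"), value);
    }
    System.out.println(reader.get(new Text("beta"), value)); // prints 2
    reader.close();
  }
}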
+ */ +public class BloomMapFile { + private static final Log LOG = LogFactory.getLog(BloomMapFile.class); + public static final String BLOOM_FILE_NAME = "bloom"; + public static final int HASH_COUNT = 5; + + public static void delete(FileSystem fs, String name) throws IOException { + Path dir = new Path(name); + Path data = new Path(dir, MapFile.DATA_FILE_NAME); + Path index = new Path(dir, MapFile.INDEX_FILE_NAME); + Path bloom = new Path(dir, BLOOM_FILE_NAME); + + fs.delete(data, true); + fs.delete(index, true); + fs.delete(bloom, true); + fs.delete(dir, true); + } + + public static class Writer extends MapFile.Writer { + private DynamicBloomFilter bloomFilter; + private int numKeys; + private int vectorSize; + private Key bloomKey = new Key(); + private DataOutputBuffer buf = new DataOutputBuffer(); + private FileSystem fs; + private Path dir; + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass, CompressionType compress, + CompressionCodec codec, Progressable progress) throws IOException { + super(conf, fs, dirName, keyClass, valClass, compress, codec, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass, CompressionType compress, + Progressable progress) throws IOException { + super(conf, fs, dirName, keyClass, valClass, compress, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass, CompressionType compress) + throws IOException { + super(conf, fs, dirName, keyClass, valClass, compress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + CompressionType compress, CompressionCodec codec, Progressable progress) + throws IOException { + super(conf, fs, dirName, comparator, valClass, compress, codec, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + CompressionType compress, Progressable progress) throws IOException { + super(conf, fs, dirName, comparator, valClass, compress, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, CompressionType compress) + throws IOException { + super(conf, fs, dirName, comparator, valClass, compress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass) throws IOException { + super(conf, fs, dirName, comparator, valClass); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass) throws IOException { + super(conf, fs, dirName, keyClass, valClass); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + private synchronized void initBloomFilter(Configuration conf) { + numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); + // vector size should be -kn / (ln(1 - c^(1/k))) bits for + // single 
key, where is the number of hash functions, + // n is the number of keys and c is the desired + // max. error rate. + // Our desired error rate is by default 0.005, i.e. 0.5% + float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); + vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / + Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); + bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, + Hash.getHashType(conf), numKeys); + } + + @Override + public synchronized void append(WritableComparable key, Writable val) + throws IOException { + super.append(key, val); + buf.reset(); + key.write(buf); + bloomKey.set(buf.getData(), 1.0); + bloomFilter.add(bloomKey); + } + + @Override + public synchronized void close() throws IOException { + super.close(); + DataOutputStream out = fs.create(new Path(dir, BLOOM_FILE_NAME), true); + bloomFilter.write(out); + out.flush(); + out.close(); + } + + } + + public static class Reader extends MapFile.Reader { + private DynamicBloomFilter bloomFilter; + private DataOutputBuffer buf = new DataOutputBuffer(); + private Key bloomKey = new Key(); + + public Reader(FileSystem fs, String dirName, Configuration conf) + throws IOException { + super(fs, dirName, conf); + initBloomFilter(fs, dirName, conf); + } + + public Reader(FileSystem fs, String dirName, WritableComparator comparator, + Configuration conf, boolean open) throws IOException { + super(fs, dirName, comparator, conf, open); + initBloomFilter(fs, dirName, conf); + } + + public Reader(FileSystem fs, String dirName, WritableComparator comparator, + Configuration conf) throws IOException { + super(fs, dirName, comparator, conf); + initBloomFilter(fs, dirName, conf); + } + + private void initBloomFilter(FileSystem fs, String dirName, + Configuration conf) { + try { + DataInputStream in = fs.open(new Path(dirName, BLOOM_FILE_NAME)); + bloomFilter = new DynamicBloomFilter(); + bloomFilter.readFields(in); + in.close(); + } catch (IOException ioe) { + LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile."); + bloomFilter = null; + } + } + + /** + * Checks if this MapFile has the indicated key. The membership test is + * performed using a Bloom filter, so the result has always non-zero + * probability of false positives. + * @param key key to check + * @return false iff key doesn't exist, true if key probably exists. + * @throws IOException + */ + public boolean probablyHasKey(WritableComparable key) throws IOException { + if (bloomFilter == null) { + return true; + } + buf.reset(); + key.write(buf); + bloomKey.set(buf.getData(), 1.0); + return bloomFilter.membershipTest(bloomKey); + } + + /** + * Fast version of the + * {@link MapFile.Reader#get(WritableComparable, Writable)} method. First + * it checks the Bloom filter for the existence of the key, and only if + * present it performs the real get operation. This yields significant + * performance improvements for get operations on sparsely populated files. + */ + @Override + public synchronized Writable get(WritableComparable key, Writable val) + throws IOException { + if (!probablyHasKey(key)) { + return null; + } + return super.get(key, val); + } + + /** + * Retrieve the Bloom filter used by this instance of the Reader. 
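The sizing comment in initBloomFilter() above, vectorSize = -kn / ln(1 - c^(1/k)) with k hash functions, n keys and desired error rate c, works out to roughly 12.3 million bits (about 1.5 MB) for the defaults of 1,048,576 keys, k = 5 and c = 0.005. The snippet below simply repeats that arithmetic; the class name is invented.

public class BloomVectorSizeCheck {
  public static void main(String[] args) {
    int hashCount = 5;                 // HASH_COUNT
    int numKeys = 1024 * 1024;         // io.mapfile.bloom.size default
    float errorRate = 0.005f;          // io.mapfile.bloom.error.rate default

    // vectorSize = ceil(-k*n / ln(1 - c^(1/k))) bits, as in initBloomFilter()
    int vectorSize = (int) Math.ceil((double) (-hashCount * numKeys) /
        Math.log(1.0 - Math.pow(errorRate, 1.0 / hashCount)));

    // Roughly 12.3 million bits for the defaults, i.e. about 1.5 MB of filter.
    System.out.println(vectorSize);
  }
}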
+ * @return a Bloom filter (see {@link Filter}) + */ + public Filter getBloomFilter() { + return bloomFilter; + } + } +} diff --git a/src/java/org/apache/hadoop/io/BooleanWritable.java b/src/java/org/apache/hadoop/io/BooleanWritable.java new file mode 100644 index 00000000000..1ef1a294571 --- /dev/null +++ b/src/java/org/apache/hadoop/io/BooleanWritable.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** + * A WritableComparable for booleans. + */ +public class BooleanWritable implements WritableComparable { + private boolean value; + + /** + */ + public BooleanWritable() {}; + + /** + */ + public BooleanWritable(boolean value) { + set(value); + } + + /** + * Set the value of the BooleanWritable + */ + public void set(boolean value) { + this.value = value; + } + + /** + * Returns the value of the BooleanWritable + */ + public boolean get() { + return value; + } + + /** + */ + public void readFields(DataInput in) throws IOException { + value = in.readBoolean(); + } + + /** + */ + public void write(DataOutput out) throws IOException { + out.writeBoolean(value); + } + + /** + */ + public boolean equals(Object o) { + if (!(o instanceof BooleanWritable)) { + return false; + } + BooleanWritable other = (BooleanWritable) o; + return this.value == other.value; + } + + public int hashCode() { + return value ? 0 : 1; + } + + + + /** + */ + public int compareTo(Object o) { + boolean a = this.value; + boolean b = ((BooleanWritable) o).value; + return ((a == b) ? 0 : (a == false) ? -1 : 1); + } + + public String toString() { + return Boolean.toString(get()); + } + + /** + * A Comparator optimized for BooleanWritable. + */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(BooleanWritable.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return compareBytes(b1, s1, l1, b2, s2, l2); + } + } + + + static { + WritableComparator.define(BooleanWritable.class, new Comparator()); + } +} diff --git a/src/java/org/apache/hadoop/io/ByteWritable.java b/src/java/org/apache/hadoop/io/ByteWritable.java new file mode 100644 index 00000000000..f9bd2e8eb60 --- /dev/null +++ b/src/java/org/apache/hadoop/io/ByteWritable.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for a single byte. */ +public class ByteWritable implements WritableComparable { + private byte value; + + public ByteWritable() {} + + public ByteWritable(byte value) { set(value); } + + /** Set the value of this ByteWritable. */ + public void set(byte value) { this.value = value; } + + /** Return the value of this ByteWritable. */ + public byte get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = in.readByte(); + } + + public void write(DataOutput out) throws IOException { + out.writeByte(value); + } + + /** Returns true iff o is a ByteWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof ByteWritable)) { + return false; + } + ByteWritable other = (ByteWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)value; + } + + /** Compares two ByteWritables. */ + public int compareTo(Object o) { + int thisValue = this.value; + int thatValue = ((ByteWritable)o).value; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + + public String toString() { + return Byte.toString(value); + } + + /** A Comparator optimized for ByteWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(ByteWritable.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + byte thisValue = b1[s1]; + byte thatValue = b2[s2]; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + } + + static { // register this comparator + WritableComparator.define(ByteWritable.class, new Comparator()); + } +} + diff --git a/src/java/org/apache/hadoop/io/BytesWritable.java b/src/java/org/apache/hadoop/io/BytesWritable.java new file mode 100644 index 00000000000..9f6bbe0e46d --- /dev/null +++ b/src/java/org/apache/hadoop/io/BytesWritable.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A byte sequence that is usable as a key or value. 
+ * It is resizable and distinguishes between the size of the seqeunce and + * the current capacity. The hash function is the front of the md5 of the + * buffer. The sort order is the same as memcmp. + */ +public class BytesWritable extends BinaryComparable + implements WritableComparable { + private static final Log LOG = LogFactory.getLog(BytesWritable.class); + private static final int LENGTH_BYTES = 4; + private static final byte[] EMPTY_BYTES = {}; + + private int size; + private byte[] bytes; + + /** + * Create a zero-size sequence. + */ + public BytesWritable() {this(EMPTY_BYTES);} + + /** + * Create a BytesWritable using the byte array as the initial value. + * @param bytes This array becomes the backing storage for the object. + */ + public BytesWritable(byte[] bytes) { + this.bytes = bytes; + this.size = bytes.length; + } + + /** + * Get the data from the BytesWritable. + * @return The data is only valid between 0 and getLength() - 1. + */ + public byte[] getBytes() { + return bytes; + } + + /** + * Get the data from the BytesWritable. + * @deprecated Use {@link #getBytes()} instead. + */ + @Deprecated + public byte[] get() { + return getBytes(); + } + + /** + * Get the current size of the buffer. + */ + public int getLength() { + return size; + } + + /** + * Get the current size of the buffer. + * @deprecated Use {@link #getLength()} instead. + */ + @Deprecated + public int getSize() { + return getLength(); + } + + /** + * Change the size of the buffer. The values in the old range are preserved + * and any new values are undefined. The capacity is changed if it is + * necessary. + * @param size The new number of bytes + */ + public void setSize(int size) { + if (size > getCapacity()) { + setCapacity(size * 3 / 2); + } + this.size = size; + } + + /** + * Get the capacity, which is the maximum size that could handled without + * resizing the backing storage. + * @return The number of bytes + */ + public int getCapacity() { + return bytes.length; + } + + /** + * Change the capacity of the backing storage. + * The data is preserved. + * @param new_cap The new capacity in bytes. + */ + public void setCapacity(int new_cap) { + if (new_cap != getCapacity()) { + byte[] new_data = new byte[new_cap]; + if (new_cap < size) { + size = new_cap; + } + if (size != 0) { + System.arraycopy(bytes, 0, new_data, 0, size); + } + bytes = new_data; + } + } + + /** + * Set the BytesWritable to the contents of the given newData. + * @param newData the value to set this BytesWritable to. + */ + public void set(BytesWritable newData) { + set(newData.bytes, 0, newData.size); + } + + /** + * Set the value to a copy of the given byte range + * @param newData the new values to copy in + * @param offset the offset in newData to start at + * @param length the number of bytes to copy + */ + public void set(byte[] newData, int offset, int length) { + setSize(0); + setSize(length); + System.arraycopy(newData, offset, bytes, 0, size); + } + + // inherit javadoc + public void readFields(DataInput in) throws IOException { + setSize(0); // clear the old data + setSize(in.readInt()); + in.readFully(bytes, 0, size); + } + + // inherit javadoc + public void write(DataOutput out) throws IOException { + out.writeInt(size); + out.write(bytes, 0, size); + } + + public int hashCode() { + return super.hashCode(); + } + + /** + * Are the two byte sequences equal? 
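A small sketch of the size/capacity distinction described above: only the first getLength() bytes of getBytes() are valid, and setSize() grows the backing array to 1.5 times the requested size when it has to reallocate. The demo class is illustrative only.

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableDemo {
  public static void main(String[] args) {
    BytesWritable buf = new BytesWritable();
    byte[] payload = { 1, 2, 3, 4, 5 };

    buf.set(payload, 0, payload.length);     // copies the range into the buffer
    System.out.println(buf.getLength());     // 5: number of valid bytes
    System.out.println(buf.getCapacity());   // >= 5: backing array may be larger

    buf.setSize(2);                          // shrink the logical size, keep capacity
    System.out.println(buf);                 // "01 02": hex pairs up to getLength()
  }
}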
+ */ + public boolean equals(Object right_obj) { + if (right_obj instanceof BytesWritable) + return super.equals(right_obj); + return false; + } + + /** + * Generate the stream of bytes as hex pairs separated by ' '. + */ + public String toString() { + StringBuffer sb = new StringBuffer(3*size); + for (int idx = 0; idx < size; idx++) { + // if not the first, put a blank separator in + if (idx != 0) { + sb.append(' '); + } + String num = Integer.toHexString(0xff & bytes[idx]); + // if it is only one digit, add a leading 0. + if (num.length() < 2) { + sb.append('0'); + } + sb.append(num); + } + return sb.toString(); + } + + /** A Comparator optimized for BytesWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(BytesWritable.class); + } + + /** + * Compare the buffers in serialized form. + */ + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, + b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES); + } + } + + static { // register this comparator + WritableComparator.define(BytesWritable.class, new Comparator()); + } + +} diff --git a/src/java/org/apache/hadoop/io/Closeable.java b/src/java/org/apache/hadoop/io/Closeable.java new file mode 100644 index 00000000000..a0cf8a69441 --- /dev/null +++ b/src/java/org/apache/hadoop/io/Closeable.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** @deprecated use java.io.Closeable */ +@Deprecated +public interface Closeable extends java.io.Closeable{ +} diff --git a/src/java/org/apache/hadoop/io/CompressedWritable.java b/src/java/org/apache/hadoop/io/CompressedWritable.java new file mode 100644 index 00000000000..17aca07c4d8 --- /dev/null +++ b/src/java/org/apache/hadoop/io/CompressedWritable.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
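The Comparator-plus-static-registration pattern used by BooleanWritable, ByteWritable and BytesWritable above carries over to user-defined keys. A hedged sketch follows; the MilliTime type is invented for illustration and compares serialized values without deserializing them.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/** A WritableComparable for millisecond timestamps. */
public class MilliTime implements WritableComparable {
  private long millis;

  public void set(long millis) { this.millis = millis; }
  public long get() { return millis; }

  public void write(DataOutput out) throws IOException { out.writeLong(millis); }
  public void readFields(DataInput in) throws IOException { millis = in.readLong(); }

  public int compareTo(Object o) {
    long that = ((MilliTime) o).millis;
    return (millis < that ? -1 : (millis == that ? 0 : 1));
  }

  /** Compares the raw serialized form, like the optimized comparators above. */
  public static class Comparator extends WritableComparator {
    public Comparator() { super(MilliTime.class); }

    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      long thisValue = readLong(b1, s1);
      long thatValue = readLong(b2, s2);
      return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
    }
  }

  static {  // register this comparator
    WritableComparator.define(MilliTime.class, new Comparator());
  }
}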
+ */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.DataInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ByteArrayInputStream; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; + +/** A base-class for Writables which store themselves compressed and lazily + * inflate on field access. This is useful for large objects whose fields are + * not be altered during a map or reduce operation: leaving the field data + * compressed makes copying the instance from one file to another much + * faster. */ +public abstract class CompressedWritable implements Writable { + // if non-null, the compressed field data of this instance. + private byte[] compressed; + + public CompressedWritable() {} + + public final void readFields(DataInput in) throws IOException { + compressed = new byte[in.readInt()]; + in.readFully(compressed, 0, compressed.length); + } + + /** Must be called by all methods which access fields to ensure that the data + * has been uncompressed. */ + protected void ensureInflated() { + if (compressed != null) { + try { + ByteArrayInputStream deflated = new ByteArrayInputStream(compressed); + DataInput inflater = + new DataInputStream(new InflaterInputStream(deflated)); + readFieldsCompressed(inflater); + compressed = null; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + /** Subclasses implement this instead of {@link #readFields(DataInput)}. */ + protected abstract void readFieldsCompressed(DataInput in) + throws IOException; + + public final void write(DataOutput out) throws IOException { + if (compressed == null) { + ByteArrayOutputStream deflated = new ByteArrayOutputStream(); + Deflater deflater = new Deflater(Deflater.BEST_SPEED); + DataOutputStream dout = + new DataOutputStream(new DeflaterOutputStream(deflated, deflater)); + writeCompressed(dout); + dout.close(); + deflater.end(); + compressed = deflated.toByteArray(); + } + out.writeInt(compressed.length); + out.write(compressed); + } + + /** Subclasses implement this instead of {@link #write(DataOutput)}. */ + protected abstract void writeCompressed(DataOutput out) throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/DataInputBuffer.java b/src/java/org/apache/hadoop/io/DataInputBuffer.java new file mode 100644 index 00000000000..71b98f81a39 --- /dev/null +++ b/src/java/org/apache/hadoop/io/DataInputBuffer.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A reusable {@link DataInput} implementation that reads from an in-memory + * buffer. + * + *
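A sketch of the CompressedWritable subclassing contract just shown: implement writeCompressed() and readFieldsCompressed() instead of write()/readFields(), and call ensureInflated() before touching fields. The BigRecord class and its single field are invented for illustration.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.CompressedWritable;

/** Keeps its field data deflated until something actually reads it. */
public class BigRecord extends CompressedWritable {
  private String body = "";

  public void setBody(String body) { this.body = body; }

  public String getBody() {
    ensureInflated();          // inflate lazily before the first field access
    return body;
  }

  @Override
  protected void writeCompressed(DataOutput out) throws IOException {
    out.writeUTF(body);        // routed through a DeflaterOutputStream by the base class
  }

  @Override
  protected void readFieldsCompressed(DataInput in) throws IOException {
    body = in.readUTF();       // read back through an InflaterInputStream
  }
}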

This saves memory over creating a new DataInputStream and + * ByteArrayInputStream each time data is read. + * + *

Typical usage is something like the following:

+ *
+ * DataInputBuffer buffer = new DataInputBuffer();
+ * while (... loop condition ...) {
+ *   byte[] data = ... get data ...;
+ *   int dataLength = ... get data length ...;
+ *   buffer.reset(data, dataLength);
+ *   ... read buffer using DataInput methods ...
+ * }
+ * 
+ * + */ +public class DataInputBuffer extends DataInputStream { + private static class Buffer extends ByteArrayInputStream { + public Buffer() { + super(new byte[] {}); + } + + public void reset(byte[] input, int start, int length) { + this.buf = input; + this.count = start+length; + this.mark = start; + this.pos = start; + } + + public byte[] getData() { return buf; } + public int getPosition() { return pos; } + public int getLength() { return count; } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public DataInputBuffer() { + this(new Buffer()); + } + + private DataInputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int length) { + buffer.reset(input, 0, length); + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int start, int length) { + buffer.reset(input, start, length); + } + + public byte[] getData() { + return buffer.getData(); + } + + /** Returns the current position in the input. */ + public int getPosition() { return buffer.getPosition(); } + + /** Returns the length of the input. */ + public int getLength() { return buffer.getLength(); } + +} diff --git a/src/java/org/apache/hadoop/io/DataOutputBuffer.java b/src/java/org/apache/hadoop/io/DataOutputBuffer.java new file mode 100644 index 00000000000..a7ad89839ea --- /dev/null +++ b/src/java/org/apache/hadoop/io/DataOutputBuffer.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A reusable {@link DataOutput} implementation that writes to an in-memory + * buffer. + * + *

This saves memory over creating a new DataOutputStream and + * ByteArrayOutputStream each time data is written. + * + *

Typical usage is something like the following:

+ *
+ * DataOutputBuffer buffer = new DataOutputBuffer();
+ * while (... loop condition ...) {
+ *   buffer.reset();
+ *   ... write buffer using DataOutput methods ...
+ *   byte[] data = buffer.getData();
+ *   int dataLength = buffer.getLength();
+ *   ... write data to its ultimate destination ...
+ * }
+ * 
+ * + */ +public class DataOutputBuffer extends DataOutputStream { + + private static class Buffer extends ByteArrayOutputStream { + public byte[] getData() { return buf; } + public int getLength() { return count; } + + public Buffer() { + super(); + } + + public Buffer(int size) { + super(size); + } + + public void write(DataInput in, int len) throws IOException { + int newcount = count + len; + if (newcount > buf.length) { + byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)]; + System.arraycopy(buf, 0, newbuf, 0, count); + buf = newbuf; + } + in.readFully(buf, count, len); + count = newcount; + } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public DataOutputBuffer() { + this(new Buffer()); + } + + public DataOutputBuffer(int size) { + this(new Buffer(size)); + } + + private DataOutputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Returns the current contents of the buffer. + * Data is only valid to {@link #getLength()}. + */ + public byte[] getData() { return buffer.getData(); } + + /** Returns the length of the valid data currently in the buffer. */ + public int getLength() { return buffer.getLength(); } + + /** Resets the buffer to empty. */ + public DataOutputBuffer reset() { + this.written = 0; + buffer.reset(); + return this; + } + + /** Writes bytes from a DataInput directly into the buffer. */ + public void write(DataInput in, int length) throws IOException { + buffer.write(in, length); + } + + /** Write to a file stream */ + public void writeTo(OutputStream out) throws IOException { + buffer.writeTo(out); + } +} diff --git a/src/java/org/apache/hadoop/io/DefaultStringifier.java b/src/java/org/apache/hadoop/io/DefaultStringifier.java new file mode 100644 index 00000000000..124a550942d --- /dev/null +++ b/src/java/org/apache/hadoop/io/DefaultStringifier.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.ArrayList; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; +import org.apache.hadoop.util.GenericsUtil; + +/** + * DefaultStringifier is the default implementation of the {@link Stringifier} + * interface which stringifies the objects using base64 encoding of the + * serialized version of the objects. The {@link Serializer} and + * {@link Deserializer} are obtained from the {@link SerializationFactory}. + *
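Tying the two reusable buffers together, here is a sketch of the reset-and-reuse loop from their usage notes above, with Text as the payload; the demo class is illustrative only.

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class BufferReuseDemo {
  public static void main(String[] args) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();
    Text scratch = new Text();

    String[] words = { "one", "two", "three" };
    for (String word : words) {
      out.reset();                                 // reuse the same backing array
      new Text(word).write(out);

      in.reset(out.getData(), out.getLength());    // only the first getLength() bytes are valid
      scratch.readFields(in);
      System.out.println(scratch);
    }
  }
}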
+ * DefaultStringifier offers convenience methods to store/load objects to/from + * the configuration. + * + * @param the class of the objects to stringify + */ +public class DefaultStringifier implements Stringifier { + + private static final String SEPARATOR = ","; + + private Serializer serializer; + + private Deserializer deserializer; + + private DataInputBuffer inBuf; + + private DataOutputBuffer outBuf; + + public DefaultStringifier(Configuration conf, Class c) { + + SerializationFactory factory = new SerializationFactory(conf); + this.serializer = factory.getSerializer(c); + this.deserializer = factory.getDeserializer(c); + this.inBuf = new DataInputBuffer(); + this.outBuf = new DataOutputBuffer(); + try { + serializer.open(outBuf); + deserializer.open(inBuf); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + public T fromString(String str) throws IOException { + try { + byte[] bytes = Base64.decodeBase64(str.getBytes("UTF-8")); + inBuf.reset(bytes, bytes.length); + T restored = deserializer.deserialize(null); + return restored; + } catch (UnsupportedCharsetException ex) { + throw new IOException(ex.toString()); + } + } + + public String toString(T obj) throws IOException { + outBuf.reset(); + serializer.serialize(obj); + byte[] buf = new byte[outBuf.getLength()]; + System.arraycopy(outBuf.getData(), 0, buf, 0, buf.length); + return new String(Base64.encodeBase64(buf)); + } + + public void close() throws IOException { + inBuf.close(); + outBuf.close(); + deserializer.close(); + serializer.close(); + } + + /** + * Stores the item in the configuration with the given keyName. + * + * @param the class of the item + * @param conf the configuration to store + * @param item the object to be stored + * @param keyName the name of the key to use + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. + */ + public static void store(Configuration conf, K item, String keyName) + throws IOException { + + DefaultStringifier stringifier = new DefaultStringifier(conf, + GenericsUtil.getClass(item)); + conf.set(keyName, stringifier.toString(item)); + stringifier.close(); + } + + /** + * Restores the object from the configuration. + * + * @param the class of the item + * @param conf the configuration to use + * @param keyName the name of the key to use + * @param itemClass the class of the item + * @return restored object + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. + */ + public static K load(Configuration conf, String keyName, + Class itemClass) throws IOException { + DefaultStringifier stringifier = new DefaultStringifier(conf, + itemClass); + try { + String itemStr = conf.get(keyName); + return stringifier.fromString(itemStr); + } finally { + stringifier.close(); + } + } + + /** + * Stores the array of items in the configuration with the given keyName. + * + * @param the class of the item + * @param conf the configuration to use + * @param items the objects to be stored + * @param keyName the name of the key to use + * @throws IndexOutOfBoundsException if the items array is empty + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. 
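A sketch of the store/load convenience described above; the key name is arbitrary, and a serialization that can handle Text (the default configuration provides the Writable one) is assumed to be in effect.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;

public class StringifierDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();

    // Serialize the object, base64-encode it, and stash it under a config key.
    DefaultStringifier.store(conf, new Text("hello"), "demo.stored.text");

    // Later (for example in a task), decode it back from the configuration.
    Text restored = DefaultStringifier.load(conf, "demo.stored.text", Text.class);
    System.out.println(restored);   // prints hello
  }
}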
+ */ + public static void storeArray(Configuration conf, K[] items, + String keyName) throws IOException { + + DefaultStringifier stringifier = new DefaultStringifier(conf, + GenericsUtil.getClass(items[0])); + try { + StringBuilder builder = new StringBuilder(); + for (K item : items) { + builder.append(stringifier.toString(item)).append(SEPARATOR); + } + conf.set(keyName, builder.toString()); + } + finally { + stringifier.close(); + } + } + + /** + * Restores the array of objects from the configuration. + * + * @param the class of the item + * @param conf the configuration to use + * @param keyName the name of the key to use + * @param itemClass the class of the item + * @return restored object + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. + */ + public static K[] loadArray(Configuration conf, String keyName, + Class itemClass) throws IOException { + DefaultStringifier stringifier = new DefaultStringifier(conf, + itemClass); + try { + String itemStr = conf.get(keyName); + ArrayList list = new ArrayList(); + String[] parts = itemStr.split(SEPARATOR); + + for (String part : parts) { + if (!part.equals("")) + list.add(stringifier.fromString(part)); + } + + return GenericsUtil.toArray(itemClass, list); + } + finally { + stringifier.close(); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/DeprecatedUTF8.java b/src/java/org/apache/hadoop/io/DeprecatedUTF8.java new file mode 100644 index 00000000000..b27973c180e --- /dev/null +++ b/src/java/org/apache/hadoop/io/DeprecatedUTF8.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link UTF8}. + * This class should be used only when it is absolutely necessary + * to use {@link UTF8}. The only difference is that using this class + * does not require "@SuppressWarning" annotation to avoid javac warning. + * Instead the deprecation is implied in the class name. + */ +@SuppressWarnings("deprecation") +public class DeprecatedUTF8 extends UTF8 { + + public DeprecatedUTF8() { + super(); + } + + /** Construct from a given string. */ + public DeprecatedUTF8(String string) { + super(string); + } + + /** Construct from a given string. */ + public DeprecatedUTF8(DeprecatedUTF8 utf8) { + super(utf8); + } + + /* The following two are the mostly commonly used methods. + * wrapping them so that editors do not complain about the deprecation. 
+ */ + + public static String readString(DataInput in) throws IOException { + return UTF8.readString(in); + } + + public static int writeString(DataOutput out, String s) throws IOException { + return UTF8.writeString(out, s); + } +} diff --git a/src/java/org/apache/hadoop/io/DoubleWritable.java b/src/java/org/apache/hadoop/io/DoubleWritable.java new file mode 100644 index 00000000000..fa6f3843bf3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/DoubleWritable.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Writable for Double values. + */ +public class DoubleWritable implements WritableComparable { + + private double value = 0.0; + + public DoubleWritable() { + + } + + public DoubleWritable(double value) { + set(value); + } + + public void readFields(DataInput in) throws IOException { + value = in.readDouble(); + } + + public void write(DataOutput out) throws IOException { + out.writeDouble(value); + } + + public void set(double value) { this.value = value; } + + public double get() { return value; } + + /** + * Returns true iff o is a DoubleWritable with the same value. + */ + public boolean equals(Object o) { + if (!(o instanceof DoubleWritable)) { + return false; + } + DoubleWritable other = (DoubleWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)Double.doubleToLongBits(value); + } + + public int compareTo(Object o) { + DoubleWritable other = (DoubleWritable)o; + return (value < other.value ? -1 : (value == other.value ? 0 : 1)); + } + + public String toString() { + return Double.toString(value); + } + + /** A Comparator optimized for DoubleWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(DoubleWritable.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + double thisValue = readDouble(b1, s1); + double thatValue = readDouble(b2, s2); + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + } + + static { // register this comparator + WritableComparator.define(DoubleWritable.class, new Comparator()); + } + +} + diff --git a/src/java/org/apache/hadoop/io/EnumSetWritable.java b/src/java/org/apache/hadoop/io/EnumSetWritable.java new file mode 100644 index 00000000000..7549dca2b6e --- /dev/null +++ b/src/java/org/apache/hadoop/io/EnumSetWritable.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** A Writable wrapper for EnumSet. */ +public class EnumSetWritable> implements Writable, + Configurable { + + private EnumSet value; + + private Class elementType; + + private Configuration conf; + + EnumSetWritable() { + } + + /** + * Construct a new EnumSetWritable. If the value argument is null or + * its size is zero, the elementType argument must not be null. If + * the argument value's size is bigger than zero, the argument + * elementType is not be used. + * + * @param value + * @param elementType + */ + public EnumSetWritable(EnumSet value, Class elementType) { + set(value, elementType); + } + + /** + * Construct a new EnumSetWritable. Argument value should not be null + * or empty. + * + * @param value + */ + public EnumSetWritable(EnumSet value) { + this(value, null); + } + + /** + * reset the EnumSetWritable with specified + * value and elementType. If the value argument + * is null or its size is zero, the elementType argument must not be + * null. If the argument value's size is bigger than zero, the + * argument elementType is not be used. + * + * @param value + * @param elementType + */ + public void set(EnumSet value, Class elementType) { + if ((value == null || value.size() == 0) + && (this.elementType == null && elementType == null)) { + throw new IllegalArgumentException( + "The EnumSet argument is null, or is an empty set but with no elementType provided."); + } + this.value = value; + if (value != null && value.size() > 0) { + Iterator iterator = value.iterator(); + this.elementType = iterator.next().getDeclaringClass(); + } else if (elementType != null) { + this.elementType = elementType; + } + } + + /** Return the value of this EnumSetWritable. 
*/ + public EnumSet get() { + return value; + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + public void readFields(DataInput in) throws IOException { + int length = in.readInt(); + if (length == -1) + this.value = null; + else if (length == 0) { + this.elementType = (Class) ObjectWritable.loadClass(conf, + WritableUtils.readString(in)); + this.value = EnumSet.noneOf(this.elementType); + } else { + E first = (E) ObjectWritable.readObject(in, conf); + this.value = (EnumSet) EnumSet.of(first); + for (int i = 1; i < length; i++) + this.value.add((E) ObjectWritable.readObject(in, conf)); + } + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + if (this.value == null) { + out.writeInt(-1); + WritableUtils.writeString(out, this.elementType.getName()); + } else { + Object[] array = this.value.toArray(); + int length = array.length; + out.writeInt(length); + if (length == 0) { + if (this.elementType == null) + throw new UnsupportedOperationException( + "Unable to serialize empty EnumSet with no element type provided."); + WritableUtils.writeString(out, this.elementType.getName()); + } + for (int i = 0; i < length; i++) { + ObjectWritable.writeObject(out, array[i], array[i].getClass(), conf); + } + } + } + + /** + * Returns true if o is an EnumSetWritable with the same value, + * or both are null. + */ + public boolean equals(Object o) { + if (o == null) { + throw new IllegalArgumentException("null argument passed in equal()."); + } + + if (!(o instanceof EnumSetWritable)) + return false; + + EnumSetWritable other = (EnumSetWritable) o; + + if (this == o || (this.value == other.value)) + return true; + if (this.value == null) // other.value must not be null if we reach here + return false; + + return this.value.equals(other.value); + } + + /** + * Returns the class of all the elements of the underlying EnumSetWriable. It + * may return null. + * + * @return the element class + */ + public Class getElementType() { + return elementType; + } + + /** {@inheritDoc} */ + public int hashCode() { + if (value == null) + return 0; + return (int) value.hashCode(); + } + + /** {@inheritDoc} */ + public String toString() { + if (value == null) + return "(null)"; + return value.toString(); + } + + /** {@inheritDoc} */ + @Override + public Configuration getConf() { + return this.conf; + } + + /** {@inheritDoc} */ + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + static { + WritableFactories.setFactory(EnumSetWritable.class, new WritableFactory() { + @SuppressWarnings("unchecked") + @Override + public Writable newInstance() { + return new EnumSetWritable(); + } + }); + } +} diff --git a/src/java/org/apache/hadoop/io/FloatWritable.java b/src/java/org/apache/hadoop/io/FloatWritable.java new file mode 100644 index 00000000000..484423f0b45 --- /dev/null +++ b/src/java/org/apache/hadoop/io/FloatWritable.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
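A round-trip sketch for the EnumSet wrapper above; the Weekday enum is invented, the element type is passed explicitly because, as write() shows, an empty set cannot be serialized without it, and a Configuration is supplied because readFields() resolves classes through it.

import java.io.IOException;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.EnumSetWritable;

public class EnumSetWritableDemo {
  enum Weekday { MON, TUE, WED, THU, FRI }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();

    EnumSetWritable<Weekday> days =
        new EnumSetWritable<Weekday>(EnumSet.of(Weekday.MON, Weekday.FRI), Weekday.class);

    DataOutputBuffer out = new DataOutputBuffer();
    days.write(out);

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());

    EnumSetWritable<Weekday> copy =
        new EnumSetWritable<Weekday>(EnumSet.allOf(Weekday.class));
    copy.setConf(conf);              // classes are loaded through the Configuration
    copy.readFields(in);
    System.out.println(copy.get());  // prints [MON, FRI]
  }
}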
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for floats. */ +public class FloatWritable implements WritableComparable { + private float value; + + public FloatWritable() {} + + public FloatWritable(float value) { set(value); } + + /** Set the value of this FloatWritable. */ + public void set(float value) { this.value = value; } + + /** Return the value of this FloatWritable. */ + public float get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = in.readFloat(); + } + + public void write(DataOutput out) throws IOException { + out.writeFloat(value); + } + + /** Returns true iff o is a FloatWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof FloatWritable)) + return false; + FloatWritable other = (FloatWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return Float.floatToIntBits(value); + } + + /** Compares two FloatWritables. */ + public int compareTo(Object o) { + float thisValue = this.value; + float thatValue = ((FloatWritable)o).value; + return (thisValue + * When two sequence files, which have same Key type but different Value + * types, are mapped out to reduce, multiple Value types is not allowed. + * In this case, this class can help you wrap instances with different types. + *

+ * + *

+ * Compared with ObjectWritable, this class is much more effective, + * because ObjectWritable will append the class declaration as a String + * into the output file in every Key-Value pair. + *

+ * + *

+ * Generic Writable implements {@link Configurable} interface, so that it will be + * configured by the framework. The configuration is passed to the wrapped objects + * implementing {@link Configurable} interface before deserialization. + *

+ * + * how to use it:
+ * 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ * 2. Implement the abstract method getTypes(), defining + * the classes which will be wrapped in GenericObject in the application. + * Attention: the classes defined in the getTypes() method must + * implement the Writable interface. + *

+ * + * The code looks like this: + *
+ * public class GenericObject extends GenericWritable {
+ * 
+ *   private static Class[] CLASSES = {
+ *               ClassType1.class, 
+ *               ClassType2.class,
+ *               ClassType3.class,
+ *               };
+ *
+ *   protected Class[] getTypes() {
+ *       return CLASSES;
+ *   }
+ *
+ * }
+ * 
+ * + * @since Nov 8, 2006 + */ +public abstract class GenericWritable implements Writable, Configurable { + + private static final byte NOT_SET = -1; + + private byte type = NOT_SET; + + private Writable instance; + + private Configuration conf = null; + + /** + * Set the instance that is wrapped. + * + * @param obj + */ + public void set(Writable obj) { + instance = obj; + Class instanceClazz = instance.getClass(); + Class[] clazzes = getTypes(); + for (int i = 0; i < clazzes.length; i++) { + Class clazz = clazzes[i]; + if (clazz.equals(instanceClazz)) { + type = (byte) i; + return; + } + } + throw new RuntimeException("The type of instance is: " + + instance.getClass() + ", which is NOT registered."); + } + + /** + * Return the wrapped instance. + */ + public Writable get() { + return instance; + } + + public String toString() { + return "GW[" + (instance != null ? ("class=" + instance.getClass().getName() + + ",value=" + instance.toString()) : "(null)") + "]"; + } + + public void readFields(DataInput in) throws IOException { + type = in.readByte(); + Class clazz = getTypes()[type & 0xff]; + try { + instance = ReflectionUtils.newInstance(clazz, conf); + } catch (Exception e) { + e.printStackTrace(); + throw new IOException("Cannot initialize the class: " + clazz); + } + instance.readFields(in); + } + + public void write(DataOutput out) throws IOException { + if (type == NOT_SET || instance == null) + throw new IOException("The GenericWritable has NOT been set correctly. type=" + + type + ", instance=" + instance); + out.writeByte(type); + instance.write(out); + } + + /** + * Return all classes that may be wrapped. Subclasses should implement this + * to return a constant array of classes. + */ + abstract protected Class[] getTypes(); + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + +} diff --git a/src/java/org/apache/hadoop/io/IOUtils.java b/src/java/org/apache/hadoop/io/IOUtils.java new file mode 100644 index 00000000000..44723f4c325 --- /dev/null +++ b/src/java/org/apache/hadoop/io/IOUtils.java @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.net.Socket; + +import org.apache.commons.logging.Log; + +import org.apache.hadoop.conf.Configuration; + +/** + * An utility class for I/O related functionality. + */ +public class IOUtils { + + /** + * Copies from one stream to another. + * @param in InputStrem to read from + * @param out OutputStream to write to + * @param buffSize the size of the buffer + * @param close whether or not close the InputStream and + * OutputStream at the end. The streams are closed in the finally clause. 
+ */ + public static void copyBytes(InputStream in, OutputStream out, int buffSize, boolean close) + throws IOException { + + PrintStream ps = out instanceof PrintStream ? (PrintStream)out : null; + byte buf[] = new byte[buffSize]; + try { + int bytesRead = in.read(buf); + while (bytesRead >= 0) { + out.write(buf, 0, bytesRead); + if ((ps != null) && ps.checkError()) { + throw new IOException("Unable to write to output stream."); + } + bytesRead = in.read(buf); + } + } finally { + if(close) { + out.close(); + in.close(); + } + } + } + + /** + * Copies from one stream to another. closes the input and output streams + * at the end. + * @param in InputStrem to read from + * @param out OutputStream to write to + * @param conf the Configuration object + */ + public static void copyBytes(InputStream in, OutputStream out, Configuration conf) + throws IOException { + copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), true); + } + + /** + * Copies from one stream to another. + * @param in InputStrem to read from + * @param out OutputStream to write to + * @param conf the Configuration object + * @param close whether or not close the InputStream and + * OutputStream at the end. The streams are closed in the finally clause. + */ + public static void copyBytes(InputStream in, OutputStream out, Configuration conf, boolean close) + throws IOException { + copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), close); + } + + /** Reads len bytes in a loop. + * @param in The InputStream to read from + * @param buf The buffer to fill + * @param off offset from the buffer + * @param len the length of bytes to read + * @throws IOException if it could not read requested number of bytes + * for any reason (including EOF) + */ + public static void readFully( InputStream in, byte buf[], + int off, int len ) throws IOException { + int toRead = len; + while ( toRead > 0 ) { + int ret = in.read( buf, off, toRead ); + if ( ret < 0 ) { + throw new IOException( "Premeture EOF from inputStream"); + } + toRead -= ret; + off += ret; + } + } + + /** Similar to readFully(). Skips bytes in a loop. + * @param in The InputStream to skip bytes from + * @param len number of bytes to skip. + * @throws IOException if it could not skip requested number of bytes + * for any reason (including EOF) + */ + public static void skipFully( InputStream in, long len ) throws IOException { + while ( len > 0 ) { + long ret = in.skip( len ); + if ( ret < 0 ) { + throw new IOException( "Premeture EOF from inputStream"); + } + len -= ret; + } + } + + /** + * Close the Closeable objects and ignore any {@link IOException} or + * null pointers. Must only be used for cleanup in exception handlers. + * @param log the log to record problems to at debug level. Can be null. + * @param closeables the objects to close + */ + public static void cleanup(Log log, java.io.Closeable... closeables) { + for(java.io.Closeable c : closeables) { + if (c != null) { + try { + c.close(); + } catch(IOException e) { + if (log != null && log.isDebugEnabled()) { + log.debug("Exception in closing " + c, e); + } + } + } + } + } + + /** + * Closes the stream ignoring {@link IOException}. + * Must only be called in cleaning up from exception handlers. 
+ * @param stream the Stream to close + */ + public static void closeStream( java.io.Closeable stream ) { + cleanup(null, stream); + } + + /** + * Closes the socket ignoring {@link IOException} + * @param sock the Socket to close + */ + public static void closeSocket( Socket sock ) { + // avoids try { close() } dance + if ( sock != null ) { + try { + sock.close(); + } catch ( IOException ignored ) { + } + } + } + + /** /dev/null of OutputStreams. + */ + public static class NullOutputStream extends OutputStream { + public void write(byte[] b, int off, int len) throws IOException { + } + + public void write(int b) throws IOException { + } + } +} diff --git a/src/java/org/apache/hadoop/io/InputBuffer.java b/src/java/org/apache/hadoop/io/InputBuffer.java new file mode 100644 index 00000000000..272a707738b --- /dev/null +++ b/src/java/org/apache/hadoop/io/InputBuffer.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + + +/** A reusable {@link InputStream} implementation that reads from an in-memory + * buffer. + * + *

+ * <p>This saves memory over creating a new InputStream and
+ * ByteArrayInputStream each time data is read.
+ *
+ * <p>Typical usage is something like the following:<pre>
+ *
+ * InputBuffer buffer = new InputBuffer();
+ * while (... loop condition ...) {
+ *   byte[] data = ... get data ...;
+ *   int dataLength = ... get data length ...;
+ *   buffer.reset(data, dataLength);
+ *   ... read buffer using InputStream methods ...
+ * }
+ * </pre>
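+ *
+ * <p>For instance, the "read buffer" step above can use any plain
+ * {@link InputStream} call (the byte values here are arbitrary):<pre>
+ *
+ * byte[] data = new byte[] { 1, 2, 3 };
+ * buffer.reset(data, data.length);
+ * int first = buffer.read();           // returns 1
+ * long skipped = buffer.skip(1);       // skips over the 2
+ * int third = buffer.read();           // returns 3
+ * </pre>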
+ * @see DataInputBuffer + * @see DataOutput + */ +public class InputBuffer extends FilterInputStream { + + private static class Buffer extends ByteArrayInputStream { + public Buffer() { + super(new byte[] {}); + } + + public void reset(byte[] input, int start, int length) { + this.buf = input; + this.count = start+length; + this.mark = start; + this.pos = start; + } + + public int getPosition() { return pos; } + public int getLength() { return count; } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public InputBuffer() { + this(new Buffer()); + } + + private InputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int length) { + buffer.reset(input, 0, length); + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int start, int length) { + buffer.reset(input, start, length); + } + + /** Returns the current position in the input. */ + public int getPosition() { return buffer.getPosition(); } + + /** Returns the length of the input. */ + public int getLength() { return buffer.getLength(); } + +} diff --git a/src/java/org/apache/hadoop/io/IntWritable.java b/src/java/org/apache/hadoop/io/IntWritable.java new file mode 100644 index 00000000000..99875030a63 --- /dev/null +++ b/src/java/org/apache/hadoop/io/IntWritable.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for ints. */ +public class IntWritable implements WritableComparable { + private int value; + + public IntWritable() {} + + public IntWritable(int value) { set(value); } + + /** Set the value of this IntWritable. */ + public void set(int value) { this.value = value; } + + /** Return the value of this IntWritable. */ + public int get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = in.readInt(); + } + + public void write(DataOutput out) throws IOException { + out.writeInt(value); + } + + /** Returns true iff o is a IntWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof IntWritable)) + return false; + IntWritable other = (IntWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return value; + } + + /** Compares two IntWritables. */ + public int compareTo(Object o) { + int thisValue = this.value; + int thatValue = ((IntWritable)o).value; + return (thisValueo
is a LongWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof LongWritable)) + return false; + LongWritable other = (LongWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)value; + } + + /** Compares two LongWritables. */ + public int compareTo(Object o) { + long thisValue = this.value; + long thatValue = ((LongWritable)o).value; + return (thisValue { + public static final int MD5_LEN = 16; + + private static ThreadLocal DIGESTER_FACTORY = new ThreadLocal() { + protected MessageDigest initialValue() { + try { + return MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + }; + + private byte[] digest; + + /** Constructs an MD5Hash. */ + public MD5Hash() { + this.digest = new byte[MD5_LEN]; + } + + /** Constructs an MD5Hash from a hex string. */ + public MD5Hash(String hex) { + setDigest(hex); + } + + /** Constructs an MD5Hash with a specified value. */ + public MD5Hash(byte[] digest) { + if (digest.length != MD5_LEN) + throw new IllegalArgumentException("Wrong length: " + digest.length); + this.digest = digest; + } + + // javadoc from Writable + public void readFields(DataInput in) throws IOException { + in.readFully(digest); + } + + /** Constructs, reads and returns an instance. */ + public static MD5Hash read(DataInput in) throws IOException { + MD5Hash result = new MD5Hash(); + result.readFields(in); + return result; + } + + // javadoc from Writable + public void write(DataOutput out) throws IOException { + out.write(digest); + } + + /** Copy the contents of another instance into this instance. */ + public void set(MD5Hash that) { + System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN); + } + + /** Returns the digest bytes. */ + public byte[] getDigest() { return digest; } + + /** Construct a hash value for a byte array. */ + public static MD5Hash digest(byte[] data) { + return digest(data, 0, data.length); + } + + /** Construct a hash value for the content from the InputStream. */ + public static MD5Hash digest(InputStream in) throws IOException { + final byte[] buffer = new byte[4*1024]; + + final MessageDigest digester = DIGESTER_FACTORY.get(); + for(int n; (n = in.read(buffer)) != -1; ) { + digester.update(buffer, 0, n); + } + + return new MD5Hash(digester.digest()); + } + + /** Construct a hash value for a byte array. */ + public static MD5Hash digest(byte[] data, int start, int len) { + byte[] digest; + MessageDigest digester = DIGESTER_FACTORY.get(); + digester.update(data, start, len); + digest = digester.digest(); + return new MD5Hash(digest); + } + + /** Construct a hash value for a String. */ + public static MD5Hash digest(String string) { + return digest(UTF8.getBytes(string)); + } + + /** Construct a hash value for a String. */ + public static MD5Hash digest(UTF8 utf8) { + return digest(utf8.getBytes(), 0, utf8.getLength()); + } + + /** Construct a half-sized version of this MD5. Fits in a long **/ + public long halfDigest() { + long value = 0; + for (int i = 0; i < 8; i++) + value |= ((digest[i] & 0xffL) << (8*(7-i))); + return value; + } + + /** + * Return a 32-bit digest of the MD5. + * @return the first 4 bytes of the md5 + */ + public int quarterDigest() { + int value = 0; + for (int i = 0; i < 4; i++) + value |= ((digest[i] & 0xff) << (8*(3-i))); + return value; + } + + /** Returns true iff o is an MD5Hash whose digest contains the + * same values. 
*/ + public boolean equals(Object o) { + if (!(o instanceof MD5Hash)) + return false; + MD5Hash other = (MD5Hash)o; + return Arrays.equals(this.digest, other.digest); + } + + /** Returns a hash code value for this object. + * Only uses the first 4 bytes, since md5s are evenly distributed. + */ + public int hashCode() { + return quarterDigest(); + } + + + /** Compares this object with the specified object for order.*/ + public int compareTo(MD5Hash that) { + return WritableComparator.compareBytes(this.digest, 0, MD5_LEN, + that.digest, 0, MD5_LEN); + } + + /** A WritableComparator optimized for MD5Hash keys. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(MD5Hash.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return compareBytes(b1, s1, MD5_LEN, b2, s2, MD5_LEN); + } + } + + static { // register this comparator + WritableComparator.define(MD5Hash.class, new Comparator()); + } + + private static final char[] HEX_DIGITS = + {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + + /** Returns a string representation of this object. */ + public String toString() { + StringBuffer buf = new StringBuffer(MD5_LEN*2); + for (int i = 0; i < MD5_LEN; i++) { + int b = digest[i]; + buf.append(HEX_DIGITS[(b >> 4) & 0xf]); + buf.append(HEX_DIGITS[b & 0xf]); + } + return buf.toString(); + } + + /** Sets the digest value from a hex string. */ + public void setDigest(String hex) { + if (hex.length() != MD5_LEN*2) + throw new IllegalArgumentException("Wrong length: " + hex.length()); + byte[] digest = new byte[MD5_LEN]; + for (int i = 0; i < MD5_LEN; i++) { + int j = i << 1; + digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 | + charToNibble(hex.charAt(j+1))); + } + this.digest = digest; + } + + private static final int charToNibble(char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } else if (c >= 'a' && c <= 'f') { + return 0xa + (c - 'a'); + } else if (c >= 'A' && c <= 'F') { + return 0xA + (c - 'A'); + } else { + throw new RuntimeException("Not a hex character: " + c); + } + } + + +} diff --git a/src/java/org/apache/hadoop/io/MapFile.java b/src/java/org/apache/hadoop/io/MapFile.java new file mode 100644 index 00000000000..10598f0a42a --- /dev/null +++ b/src/java/org/apache/hadoop/io/MapFile.java @@ -0,0 +1,713 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io; + +import java.util.ArrayList; +import java.util.Arrays; +import java.io.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.DefaultCodec; + +/** A file-based map from keys to values. + * + *

A map is a directory containing two files, the data file, + * containing all keys and values in the map, and a smaller index + * file, containing a fraction of the keys. The fraction is determined by + * {@link Writer#getIndexInterval()}. + * + *

The index file is read entirely into memory. Thus key implementations + * should try to keep themselves small. + * + *
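+ *
+ * <p>A minimal sketch of writing and then probing a map, assuming an existing
+ * Configuration conf and FileSystem fs (the directory name and the key/value
+ * classes are arbitrary):<pre>
+ *
+ * MapFile.Writer writer =
+ *   new MapFile.Writer(conf, fs, "example.map", Text.class, LongWritable.class);
+ * writer.append(new Text("alpha"), new LongWritable(1));  // keys must be appended in sorted order
+ * writer.append(new Text("beta"), new LongWritable(2));
+ * writer.close();
+ *
+ * MapFile.Reader reader = new MapFile.Reader(fs, "example.map", conf);
+ * LongWritable value = new LongWritable();
+ * reader.get(new Text("beta"), value);                    // returns null if the key is absent
+ * reader.close();
+ * </pre>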

Map files are created by adding entries in-order. To maintain a large + * database, perform updates by copying the previous version of a database and + * merging in a sorted change list, to create a new version of the database in + * a new file. Sorting large change lists can be done with {@link + * SequenceFile.Sorter}. + */ +public class MapFile { + private static final Log LOG = LogFactory.getLog(MapFile.class); + + /** The name of the index file. */ + public static final String INDEX_FILE_NAME = "index"; + + /** The name of the data file. */ + public static final String DATA_FILE_NAME = "data"; + + protected MapFile() {} // no public ctor + + /** Writes a new map. */ + public static class Writer implements java.io.Closeable { + private SequenceFile.Writer data; + private SequenceFile.Writer index; + + final private static String INDEX_INTERVAL = "io.map.index.interval"; + private int indexInterval = 128; + + private long size; + private LongWritable position = new LongWritable(); + + // the following fields are used only for checking key order + private WritableComparator comparator; + private DataInputBuffer inBuf = new DataInputBuffer(); + private DataOutputBuffer outBuf = new DataOutputBuffer(); + private WritableComparable lastKey; + + /** What's the position (in bytes) we wrote when we got the last index */ + private long lastIndexPos = -1; + + /** + * What was size when we last wrote an index. Set to MIN_VALUE to ensure that + * we have an index at position zero -- midKey will throw an exception if this + * is not the case + */ + private long lastIndexKeyCount = Long.MIN_VALUE; + + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass) + throws IOException { + this(conf, fs, dirName, + WritableComparator.get(keyClass), valClass, + SequenceFile.getCompressionType(conf)); + } + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass, + CompressionType compress, Progressable progress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), valClass, + compress, progress); + } + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass, + CompressionType compress, CompressionCodec codec, + Progressable progress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), valClass, + compress, codec, progress); + } + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass, + CompressionType compress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), valClass, compress); + } + + /** Create the named map using the named key comparator. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass) + throws IOException { + this(conf, fs, dirName, comparator, valClass, + SequenceFile.getCompressionType(conf)); + } + /** Create the named map using the named key comparator. 
*/ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + SequenceFile.CompressionType compress) + throws IOException { + this(conf, fs, dirName, comparator, valClass, compress, null); + } + /** Create the named map using the named key comparator. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + SequenceFile.CompressionType compress, + Progressable progress) + throws IOException { + this(conf, fs, dirName, comparator, valClass, + compress, new DefaultCodec(), progress); + } + /** Create the named map using the named key comparator. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + SequenceFile.CompressionType compress, CompressionCodec codec, + Progressable progress) + throws IOException { + + this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval); + + this.comparator = comparator; + this.lastKey = comparator.newKey(); + + Path dir = new Path(dirName); + if (!fs.mkdirs(dir)) { + throw new IOException("Mkdirs failed to create directory " + dir.toString()); + } + Path dataFile = new Path(dir, DATA_FILE_NAME); + Path indexFile = new Path(dir, INDEX_FILE_NAME); + + Class keyClass = comparator.getKeyClass(); + this.data = + SequenceFile.createWriter + (fs, conf, dataFile, keyClass, valClass, compress, codec, progress); + this.index = + SequenceFile.createWriter + (fs, conf, indexFile, keyClass, LongWritable.class, + CompressionType.BLOCK, progress); + } + + /** The number of entries that are added before an index entry is added.*/ + public int getIndexInterval() { return indexInterval; } + + /** Sets the index interval. + * @see #getIndexInterval() + */ + public void setIndexInterval(int interval) { indexInterval = interval; } + + /** Sets the index interval and stores it in conf + * @see #getIndexInterval() + */ + public static void setIndexInterval(Configuration conf, int interval) { + conf.setInt(INDEX_INTERVAL, interval); + } + + /** Close the map. */ + public synchronized void close() throws IOException { + data.close(); + index.close(); + } + + /** Append a key/value pair to the map. The key must be greater or equal + * to the previous key added to the map. */ + public synchronized void append(WritableComparable key, Writable val) + throws IOException { + + checkKey(key); + + long pos = data.getLength(); + // Only write an index if we've changed positions. In a block compressed + // file, this means we write an entry at the start of each block + if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) { + position.set(pos); // point to current eof + index.append(key, position); + lastIndexPos = pos; + lastIndexKeyCount = size; + } + + data.append(key, val); // append key/value to data + size++; + } + + private void checkKey(WritableComparable key) throws IOException { + // check that keys are well-ordered + if (size != 0 && comparator.compare(lastKey, key) > 0) + throw new IOException("key out of order: "+key+" after "+lastKey); + + // update lastKey with a copy of key by writing and reading + outBuf.reset(); + key.write(outBuf); // write new key + + inBuf.reset(outBuf.getData(), outBuf.getLength()); + lastKey.readFields(inBuf); // read into lastKey + } + + } + + /** Provide access to an existing map. */ + public static class Reader implements java.io.Closeable { + + /** Number of index entries to skip between each entry. Zero by default. 
+ * Setting this to values larger than zero can facilitate opening large map + * files using less memory. */ + private int INDEX_SKIP = 0; + + private WritableComparator comparator; + + private WritableComparable nextKey; + private long seekPosition = -1; + private int seekIndex = -1; + private long firstPosition; + + // the data, on disk + private SequenceFile.Reader data; + private SequenceFile.Reader index; + + // whether the index Reader was closed + private boolean indexClosed = false; + + // the index, in memory + private int count = -1; + private WritableComparable[] keys; + private long[] positions; + + /** Returns the class of keys in this file. */ + public Class getKeyClass() { return data.getKeyClass(); } + + /** Returns the class of values in this file. */ + public Class getValueClass() { return data.getValueClass(); } + + /** Construct a map reader for the named map.*/ + public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException { + this(fs, dirName, null, conf); + INDEX_SKIP = conf.getInt("io.map.index.skip", 0); + } + + /** Construct a map reader for the named map using the named comparator.*/ + public Reader(FileSystem fs, String dirName, WritableComparator comparator, Configuration conf) + throws IOException { + this(fs, dirName, comparator, conf, true); + } + + /** + * Hook to allow subclasses to defer opening streams until further + * initialization is complete. + * @see #createDataFileReader(FileSystem, Path, Configuration) + */ + protected Reader(FileSystem fs, String dirName, + WritableComparator comparator, Configuration conf, boolean open) + throws IOException { + + if (open) { + open(fs, dirName, comparator, conf); + } + } + + protected synchronized void open(FileSystem fs, String dirName, + WritableComparator comparator, Configuration conf) throws IOException { + Path dir = new Path(dirName); + Path dataFile = new Path(dir, DATA_FILE_NAME); + Path indexFile = new Path(dir, INDEX_FILE_NAME); + + // open the data + this.data = createDataFileReader(fs, dataFile, conf); + this.firstPosition = data.getPosition(); + + if (comparator == null) + this.comparator = WritableComparator.get(data.getKeyClass().asSubclass(WritableComparable.class)); + else + this.comparator = comparator; + + // open the index + this.index = new SequenceFile.Reader(fs, indexFile, conf); + } + + /** + * Override this method to specialize the type of + * {@link SequenceFile.Reader} returned. + */ + protected SequenceFile.Reader createDataFileReader(FileSystem fs, + Path dataFile, Configuration conf) throws IOException { + return new SequenceFile.Reader(fs, dataFile, conf); + } + + private void readIndex() throws IOException { + // read the index entirely into memory + if (this.keys != null) + return; + this.count = 0; + this.positions = new long[1024]; + + try { + int skip = INDEX_SKIP; + LongWritable position = new LongWritable(); + WritableComparable lastKey = null; + long lastIndex = -1; + ArrayList keyBuilder = new ArrayList(1024); + while (true) { + WritableComparable k = comparator.newKey(); + + if (!index.next(k, position)) + break; + + // check order to make sure comparator is compatible + if (lastKey != null && comparator.compare(lastKey, k) > 0) + throw new IOException("key out of order: "+k+" after "+lastKey); + lastKey = k; + if (skip > 0) { + skip--; + continue; // skip this entry + } else { + skip = INDEX_SKIP; // reset skip + } + + // don't read an index that is the same as the previous one. 
Block + // compressed map files used to do this (multiple entries would point + // at the same block) + if (position.get() == lastIndex) + continue; + + if (count == positions.length) { + positions = Arrays.copyOf(positions, positions.length * 2); + } + + keyBuilder.add(k); + positions[count] = position.get(); + count++; + } + + this.keys = keyBuilder.toArray(new WritableComparable[count]); + positions = Arrays.copyOf(positions, count); + } catch (EOFException e) { + LOG.warn("Unexpected EOF reading " + index + + " at entry #" + count + ". Ignoring."); + } finally { + indexClosed = true; + index.close(); + } + } + + /** Re-positions the reader before its first key. */ + public synchronized void reset() throws IOException { + data.seek(firstPosition); + } + + /** Get the key at approximately the middle of the file. Or null if the + * file is empty. + */ + public synchronized WritableComparable midKey() throws IOException { + + readIndex(); + if (count == 0) { + return null; + } + + return keys[(count - 1) / 2]; + } + + /** Reads the final key from the file. + * + * @param key key to read into + */ + public synchronized void finalKey(WritableComparable key) + throws IOException { + + long originalPosition = data.getPosition(); // save position + try { + readIndex(); // make sure index is valid + if (count > 0) { + data.seek(positions[count-1]); // skip to last indexed entry + } else { + reset(); // start at the beginning + } + while (data.next(key)) {} // scan to eof + + } finally { + data.seek(originalPosition); // restore position + } + } + + /** Positions the reader at the named key, or if none such exists, at the + * first entry after the named key. Returns true iff the named key exists + * in this map. + */ + public synchronized boolean seek(WritableComparable key) throws IOException { + return seekInternal(key) == 0; + } + + /** + * Positions the reader at the named key, or if none such exists, at the + * first entry after the named key. + * + * @return 0 - exact match found + * < 0 - positioned at next record + * 1 - no more records in file + */ + private synchronized int seekInternal(WritableComparable key) + throws IOException { + return seekInternal(key, false); + } + + /** + * Positions the reader at the named key, or if none such exists, at the + * key that falls just before or just after dependent on how the + * before parameter is set. + * + * @param before - IF true, and key does not exist, position + * file at entry that falls just before key. Otherwise, + * position file at record that sorts just after. 
+ * @return 0 - exact match found + * < 0 - positioned at next record + * 1 - no more records in file + */ + private synchronized int seekInternal(WritableComparable key, + final boolean before) + throws IOException { + readIndex(); // make sure index is read + + if (seekIndex != -1 // seeked before + && seekIndex+1 < count + && comparator.compare(key, keys[seekIndex+1])<0 // before next indexed + && comparator.compare(key, nextKey) + >= 0) { // but after last seeked + // do nothing + } else { + seekIndex = binarySearch(key); + if (seekIndex < 0) // decode insertion point + seekIndex = -seekIndex-2; + + if (seekIndex == -1) // belongs before first entry + seekPosition = firstPosition; // use beginning of file + else + seekPosition = positions[seekIndex]; // else use index + } + data.seek(seekPosition); + + if (nextKey == null) + nextKey = comparator.newKey(); + + // If we're looking for the key before, we need to keep track + // of the position we got the current key as well as the position + // of the key before it. + long prevPosition = -1; + long curPosition = seekPosition; + + while (data.next(nextKey)) { + int c = comparator.compare(key, nextKey); + if (c <= 0) { // at or beyond desired + if (before && c != 0) { + if (prevPosition == -1) { + // We're on the first record of this index block + // and we've already passed the search key. Therefore + // we must be at the beginning of the file, so seek + // to the beginning of this block and return c + data.seek(curPosition); + } else { + // We have a previous record to back up to + data.seek(prevPosition); + data.next(nextKey); + // now that we've rewound, the search key must be greater than this key + return 1; + } + } + return c; + } + if (before) { + prevPosition = curPosition; + curPosition = data.getPosition(); + } + } + + return 1; + } + + private int binarySearch(WritableComparable key) { + int low = 0; + int high = count-1; + + while (low <= high) { + int mid = (low + high) >>> 1; + WritableComparable midVal = keys[mid]; + int cmp = comparator.compare(midVal, key); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. + } + + /** Read the next key/value pair in the map into key and + * val. Returns true if such a pair exists and false when at + * the end of the map */ + public synchronized boolean next(WritableComparable key, Writable val) + throws IOException { + return data.next(key, val); + } + + /** Return the value for the named key, or null if none exists. */ + public synchronized Writable get(WritableComparable key, Writable val) + throws IOException { + if (seek(key)) { + data.getCurrentValue(val); + return val; + } else + return null; + } + + /** + * Finds the record that is the closest match to the specified key. + * Returns key or if it does not exist, at the first entry + * after the named key. + * +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof. + */ + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val) + throws IOException { + return getClosest(key, val, false); + } + + /** + * Finds the record that is the closest match to the specified key. + * + * @param key - key that we're trying to find + * @param val - data value if key is found + * @param before - IF true, and key does not exist, return + * the first entry that falls just before the key. 
Otherwise, + * return the record that sorts just after. + * @return - the key that was the closest match or null if eof. + */ + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val, final boolean before) + throws IOException { + + int c = seekInternal(key, before); + + // If we didn't get an exact match, and we ended up in the wrong + // direction relative to the query key, return null since we + // must be at the beginning or end of the file. + if ((!before && c > 0) || + (before && c < 0)) { + return null; + } + + data.getCurrentValue(val); + return nextKey; + } + + /** Close the map. */ + public synchronized void close() throws IOException { + if (!indexClosed) { + index.close(); + } + data.close(); + } + + } + + /** Renames an existing map directory. */ + public static void rename(FileSystem fs, String oldName, String newName) + throws IOException { + Path oldDir = new Path(oldName); + Path newDir = new Path(newName); + if (!fs.rename(oldDir, newDir)) { + throw new IOException("Could not rename " + oldDir + " to " + newDir); + } + } + + /** Deletes the named map file. */ + public static void delete(FileSystem fs, String name) throws IOException { + Path dir = new Path(name); + Path data = new Path(dir, DATA_FILE_NAME); + Path index = new Path(dir, INDEX_FILE_NAME); + + fs.delete(data, true); + fs.delete(index, true); + fs.delete(dir, true); + } + + /** + * This method attempts to fix a corrupt MapFile by re-creating its index. + * @param fs filesystem + * @param dir directory containing the MapFile data and index + * @param keyClass key class (has to be a subclass of Writable) + * @param valueClass value class (has to be a subclass of Writable) + * @param dryrun do not perform any changes, just report what needs to be done + * @return number of valid entries in this MapFile, or -1 if no fixing was needed + * @throws Exception + */ + public static long fix(FileSystem fs, Path dir, + Class keyClass, + Class valueClass, boolean dryrun, + Configuration conf) throws Exception { + String dr = (dryrun ? "[DRY RUN ] " : ""); + Path data = new Path(dir, DATA_FILE_NAME); + Path index = new Path(dir, INDEX_FILE_NAME); + int indexInterval = 128; + if (!fs.exists(data)) { + // there's nothing we can do to fix this! + throw new Exception(dr + "Missing data file in " + dir + ", impossible to fix this."); + } + if (fs.exists(index)) { + // no fixing needed + return -1; + } + SequenceFile.Reader dataReader = new SequenceFile.Reader(fs, data, conf); + if (!dataReader.getKeyClass().equals(keyClass)) { + throw new Exception(dr + "Wrong key class in " + dir + ", expected" + keyClass.getName() + + ", got " + dataReader.getKeyClass().getName()); + } + if (!dataReader.getValueClass().equals(valueClass)) { + throw new Exception(dr + "Wrong value class in " + dir + ", expected" + valueClass.getName() + + ", got " + dataReader.getValueClass().getName()); + } + long cnt = 0L; + Writable key = ReflectionUtils.newInstance(keyClass, conf); + Writable value = ReflectionUtils.newInstance(valueClass, conf); + SequenceFile.Writer indexWriter = null; + if (!dryrun) indexWriter = SequenceFile.createWriter(fs, conf, index, keyClass, LongWritable.class); + try { + long pos = 0L; + LongWritable position = new LongWritable(); + while(dataReader.next(key, value)) { + cnt++; + if (cnt % indexInterval == 0) { + position.set(pos); + if (!dryrun) indexWriter.append(key, position); + } + pos = dataReader.getPosition(); + } + } catch(Throwable t) { + // truncated data file. swallow it. 
+ } + dataReader.close(); + if (!dryrun) indexWriter.close(); + return cnt; + } + + + public static void main(String[] args) throws Exception { + String usage = "Usage: MapFile inFile outFile"; + + if (args.length != 2) { + System.err.println(usage); + System.exit(-1); + } + + String in = args[0]; + String out = args[1]; + + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.getLocal(conf); + MapFile.Reader reader = new MapFile.Reader(fs, in, conf); + MapFile.Writer writer = + new MapFile.Writer(conf, fs, out, + reader.getKeyClass().asSubclass(WritableComparable.class), + reader.getValueClass()); + + WritableComparable key = + ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class), conf); + Writable value = + ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf); + + while (reader.next(key, value)) // copy all entries + writer.append(key, value); + + writer.close(); + } + +} diff --git a/src/java/org/apache/hadoop/io/MapWritable.java b/src/java/org/apache/hadoop/io/MapWritable.java new file mode 100644 index 00000000000..66c493be2c3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/MapWritable.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A Writable Map. + */ +public class MapWritable extends AbstractMapWritable + implements Map { + + private Map instance; + + /** Default constructor. */ + public MapWritable() { + super(); + this.instance = new HashMap(); + } + + /** + * Copy constructor. 
+ * + * @param other the map to copy from + */ + public MapWritable(MapWritable other) { + this(); + copy(other); + } + + /** {@inheritDoc} */ + public void clear() { + instance.clear(); + } + + /** {@inheritDoc} */ + public boolean containsKey(Object key) { + return instance.containsKey(key); + } + + /** {@inheritDoc} */ + public boolean containsValue(Object value) { + return instance.containsValue(value); + } + + /** {@inheritDoc} */ + public Set> entrySet() { + return instance.entrySet(); + } + + /** {@inheritDoc} */ + public Writable get(Object key) { + return instance.get(key); + } + + /** {@inheritDoc} */ + public boolean isEmpty() { + return instance.isEmpty(); + } + + /** {@inheritDoc} */ + public Set keySet() { + return instance.keySet(); + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + public Writable put(Writable key, Writable value) { + addToMap(key.getClass()); + addToMap(value.getClass()); + return instance.put(key, value); + } + + /** {@inheritDoc} */ + public void putAll(Map t) { + for (Map.Entry e: t.entrySet()) { + put(e.getKey(), e.getValue()); + } + } + + /** {@inheritDoc} */ + public Writable remove(Object key) { + return instance.remove(key); + } + + /** {@inheritDoc} */ + public int size() { + return instance.size(); + } + + /** {@inheritDoc} */ + public Collection values() { + return instance.values(); + } + + // Writable + + /** {@inheritDoc} */ + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + // Write out the number of entries in the map + + out.writeInt(instance.size()); + + // Then write out each key/value pair + + for (Map.Entry e: instance.entrySet()) { + out.writeByte(getId(e.getKey().getClass())); + e.getKey().write(out); + out.writeByte(getId(e.getValue().getClass())); + e.getValue().write(out); + } + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + // First clear the map. Otherwise we will just accumulate + // entries every time this method is called. + this.instance.clear(); + + // Read the number of entries in the map + + int entries = in.readInt(); + + // Then read each key/value pair + + for (int i = 0; i < entries; i++) { + Writable key = (Writable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + key.readFields(in); + + Writable value = (Writable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + value.readFields(in); + instance.put(key, value); + } + } +} diff --git a/src/java/org/apache/hadoop/io/MultipleIOException.java b/src/java/org/apache/hadoop/io/MultipleIOException.java new file mode 100644 index 00000000000..eea6b556d74 --- /dev/null +++ b/src/java/org/apache/hadoop/io/MultipleIOException.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.IOException; +import java.util.List; + +/** Encapsulate a list of {@link IOException} into an {@link IOException} */ +public class MultipleIOException extends IOException { + /** Require by {@link java.io.Serializable} */ + private static final long serialVersionUID = 1L; + + private final List exceptions; + + /** Constructor is private, use {@link #createIOException(List)}. */ + private MultipleIOException(List exceptions) { + super(exceptions.size() + " exceptions " + exceptions); + this.exceptions = exceptions; + } + + /** @return the underlying exceptions */ + public List getExceptions() {return exceptions;} + + /** A convenient method to create an {@link IOException}. */ + public static IOException createIOException(List exceptions) { + if (exceptions == null || exceptions.isEmpty()) { + return null; + } + if (exceptions.size() == 1) { + return exceptions.get(0); + } + return new MultipleIOException(exceptions); + } +} diff --git a/src/java/org/apache/hadoop/io/NullWritable.java b/src/java/org/apache/hadoop/io/NullWritable.java new file mode 100644 index 00000000000..1df85c84fa9 --- /dev/null +++ b/src/java/org/apache/hadoop/io/NullWritable.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** Singleton Writable with no data. */ +public class NullWritable implements WritableComparable { + + private static final NullWritable THIS = new NullWritable(); + + private NullWritable() {} // no public ctor + + /** Returns the single instance of this class. */ + public static NullWritable get() { return THIS; } + + public String toString() { + return "(null)"; + } + + public int hashCode() { return 0; } + public int compareTo(Object other) { + if (!(other instanceof NullWritable)) { + throw new ClassCastException("can't compare " + other.getClass().getName() + + " to NullWritable"); + } + return 0; + } + public boolean equals(Object other) { return other instanceof NullWritable; } + public void readFields(DataInput in) throws IOException {} + public void write(DataOutput out) throws IOException {} + + /** A Comparator "optimized" for NullWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(NullWritable.class); + } + + /** + * Compare the buffers in serialized form. 
+ */ + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + assert 0 == l1; + assert 0 == l2; + return 0; + } + } + + static { // register this comparator + WritableComparator.define(NullWritable.class, new Comparator()); + } +} + diff --git a/src/java/org/apache/hadoop/io/ObjectWritable.java b/src/java/org/apache/hadoop/io/ObjectWritable.java new file mode 100644 index 00000000000..df1c44bb2ac --- /dev/null +++ b/src/java/org/apache/hadoop/io/ObjectWritable.java @@ -0,0 +1,273 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.lang.reflect.Array; + +import java.io.*; +import java.util.*; + +import org.apache.hadoop.conf.*; + +/** A polymorphic Writable that writes an instance with it's class name. + * Handles arrays, strings and primitive types without a Writable wrapper. + */ +public class ObjectWritable implements Writable, Configurable { + + private Class declaredClass; + private Object instance; + private Configuration conf; + + public ObjectWritable() {} + + public ObjectWritable(Object instance) { + set(instance); + } + + public ObjectWritable(Class declaredClass, Object instance) { + this.declaredClass = declaredClass; + this.instance = instance; + } + + /** Return the instance, or null if none. */ + public Object get() { return instance; } + + /** Return the class this is meant to be. */ + public Class getDeclaredClass() { return declaredClass; } + + /** Reset the instance. 
*/ + public void set(Object instance) { + this.declaredClass = instance.getClass(); + this.instance = instance; + } + + public String toString() { + return "OW[class=" + declaredClass + ",value=" + instance + "]"; + } + + + public void readFields(DataInput in) throws IOException { + readObject(in, this, this.conf); + } + + public void write(DataOutput out) throws IOException { + writeObject(out, instance, declaredClass, conf); + } + + private static final Map> PRIMITIVE_NAMES = new HashMap>(); + static { + PRIMITIVE_NAMES.put("boolean", Boolean.TYPE); + PRIMITIVE_NAMES.put("byte", Byte.TYPE); + PRIMITIVE_NAMES.put("char", Character.TYPE); + PRIMITIVE_NAMES.put("short", Short.TYPE); + PRIMITIVE_NAMES.put("int", Integer.TYPE); + PRIMITIVE_NAMES.put("long", Long.TYPE); + PRIMITIVE_NAMES.put("float", Float.TYPE); + PRIMITIVE_NAMES.put("double", Double.TYPE); + PRIMITIVE_NAMES.put("void", Void.TYPE); + } + + private static class NullInstance extends Configured implements Writable { + private Class declaredClass; + public NullInstance() { super(null); } + public NullInstance(Class declaredClass, Configuration conf) { + super(conf); + this.declaredClass = declaredClass; + } + public void readFields(DataInput in) throws IOException { + String className = UTF8.readString(in); + declaredClass = PRIMITIVE_NAMES.get(className); + if (declaredClass == null) { + try { + declaredClass = getConf().getClassByName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e.toString()); + } + } + } + public void write(DataOutput out) throws IOException { + UTF8.writeString(out, declaredClass.getName()); + } + } + + /** Write a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. */ + public static void writeObject(DataOutput out, Object instance, + Class declaredClass, + Configuration conf) throws IOException { + + if (instance == null) { // null + instance = new NullInstance(declaredClass, conf); + declaredClass = Writable.class; + } + + UTF8.writeString(out, declaredClass.getName()); // always write declared + + if (declaredClass.isArray()) { // array + int length = Array.getLength(instance); + out.writeInt(length); + for (int i = 0; i < length; i++) { + writeObject(out, Array.get(instance, i), + declaredClass.getComponentType(), conf); + } + + } else if (declaredClass == String.class) { // String + UTF8.writeString(out, (String)instance); + + } else if (declaredClass.isPrimitive()) { // primitive type + + if (declaredClass == Boolean.TYPE) { // boolean + out.writeBoolean(((Boolean)instance).booleanValue()); + } else if (declaredClass == Character.TYPE) { // char + out.writeChar(((Character)instance).charValue()); + } else if (declaredClass == Byte.TYPE) { // byte + out.writeByte(((Byte)instance).byteValue()); + } else if (declaredClass == Short.TYPE) { // short + out.writeShort(((Short)instance).shortValue()); + } else if (declaredClass == Integer.TYPE) { // int + out.writeInt(((Integer)instance).intValue()); + } else if (declaredClass == Long.TYPE) { // long + out.writeLong(((Long)instance).longValue()); + } else if (declaredClass == Float.TYPE) { // float + out.writeFloat(((Float)instance).floatValue()); + } else if (declaredClass == Double.TYPE) { // double + out.writeDouble(((Double)instance).doubleValue()); + } else if (declaredClass == Void.TYPE) { // void + } else { + throw new IllegalArgumentException("Not a primitive: "+declaredClass); + } + } else if (declaredClass.isEnum()) { // enum + UTF8.writeString(out, ((Enum)instance).name()); + } else 
if (Writable.class.isAssignableFrom(declaredClass)) { // Writable + UTF8.writeString(out, instance.getClass().getName()); + ((Writable)instance).write(out); + + } else { + throw new IOException("Can't write: "+instance+" as "+declaredClass); + } + } + + + /** Read a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. */ + public static Object readObject(DataInput in, Configuration conf) + throws IOException { + return readObject(in, null, conf); + } + + /** Read a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. */ + @SuppressWarnings("unchecked") + public static Object readObject(DataInput in, ObjectWritable objectWritable, Configuration conf) + throws IOException { + String className = UTF8.readString(in); + Class declaredClass = PRIMITIVE_NAMES.get(className); + if (declaredClass == null) { + declaredClass = loadClass(conf, className); + } + + Object instance; + + if (declaredClass.isPrimitive()) { // primitive types + + if (declaredClass == Boolean.TYPE) { // boolean + instance = Boolean.valueOf(in.readBoolean()); + } else if (declaredClass == Character.TYPE) { // char + instance = Character.valueOf(in.readChar()); + } else if (declaredClass == Byte.TYPE) { // byte + instance = Byte.valueOf(in.readByte()); + } else if (declaredClass == Short.TYPE) { // short + instance = Short.valueOf(in.readShort()); + } else if (declaredClass == Integer.TYPE) { // int + instance = Integer.valueOf(in.readInt()); + } else if (declaredClass == Long.TYPE) { // long + instance = Long.valueOf(in.readLong()); + } else if (declaredClass == Float.TYPE) { // float + instance = Float.valueOf(in.readFloat()); + } else if (declaredClass == Double.TYPE) { // double + instance = Double.valueOf(in.readDouble()); + } else if (declaredClass == Void.TYPE) { // void + instance = null; + } else { + throw new IllegalArgumentException("Not a primitive: "+declaredClass); + } + + } else if (declaredClass.isArray()) { // array + int length = in.readInt(); + instance = Array.newInstance(declaredClass.getComponentType(), length); + for (int i = 0; i < length; i++) { + Array.set(instance, i, readObject(in, conf)); + } + + } else if (declaredClass == String.class) { // String + instance = UTF8.readString(in); + } else if (declaredClass.isEnum()) { // enum + instance = Enum.valueOf((Class) declaredClass, UTF8.readString(in)); + } else { // Writable + Class instanceClass = null; + String str = UTF8.readString(in); + instanceClass = loadClass(conf, str); + + Writable writable = WritableFactories.newInstance(instanceClass, conf); + writable.readFields(in); + instance = writable; + + if (instanceClass == NullInstance.class) { // null + declaredClass = ((NullInstance)instance).declaredClass; + instance = null; + } + } + + if (objectWritable != null) { // store values + objectWritable.declaredClass = declaredClass; + objectWritable.instance = instance; + } + + return instance; + + } + + /** + * Find and load the class with given name className by first finding + * it in the specified conf. If the specified conf is null, + * try load it directly. 
+ */ + public static Class loadClass(Configuration conf, String className) { + Class declaredClass = null; + try { + if (conf != null) + declaredClass = conf.getClassByName(className); + else + declaredClass = Class.forName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException("readObject can't find class " + className, + e); + } + return declaredClass; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + public Configuration getConf() { + return this.conf; + } + +} diff --git a/src/java/org/apache/hadoop/io/OutputBuffer.java b/src/java/org/apache/hadoop/io/OutputBuffer.java new file mode 100644 index 00000000000..943cb52dbce --- /dev/null +++ b/src/java/org/apache/hadoop/io/OutputBuffer.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A reusable {@link OutputStream} implementation that writes to an in-memory + * buffer. + * + *

+ * <p>This saves memory over creating a new OutputStream and
+ * ByteArrayOutputStream each time data is written.
+ *
+ * <p>Typical usage is something like the following:<pre>
+ *
+ * OutputBuffer buffer = new OutputBuffer();
+ * while (... loop condition ...) {
+ *   buffer.reset();
+ *   ... write buffer using OutputStream methods ...
+ *   byte[] data = buffer.getData();
+ *   int dataLength = buffer.getLength();
+ *   ... write data to its ultimate destination ...
+ * }
+ * </pre>
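+ *
+ * <p>The captured bytes can be handed straight to an {@link InputBuffer}
+ * (a sketch; the single byte written here is arbitrary):<pre>
+ *
+ * OutputBuffer out = new OutputBuffer();
+ * out.write(42);                           // any OutputStream call lands in the buffer
+ * InputBuffer in = new InputBuffer();
+ * in.reset(out.getData(), out.getLength());
+ * int b = in.read();                       // reads back 42
+ * </pre>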
+ * @see DataOutputBuffer + * @see InputBuffer + */ +public class OutputBuffer extends FilterOutputStream { + + private static class Buffer extends ByteArrayOutputStream { + public byte[] getData() { return buf; } + public int getLength() { return count; } + public void reset() { count = 0; } + + public void write(InputStream in, int len) throws IOException { + int newcount = count + len; + if (newcount > buf.length) { + byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)]; + System.arraycopy(buf, 0, newbuf, 0, count); + buf = newbuf; + } + IOUtils.readFully(in, buf, count, len); + count = newcount; + } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public OutputBuffer() { + this(new Buffer()); + } + + private OutputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Returns the current contents of the buffer. + * Data is only valid to {@link #getLength()}. + */ + public byte[] getData() { return buffer.getData(); } + + /** Returns the length of the valid data currently in the buffer. */ + public int getLength() { return buffer.getLength(); } + + /** Resets the buffer to empty. */ + public OutputBuffer reset() { + buffer.reset(); + return this; + } + + /** Writes bytes from a InputStream directly into the buffer. */ + public void write(InputStream in, int length) throws IOException { + buffer.write(in, length); + } +} diff --git a/src/java/org/apache/hadoop/io/RawComparator.java b/src/java/org/apache/hadoop/io/RawComparator.java new file mode 100644 index 00000000000..4efbb7acfc0 --- /dev/null +++ b/src/java/org/apache/hadoop/io/RawComparator.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.util.Comparator; + +import org.apache.hadoop.io.serializer.DeserializerComparator; + +/** + *

+ * A {@link Comparator} that operates directly on byte representations of
+ * objects.
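As a hedged illustration (assuming the generic form RawComparator<T>; not part of this patch), a comparator over the raw 4-byte big-endian encoding used by IntWritable could be written as:

    // Illustrative sketch: compares IntWritable keys directly on their serialized
    // bytes (4-byte big-endian), never deserializing them.
    public class IntRawComparator implements RawComparator<IntWritable> {
      public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // l1 and l2 are both 4 for IntWritable; decode before comparing so that
        // negative values order numerically rather than lexicographically.
        int i1 = ((b1[s1] & 0xff) << 24) | ((b1[s1 + 1] & 0xff) << 16)
               | ((b1[s1 + 2] & 0xff) << 8) | (b1[s1 + 3] & 0xff);
        int i2 = ((b2[s2] & 0xff) << 24) | ((b2[s2 + 1] & 0xff) << 16)
               | ((b2[s2 + 2] & 0xff) << 8) | (b2[s2 + 3] & 0xff);
        return (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1);
      }
      public int compare(IntWritable a, IntWritable b) {   // Comparator contract
        return a.compareTo(b);
      }
    }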

+ * @param + * @see DeserializerComparator + */ +public interface RawComparator extends Comparator { + + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2); + +} diff --git a/src/java/org/apache/hadoop/io/SequenceFile.java b/src/java/org/apache/hadoop/io/SequenceFile.java new file mode 100644 index 00000000000..c2ee9540978 --- /dev/null +++ b/src/java/org/apache/hadoop/io/SequenceFile.java @@ -0,0 +1,3244 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.util.*; +import java.rmi.server.UID; +import java.security.MessageDigest; +import org.apache.commons.logging.*; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.Progress; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.hadoop.util.MergeSort; +import org.apache.hadoop.util.PriorityQueue; + +/** + * SequenceFiles are flat files consisting of binary key/value + * pairs. + * + *

+ * SequenceFile provides {@link Writer}, {@link Reader} and
+ * {@link Sorter} classes for writing, reading and sorting respectively.
+ * + * There are three SequenceFile Writers based on the + * {@link CompressionType} used to compress key/value pairs: + *
    + *
+ *   1. Writer : Uncompressed records.
+ *   2. RecordCompressWriter : Record-compressed files, only compress
+ *      values.
+ *   3. BlockCompressWriter : Block-compressed files, both keys &
+ *      values are collected in 'blocks' separately and compressed.
+ *      The size of the 'block' is configurable.
+ *
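For illustration (a hypothetical sketch with a made-up path, not part of the patch): the CompressionType handed to the createWriter factories described below selects among these three writer variants.

    // NONE -> Writer, RECORD -> RecordCompressWriter, BLOCK -> BlockCompressWriter
    SequenceFile.Writer writer = SequenceFile.createWriter(
        fs, conf, new Path("/tmp/example-block.seq"),   // hypothetical path
        Text.class, Text.class,
        SequenceFile.CompressionType.BLOCK,
        new DefaultCodec());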

+ * The actual compression algorithm used to compress key and/or values can be
+ * specified by using the appropriate {@link CompressionCodec}.
+ *
+ * The recommended way is to use the static createWriter methods
+ * provided by the SequenceFile to choose the preferred format.
+ *
+ * The {@link Reader} acts as the bridge and can read any of the above
+ * SequenceFile formats.
+ *
+ * SequenceFile Formats
+ *
+ * Essentially there are 3 different formats for SequenceFiles
+ * depending on the CompressionType specified. All of them share a
+ * common header described below.
+ *
+ *   - version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ *     version number (e.g. SEQ4 or SEQ6)
+ *   - keyClassName - key class
+ *   - valueClassName - value class
+ *   - compression - A boolean which specifies if compression is turned on for
+ *     keys/values in this file.
+ *   - blockCompression - A boolean which specifies if block-compression is
+ *     turned on for keys/values in this file.
+ *   - compression codec - CompressionCodec class which is used for
+ *     compression of keys and/or values (if compression is enabled).
+ *   - metadata - {@link Metadata} for this file.
+ *   - sync - A sync marker to denote end of the header.
+ *
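The header fields listed above surface through the Reader accessors; a small sketch, assuming fs, file and conf are already set up (illustrative only, not part of the patch):

    // Hypothetical sketch: inspect the header of an existing SequenceFile.
    SequenceFile.Reader r = new SequenceFile.Reader(fs, file, conf);
    System.out.println("key class        : " + r.getKeyClassName());
    System.out.println("value class      : " + r.getValueClassName());
    System.out.println("compressed       : " + r.isCompressed());
    System.out.println("block compressed : " + r.isBlockCompressed());
    System.out.println("codec            : " + r.getCompressionCodec());
    System.out.println("metadata         : " + r.getMetadata());
    r.close();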
+ * Uncompressed SequenceFile Format
+ *   - Header
+ *   - Record
+ *       - Record length
+ *       - Key length
+ *       - Key
+ *       - Value
+ *   - A sync-marker every few 100 bytes or so.
+ *
+ * Record-Compressed SequenceFile Format
+ *   - Header
+ *   - Record
+ *       - Record length
+ *       - Key length
+ *       - Key
+ *       - Compressed Value
+ *   - A sync-marker every few 100 bytes or so.
+ *
+ * Block-Compressed SequenceFile Format
+ *   - Header
+ *   - Record Block
+ *       - Compressed key-lengths block-size
+ *       - Compressed key-lengths block
+ *       - Compressed keys block-size
+ *       - Compressed keys block
+ *       - Compressed value-lengths block-size
+ *       - Compressed value-lengths block
+ *       - Compressed values block-size
+ *       - Compressed values block
+ *   - A sync-marker every few 100 bytes or so.
+ *

+ * The compressed blocks of key lengths and value lengths consist of the
+ * actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ * format.
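Tying the formats together, a minimal end-to-end sketch (paths and data are hypothetical; the usual org.apache.hadoop.conf, fs and io imports are assumed) that writes a record-compressed file through the createWriter factory and reads it back with the format-agnostic Reader:

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/tmp/example.seq");                 // hypothetical path

    // Write 100 key/value pairs with record compression.
    SequenceFile.Writer writer = SequenceFile.createWriter(
        fs, conf, file, IntWritable.class, Text.class,
        SequenceFile.CompressionType.RECORD, new DefaultCodec());
    try {
      for (int i = 0; i < 100; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    } finally {
      writer.close();
    }

    // Read the pairs back; the Reader handles any of the three formats.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      reader.close();
    }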

+ * + * @see CompressionCodec + */ +public class SequenceFile { + private static final Log LOG = LogFactory.getLog(SequenceFile.class); + + private SequenceFile() {} // no public ctor + + private static final byte BLOCK_COMPRESS_VERSION = (byte)4; + private static final byte CUSTOM_COMPRESS_VERSION = (byte)5; + private static final byte VERSION_WITH_METADATA = (byte)6; + private static byte[] VERSION = new byte[] { + (byte)'S', (byte)'E', (byte)'Q', VERSION_WITH_METADATA + }; + + private static final int SYNC_ESCAPE = -1; // "length" of sync entries + private static final int SYNC_HASH_SIZE = 16; // number of bytes in hash + private static final int SYNC_SIZE = 4+SYNC_HASH_SIZE; // escape + hash + + /** The number of bytes between sync points.*/ + public static final int SYNC_INTERVAL = 100*SYNC_SIZE; + + /** + * The compression type used to compress key/value pairs in the + * {@link SequenceFile}. + * + * @see SequenceFile.Writer + */ + public static enum CompressionType { + /** Do not compress records. */ + NONE, + /** Compress values only, each separately. */ + RECORD, + /** Compress sequences of records together in blocks. */ + BLOCK + } + + /** + * Get the compression type for the reduce outputs + * @param job the job config to look in + * @return the kind of compression to use + * @deprecated Use + * {@link org.apache.hadoop.mapred.SequenceFileOutputFormat#getOutputCompressionType(org.apache.hadoop.mapred.JobConf)} + * to get {@link CompressionType} for job-outputs. + */ + @Deprecated + static public CompressionType getCompressionType(Configuration job) { + String name = job.get("io.seqfile.compression.type"); + return name == null ? CompressionType.RECORD : + CompressionType.valueOf(name); + } + + /** + * Set the compression type for sequence files. + * @param job the configuration to modify + * @param val the new compression type (none, block, record) + * @deprecated Use the one of the many SequenceFile.createWriter methods to specify + * the {@link CompressionType} while creating the {@link SequenceFile} or + * {@link org.apache.hadoop.mapred.SequenceFileOutputFormat#setOutputCompressionType(org.apache.hadoop.mapred.JobConf, org.apache.hadoop.io.SequenceFile.CompressionType)} + * to specify the {@link CompressionType} for job-outputs. + * or + */ + @Deprecated + static public void setCompressionType(Configuration job, + CompressionType val) { + job.set("io.seqfile.compression.type", val.toString()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass) + throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + getCompressionType(conf)); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @return Returns the handle to the constructed SequenceFile Writer. 
+ * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionType compressionType) + throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, new DefaultCodec(), null, new Metadata()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param progress The Progressable object to track progress. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionType compressionType, + Progressable progress) throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, new DefaultCodec(), progress, new Metadata()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + CompressionType compressionType, CompressionCodec codec) + throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, codec, null, new Metadata()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param progress The Progressable object to track progress. + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + CompressionType compressionType, CompressionCodec codec, + Progressable progress, Metadata metadata) throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, codec, progress, metadata); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param bufferSize buffer size for the underlaying outputstream. 
+ * @param replication replication factor for the file. + * @param blockSize block size for the file. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param progress The Progressable object to track progress. + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, int bufferSize, + short replication, long blockSize, + CompressionType compressionType, CompressionCodec codec, + Progressable progress, Metadata metadata) throws IOException { + if ((codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (compressionType == CompressionType.NONE) { + writer = new Writer(fs, conf, name, keyClass, valClass, + bufferSize, replication, blockSize, + progress, metadata); + } else if (compressionType == CompressionType.RECORD) { + writer = new RecordCompressWriter(fs, conf, name, keyClass, valClass, + bufferSize, replication, blockSize, + codec, progress, metadata); + } else if (compressionType == CompressionType.BLOCK){ + writer = new BlockCompressWriter(fs, conf, name, keyClass, valClass, + bufferSize, replication, blockSize, + codec, progress, metadata); + } + + return writer; + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param progress The Progressable object to track progress. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + CompressionType compressionType, CompressionCodec codec, + Progressable progress) throws IOException { + Writer writer = createWriter(fs, conf, name, keyClass, valClass, + compressionType, codec, progress, new Metadata()); + return writer; + } + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param out The stream on top which the writer is to be constructed. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compress Compress data? + * @param blockCompress Compress blocks? + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. 
+ * @throws IOException + */ + private static Writer + createWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, boolean compress, boolean blockCompress, + CompressionCodec codec, Metadata metadata) + throws IOException { + if (codec != null && (codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (!compress) { + writer = new Writer(conf, out, keyClass, valClass, metadata); + } else if (compress && !blockCompress) { + writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } else { + writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } + + return writer; + } + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param file The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compress Compress data? + * @param blockCompress Compress blocks? + * @param codec The compression codec. + * @param progress + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + private static Writer + createWriter(FileSystem fs, Configuration conf, Path file, + Class keyClass, Class valClass, + boolean compress, boolean blockCompress, + CompressionCodec codec, Progressable progress, Metadata metadata) + throws IOException { + if (codec != null && (codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (!compress) { + writer = new Writer(fs, conf, file, keyClass, valClass, progress, metadata); + } else if (compress && !blockCompress) { + writer = new RecordCompressWriter(fs, conf, file, keyClass, valClass, + codec, progress, metadata); + } else { + writer = new BlockCompressWriter(fs, conf, file, keyClass, valClass, + codec, progress, metadata); + } + + return writer; +} + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param conf The configuration. + * @param out The stream on top which the writer is to be constructed. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. 
+ * @throws IOException + */ + public static Writer + createWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionType compressionType, + CompressionCodec codec, Metadata metadata) + throws IOException { + if ((codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (compressionType == CompressionType.NONE) { + writer = new Writer(conf, out, keyClass, valClass, metadata); + } else if (compressionType == CompressionType.RECORD) { + writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } else if (compressionType == CompressionType.BLOCK){ + writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } + + return writer; + } + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param conf The configuration. + * @param out The stream on top which the writer is to be constructed. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionType compressionType, + CompressionCodec codec) + throws IOException { + Writer writer = createWriter(conf, out, keyClass, valClass, compressionType, + codec, new Metadata()); + return writer; + } + + + /** The interface to 'raw' values of SequenceFiles. */ + public static interface ValueBytes { + + /** Writes the uncompressed bytes to the outStream. + * @param outStream : Stream to write uncompressed bytes into. + * @throws IOException + */ + public void writeUncompressedBytes(DataOutputStream outStream) + throws IOException; + + /** Write compressed bytes to outStream. + * Note: that it will NOT compress the bytes if they are not compressed. + * @param outStream : Stream to write compressed bytes into. + */ + public void writeCompressedBytes(DataOutputStream outStream) + throws IllegalArgumentException, IOException; + + /** + * Size of stored data. 
+ */ + public int getSize(); + } + + private static class UncompressedBytes implements ValueBytes { + private int dataSize; + private byte[] data; + + private UncompressedBytes() { + data = null; + dataSize = 0; + } + + private void reset(DataInputStream in, int length) throws IOException { + if (data == null) { + data = new byte[length]; + } else if (length > data.length) { + data = new byte[Math.max(length, data.length * 2)]; + } + dataSize = -1; + in.readFully(data, 0, length); + dataSize = length; + } + + public int getSize() { + return dataSize; + } + + public void writeUncompressedBytes(DataOutputStream outStream) + throws IOException { + outStream.write(data, 0, dataSize); + } + + public void writeCompressedBytes(DataOutputStream outStream) + throws IllegalArgumentException, IOException { + throw + new IllegalArgumentException("UncompressedBytes cannot be compressed!"); + } + + } // UncompressedBytes + + private static class CompressedBytes implements ValueBytes { + private int dataSize; + private byte[] data; + DataInputBuffer rawData = null; + CompressionCodec codec = null; + CompressionInputStream decompressedStream = null; + + private CompressedBytes(CompressionCodec codec) { + data = null; + dataSize = 0; + this.codec = codec; + } + + private void reset(DataInputStream in, int length) throws IOException { + if (data == null) { + data = new byte[length]; + } else if (length > data.length) { + data = new byte[Math.max(length, data.length * 2)]; + } + dataSize = -1; + in.readFully(data, 0, length); + dataSize = length; + } + + public int getSize() { + return dataSize; + } + + public void writeUncompressedBytes(DataOutputStream outStream) + throws IOException { + if (decompressedStream == null) { + rawData = new DataInputBuffer(); + decompressedStream = codec.createInputStream(rawData); + } else { + decompressedStream.resetState(); + } + rawData.reset(data, 0, dataSize); + + byte[] buffer = new byte[8192]; + int bytesRead = 0; + while ((bytesRead = decompressedStream.read(buffer, 0, 8192)) != -1) { + outStream.write(buffer, 0, bytesRead); + } + } + + public void writeCompressedBytes(DataOutputStream outStream) + throws IllegalArgumentException, IOException { + outStream.write(data, 0, dataSize); + } + + } // CompressedBytes + + /** + * The class encapsulating with the metadata of a file. + * The metadata of a file is a list of attribute name/value + * pairs of Text type. 
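A hedged sketch of the Metadata facility described above: user-defined Text name/value pairs are attached at write time and read back from the Reader (the attribute names, and the fs/conf/file variables, are assumptions, not part of the patch):

    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("creator"), new Text("example-job"));   // made-up attribute
    meta.set(new Text("schema"), new Text("int->string"));    // made-up attribute

    SequenceFile.Writer w = SequenceFile.createWriter(
        fs, conf, file, IntWritable.class, Text.class,
        SequenceFile.CompressionType.BLOCK, new DefaultCodec(),
        null /* no Progressable */, meta);
    w.close();

    SequenceFile.Reader r = new SequenceFile.Reader(fs, file, conf);
    Text creator = r.getMetadata().get(new Text("creator"));  // "example-job"
    r.close();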
+ * + */ + public static class Metadata implements Writable { + + private TreeMap theMetadata; + + public Metadata() { + this(new TreeMap()); + } + + public Metadata(TreeMap arg) { + if (arg == null) { + this.theMetadata = new TreeMap(); + } else { + this.theMetadata = arg; + } + } + + public Text get(Text name) { + return this.theMetadata.get(name); + } + + public void set(Text name, Text value) { + this.theMetadata.put(name, value); + } + + public TreeMap getMetadata() { + return new TreeMap(this.theMetadata); + } + + public void write(DataOutput out) throws IOException { + out.writeInt(this.theMetadata.size()); + Iterator> iter = + this.theMetadata.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry en = iter.next(); + en.getKey().write(out); + en.getValue().write(out); + } + } + + public void readFields(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz < 0) throw new IOException("Invalid size: " + sz + " for file metadata object"); + this.theMetadata = new TreeMap(); + for (int i = 0; i < sz; i++) { + Text key = new Text(); + Text val = new Text(); + key.readFields(in); + val.readFields(in); + this.theMetadata.put(key, val); + } + } + + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass() != this.getClass()) { + return false; + } else { + return equals((Metadata)other); + } + } + + public boolean equals(Metadata other) { + if (other == null) return false; + if (this.theMetadata.size() != other.theMetadata.size()) { + return false; + } + Iterator> iter1 = + this.theMetadata.entrySet().iterator(); + Iterator> iter2 = + other.theMetadata.entrySet().iterator(); + while (iter1.hasNext() && iter2.hasNext()) { + Map.Entry en1 = iter1.next(); + Map.Entry en2 = iter2.next(); + if (!en1.getKey().equals(en2.getKey())) { + return false; + } + if (!en1.getValue().equals(en2.getValue())) { + return false; + } + } + if (iter1.hasNext() || iter2.hasNext()) { + return false; + } + return true; + } + + public int hashCode() { + assert false : "hashCode not designed"; + return 42; // any arbitrary constant will do + } + + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("size: ").append(this.theMetadata.size()).append("\n"); + Iterator> iter = + this.theMetadata.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry en = iter.next(); + sb.append("\t").append(en.getKey().toString()).append("\t").append(en.getValue().toString()); + sb.append("\n"); + } + return sb.toString(); + } + } + + /** Write key/value pairs to a sequence-format file. */ + public static class Writer implements java.io.Closeable { + Configuration conf; + FSDataOutputStream out; + boolean ownOutputStream = true; + DataOutputBuffer buffer = new DataOutputBuffer(); + + Class keyClass; + Class valClass; + + private boolean compress; + CompressionCodec codec = null; + CompressionOutputStream deflateFilter = null; + DataOutputStream deflateOut = null; + Metadata metadata = null; + Compressor compressor = null; + + protected Serializer keySerializer; + protected Serializer uncompressedValSerializer; + protected Serializer compressedValSerializer; + + // Insert a globally unique 16-byte value every few entries, so that one + // can seek into the middle of a file and then synchronize with record + // starts and ends by scanning for this value. 
+ long lastSyncPos; // position of last sync + byte[] sync; // 16 random bytes + { + try { + MessageDigest digester = MessageDigest.getInstance("MD5"); + long time = System.currentTimeMillis(); + digester.update((new UID()+"@"+time).getBytes()); + sync = digester.digest(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** Implicit constructor: needed for the period of transition!*/ + Writer() + {} + + /** Create the named file. */ + public Writer(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass) + throws IOException { + this(fs, conf, name, keyClass, valClass, null, new Metadata()); + } + + /** Create the named file with write-progress reporter. */ + public Writer(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + Progressable progress, Metadata metadata) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + progress, metadata); + } + + /** Create the named file with write-progress reporter. */ + public Writer(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + int bufferSize, short replication, long blockSize, + Progressable progress, Metadata metadata) + throws IOException { + init(name, conf, + fs.create(name, true, bufferSize, replication, blockSize, progress), + keyClass, valClass, false, null, metadata); + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Write to an arbitrary stream using a specified buffer size. */ + private Writer(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, Metadata metadata) + throws IOException { + this.ownOutputStream = false; + init(null, conf, out, keyClass, valClass, false, null, metadata); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Write the initial part of file header. */ + void initializeFileHeader() + throws IOException{ + out.write(VERSION); + } + + /** Write the final part of file header. */ + void finalizeFileHeader() + throws IOException{ + out.write(sync); // write the sync bytes + out.flush(); // flush header + } + + boolean isCompressed() { return compress; } + boolean isBlockCompressed() { return false; } + + /** Write and flush the file header. */ + void writeFileHeader() + throws IOException { + Text.writeString(out, keyClass.getName()); + Text.writeString(out, valClass.getName()); + + out.writeBoolean(this.isCompressed()); + out.writeBoolean(this.isBlockCompressed()); + + if (this.isCompressed()) { + Text.writeString(out, (codec.getClass()).getName()); + } + this.metadata.write(out); + } + + /** Initialize. 
*/ + @SuppressWarnings("unchecked") + void init(Path name, Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, + boolean compress, CompressionCodec codec, Metadata metadata) + throws IOException { + this.conf = conf; + this.out = out; + this.keyClass = keyClass; + this.valClass = valClass; + this.compress = compress; + this.codec = codec; + this.metadata = metadata; + SerializationFactory serializationFactory = new SerializationFactory(conf); + this.keySerializer = serializationFactory.getSerializer(keyClass); + this.keySerializer.open(buffer); + this.uncompressedValSerializer = serializationFactory.getSerializer(valClass); + this.uncompressedValSerializer.open(buffer); + if (this.codec != null) { + ReflectionUtils.setConf(this.codec, this.conf); + this.compressor = CodecPool.getCompressor(this.codec); + this.deflateFilter = this.codec.createOutputStream(buffer, compressor); + this.deflateOut = + new DataOutputStream(new BufferedOutputStream(deflateFilter)); + this.compressedValSerializer = serializationFactory.getSerializer(valClass); + this.compressedValSerializer.open(deflateOut); + } + } + + /** Returns the class of keys in this file. */ + public Class getKeyClass() { return keyClass; } + + /** Returns the class of values in this file. */ + public Class getValueClass() { return valClass; } + + /** Returns the compression codec of data in this file. */ + public CompressionCodec getCompressionCodec() { return codec; } + + /** create a sync point */ + public void sync() throws IOException { + if (sync != null && lastSyncPos != out.getPos()) { + out.writeInt(SYNC_ESCAPE); // mark the start of the sync + out.write(sync); // write sync + lastSyncPos = out.getPos(); // update lastSyncPos + } + } + + /** Returns the configuration of this file. */ + Configuration getConf() { return conf; } + + /** Close the file. */ + public synchronized void close() throws IOException { + keySerializer.close(); + uncompressedValSerializer.close(); + if (compressedValSerializer != null) { + compressedValSerializer.close(); + } + + CodecPool.returnCompressor(compressor); + compressor = null; + + if (out != null) { + + // Close the underlying stream iff we own it... + if (ownOutputStream) { + out.close(); + } else { + out.flush(); + } + out = null; + } + } + + synchronized void checkAndWriteSync() throws IOException { + if (sync != null && + out.getPos() >= lastSyncPos+SYNC_INTERVAL) { // time to emit sync + sync(); + } + } + + /** Append a key/value pair. */ + public synchronized void append(Writable key, Writable val) + throws IOException { + append((Object) key, (Object) val); + } + + /** Append a key/value pair. 
*/ + @SuppressWarnings("unchecked") + public synchronized void append(Object key, Object val) + throws IOException { + if (key.getClass() != keyClass) + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + if (val.getClass() != valClass) + throw new IOException("wrong value class: "+val.getClass().getName() + +" is not "+valClass); + + buffer.reset(); + + // Append the 'key' + keySerializer.serialize(key); + int keyLength = buffer.getLength(); + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + key); + + // Append the 'value' + if (compress) { + deflateFilter.resetState(); + compressedValSerializer.serialize(val); + deflateOut.flush(); + deflateFilter.finish(); + } else { + uncompressedValSerializer.serialize(val); + } + + // Write the record out + checkAndWriteSync(); // sync + out.writeInt(buffer.getLength()); // total record length + out.writeInt(keyLength); // key portion length + out.write(buffer.getData(), 0, buffer.getLength()); // data + } + + public synchronized void appendRaw(byte[] keyData, int keyOffset, + int keyLength, ValueBytes val) throws IOException { + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + keyLength); + + int valLength = val.getSize(); + + checkAndWriteSync(); + + out.writeInt(keyLength+valLength); // total record length + out.writeInt(keyLength); // key portion length + out.write(keyData, keyOffset, keyLength); // key + val.writeUncompressedBytes(out); // value + } + + /** Returns the current length of the output file. + * + *

This always returns a synchronized position. In other words, + * immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + * returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + * the key may be earlier in the file than key last written when this + * method was called (e.g., with block-compression, it may be the first key + * in the block that was being written when this method was called). + */ + public synchronized long getLength() throws IOException { + return out.getPos(); + } + + } // class Writer + + /** Write key/compressed-value pairs to a sequence-format file. */ + static class RecordCompressWriter extends Writer { + + /** Create the named file. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec) + throws IOException { + this(conf, fs.create(name), keyClass, valClass, codec, new Metadata()); + } + + /** Create the named file with write-progress reporter. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, + progress, metadata); + } + + /** Create the named file with write-progress reporter. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + int bufferSize, short replication, long blockSize, + CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + super.init(name, conf, + fs.create(name, true, bufferSize, replication, blockSize, progress), + keyClass, valClass, true, codec, metadata); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Create the named file with write-progress reporter. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress) + throws IOException { + this(fs, conf, name, keyClass, valClass, codec, progress, new Metadata()); + } + + /** Write to an arbitrary stream using a specified buffer size. */ + private RecordCompressWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionCodec codec, Metadata metadata) + throws IOException { + this.ownOutputStream = false; + super.init(null, conf, out, keyClass, valClass, true, codec, metadata); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + + } + + boolean isCompressed() { return true; } + boolean isBlockCompressed() { return false; } + + /** Append a key/value pair. 
*/ + @SuppressWarnings("unchecked") + public synchronized void append(Object key, Object val) + throws IOException { + if (key.getClass() != keyClass) + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + if (val.getClass() != valClass) + throw new IOException("wrong value class: "+val.getClass().getName() + +" is not "+valClass); + + buffer.reset(); + + // Append the 'key' + keySerializer.serialize(key); + int keyLength = buffer.getLength(); + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + key); + + // Compress 'value' and append it + deflateFilter.resetState(); + compressedValSerializer.serialize(val); + deflateOut.flush(); + deflateFilter.finish(); + + // Write the record out + checkAndWriteSync(); // sync + out.writeInt(buffer.getLength()); // total record length + out.writeInt(keyLength); // key portion length + out.write(buffer.getData(), 0, buffer.getLength()); // data + } + + /** Append a key/value pair. */ + public synchronized void appendRaw(byte[] keyData, int keyOffset, + int keyLength, ValueBytes val) throws IOException { + + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + keyLength); + + int valLength = val.getSize(); + + checkAndWriteSync(); // sync + out.writeInt(keyLength+valLength); // total record length + out.writeInt(keyLength); // key portion length + out.write(keyData, keyOffset, keyLength); // 'key' data + val.writeCompressedBytes(out); // 'value' data + } + + } // RecordCompressionWriter + + /** Write compressed key/value blocks to a sequence-format file. */ + static class BlockCompressWriter extends Writer { + + private int noBufferedRecords = 0; + + private DataOutputBuffer keyLenBuffer = new DataOutputBuffer(); + private DataOutputBuffer keyBuffer = new DataOutputBuffer(); + + private DataOutputBuffer valLenBuffer = new DataOutputBuffer(); + private DataOutputBuffer valBuffer = new DataOutputBuffer(); + + private int compressionBlockSize; + + /** Create the named file. */ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, + null, new Metadata()); + } + + /** Create the named file with write-progress reporter. */ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, + progress, metadata); + } + + /** Create the named file with write-progress reporter. */ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + int bufferSize, short replication, long blockSize, + CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + super.init(name, conf, + fs.create(name, true, bufferSize, replication, blockSize, progress), + keyClass, valClass, true, codec, metadata); + init(conf.getInt("io.seqfile.compress.blocksize", 1000000)); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Create the named file with write-progress reporter. 
*/ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress) + throws IOException { + this(fs, conf, name, keyClass, valClass, codec, progress, new Metadata()); + } + + /** Write to an arbitrary stream using a specified buffer size. */ + private BlockCompressWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionCodec codec, Metadata metadata) + throws IOException { + this.ownOutputStream = false; + super.init(null, conf, out, keyClass, valClass, true, codec, metadata); + init(1000000); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + boolean isCompressed() { return true; } + boolean isBlockCompressed() { return true; } + + /** Initialize */ + void init(int compressionBlockSize) throws IOException { + this.compressionBlockSize = compressionBlockSize; + keySerializer.close(); + keySerializer.open(keyBuffer); + uncompressedValSerializer.close(); + uncompressedValSerializer.open(valBuffer); + } + + /** Workhorse to check and write out compressed data/lengths */ + private synchronized + void writeBuffer(DataOutputBuffer uncompressedDataBuffer) + throws IOException { + deflateFilter.resetState(); + buffer.reset(); + deflateOut.write(uncompressedDataBuffer.getData(), 0, + uncompressedDataBuffer.getLength()); + deflateOut.flush(); + deflateFilter.finish(); + + WritableUtils.writeVInt(out, buffer.getLength()); + out.write(buffer.getData(), 0, buffer.getLength()); + } + + /** Compress and flush contents to dfs */ + public synchronized void sync() throws IOException { + if (noBufferedRecords > 0) { + super.sync(); + + // No. of records + WritableUtils.writeVInt(out, noBufferedRecords); + + // Write 'keys' and lengths + writeBuffer(keyLenBuffer); + writeBuffer(keyBuffer); + + // Write 'values' and lengths + writeBuffer(valLenBuffer); + writeBuffer(valBuffer); + + // Flush the file-stream + out.flush(); + + // Reset internal states + keyLenBuffer.reset(); + keyBuffer.reset(); + valLenBuffer.reset(); + valBuffer.reset(); + noBufferedRecords = 0; + } + + } + + /** Close the file. */ + public synchronized void close() throws IOException { + if (out != null) { + sync(); + } + super.close(); + } + + /** Append a key/value pair. */ + @SuppressWarnings("unchecked") + public synchronized void append(Object key, Object val) + throws IOException { + if (key.getClass() != keyClass) + throw new IOException("wrong key class: "+key+" is not "+keyClass); + if (val.getClass() != valClass) + throw new IOException("wrong value class: "+val+" is not "+valClass); + + // Save key/value into respective buffers + int oldKeyLength = keyBuffer.getLength(); + keySerializer.serialize(key); + int keyLength = keyBuffer.getLength() - oldKeyLength; + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + key); + WritableUtils.writeVInt(keyLenBuffer, keyLength); + + int oldValLength = valBuffer.getLength(); + uncompressedValSerializer.serialize(val); + int valLength = valBuffer.getLength() - oldValLength; + WritableUtils.writeVInt(valLenBuffer, valLength); + + // Added another key/value pair + ++noBufferedRecords; + + // Compress and flush? + int currentBlockSize = keyBuffer.getLength() + valBuffer.getLength(); + if (currentBlockSize >= compressionBlockSize) { + sync(); + } + } + + /** Append a key/value pair. 
*/ + public synchronized void appendRaw(byte[] keyData, int keyOffset, + int keyLength, ValueBytes val) throws IOException { + + if (keyLength < 0) + throw new IOException("negative length keys not allowed"); + + int valLength = val.getSize(); + + // Save key/value data in relevant buffers + WritableUtils.writeVInt(keyLenBuffer, keyLength); + keyBuffer.write(keyData, keyOffset, keyLength); + WritableUtils.writeVInt(valLenBuffer, valLength); + val.writeUncompressedBytes(valBuffer); + + // Added another key/value pair + ++noBufferedRecords; + + // Compress and flush? + int currentBlockSize = keyBuffer.getLength() + valBuffer.getLength(); + if (currentBlockSize >= compressionBlockSize) { + sync(); + } + } + + } // BlockCompressionWriter + + /** Reads key/value pairs from a sequence-format file. */ + public static class Reader implements java.io.Closeable { + private Path file; + private FSDataInputStream in; + private DataOutputBuffer outBuf = new DataOutputBuffer(); + + private byte version; + + private String keyClassName; + private String valClassName; + private Class keyClass; + private Class valClass; + + private CompressionCodec codec = null; + private Metadata metadata = null; + + private byte[] sync = new byte[SYNC_HASH_SIZE]; + private byte[] syncCheck = new byte[SYNC_HASH_SIZE]; + private boolean syncSeen; + + private long end; + private int keyLength; + private int recordLength; + + private boolean decompress; + private boolean blockCompressed; + + private Configuration conf; + + private int noBufferedRecords = 0; + private boolean lazyDecompress = true; + private boolean valuesDecompressed = true; + + private int noBufferedKeys = 0; + private int noBufferedValues = 0; + + private DataInputBuffer keyLenBuffer = null; + private CompressionInputStream keyLenInFilter = null; + private DataInputStream keyLenIn = null; + private Decompressor keyLenDecompressor = null; + private DataInputBuffer keyBuffer = null; + private CompressionInputStream keyInFilter = null; + private DataInputStream keyIn = null; + private Decompressor keyDecompressor = null; + + private DataInputBuffer valLenBuffer = null; + private CompressionInputStream valLenInFilter = null; + private DataInputStream valLenIn = null; + private Decompressor valLenDecompressor = null; + private DataInputBuffer valBuffer = null; + private CompressionInputStream valInFilter = null; + private DataInputStream valIn = null; + private Decompressor valDecompressor = null; + + private Deserializer keyDeserializer; + private Deserializer valDeserializer; + + /** Open the named file. */ + public Reader(FileSystem fs, Path file, Configuration conf) + throws IOException { + this(fs, file, conf.getInt("io.file.buffer.size", 4096), conf, false); + } + + private Reader(FileSystem fs, Path file, int bufferSize, + Configuration conf, boolean tempReader) throws IOException { + this(fs, file, bufferSize, 0, fs.getFileStatus(file).getLen(), conf, tempReader); + } + + private Reader(FileSystem fs, Path file, int bufferSize, long start, + long length, Configuration conf, boolean tempReader) + throws IOException { + this.file = file; + this.in = openFile(fs, file, bufferSize, length); + this.conf = conf; + boolean succeeded = false; + try { + seek(start); + this.end = in.getPos() + length; + init(tempReader); + succeeded = true; + } finally { + if (!succeeded) { + IOUtils.cleanup(LOG, in); + } + } + } + + /** + * Override this method to specialize the type of + * {@link FSDataInputStream} returned. 
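Since openFile is protected precisely so that subclasses can specialize how the input stream is obtained, a hypothetical override might look like this (the subclass name and any customization are illustrative only):

    // Sketch: a Reader subclass that hooks openFile, e.g. to wrap or instrument
    // the underlying stream; the default behaviour is just fs.open(file, bufferSize).
    class InstrumentedReader extends SequenceFile.Reader {
      InstrumentedReader(FileSystem fs, Path file, Configuration conf)
          throws IOException {
        super(fs, file, conf);
      }
      @Override
      protected FSDataInputStream openFile(FileSystem fs, Path file,
                                           int bufferSize, long length)
          throws IOException {
        // Customize here (wrap, count bytes, log, ...); this sketch keeps the default.
        return fs.open(file, bufferSize);
      }
    }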
+ */ + protected FSDataInputStream openFile(FileSystem fs, Path file, + int bufferSize, long length) throws IOException { + return fs.open(file, bufferSize); + } + + /** + * Initialize the {@link Reader} + * @param tmpReader true if we are constructing a temporary + * reader {@link SequenceFile.Sorter.cloneFileAttributes}, + * and hence do not initialize every component; + * false otherwise. + * @throws IOException + */ + private void init(boolean tempReader) throws IOException { + byte[] versionBlock = new byte[VERSION.length]; + in.readFully(versionBlock); + + if ((versionBlock[0] != VERSION[0]) || + (versionBlock[1] != VERSION[1]) || + (versionBlock[2] != VERSION[2])) + throw new IOException(file + " not a SequenceFile"); + + // Set 'version' + version = versionBlock[3]; + if (version > VERSION[3]) + throw new VersionMismatchException(VERSION[3], version); + + if (version < BLOCK_COMPRESS_VERSION) { + UTF8 className = new UTF8(); + + className.readFields(in); + keyClassName = className.toString(); // key class name + + className.readFields(in); + valClassName = className.toString(); // val class name + } else { + keyClassName = Text.readString(in); + valClassName = Text.readString(in); + } + + if (version > 2) { // if version > 2 + this.decompress = in.readBoolean(); // is compressed? + } else { + decompress = false; + } + + if (version >= BLOCK_COMPRESS_VERSION) { // if version >= 4 + this.blockCompressed = in.readBoolean(); // is block-compressed? + } else { + blockCompressed = false; + } + + // if version >= 5 + // setup the compression codec + if (decompress) { + if (version >= CUSTOM_COMPRESS_VERSION) { + String codecClassname = Text.readString(in); + try { + Class codecClass + = conf.getClassByName(codecClassname).asSubclass(CompressionCodec.class); + this.codec = ReflectionUtils.newInstance(codecClass, conf); + } catch (ClassNotFoundException cnfe) { + throw new IllegalArgumentException("Unknown codec: " + + codecClassname, cnfe); + } + } else { + codec = new DefaultCodec(); + ((Configurable)codec).setConf(conf); + } + } + + this.metadata = new Metadata(); + if (version >= VERSION_WITH_METADATA) { // if version >= 6 + this.metadata.readFields(in); + } + + if (version > 1) { // if version > 1 + in.readFully(sync); // read sync bytes + } + + // Initialize... 
*not* if this we are constructing a temporary Reader + if (!tempReader) { + valBuffer = new DataInputBuffer(); + if (decompress) { + valDecompressor = CodecPool.getDecompressor(codec); + valInFilter = codec.createInputStream(valBuffer, valDecompressor); + valIn = new DataInputStream(valInFilter); + } else { + valIn = valBuffer; + } + + if (blockCompressed) { + keyLenBuffer = new DataInputBuffer(); + keyBuffer = new DataInputBuffer(); + valLenBuffer = new DataInputBuffer(); + + keyLenDecompressor = CodecPool.getDecompressor(codec); + keyLenInFilter = codec.createInputStream(keyLenBuffer, + keyLenDecompressor); + keyLenIn = new DataInputStream(keyLenInFilter); + + keyDecompressor = CodecPool.getDecompressor(codec); + keyInFilter = codec.createInputStream(keyBuffer, keyDecompressor); + keyIn = new DataInputStream(keyInFilter); + + valLenDecompressor = CodecPool.getDecompressor(codec); + valLenInFilter = codec.createInputStream(valLenBuffer, + valLenDecompressor); + valLenIn = new DataInputStream(valLenInFilter); + } + + SerializationFactory serializationFactory = + new SerializationFactory(conf); + this.keyDeserializer = + getDeserializer(serializationFactory, getKeyClass()); + if (!blockCompressed) { + this.keyDeserializer.open(valBuffer); + } else { + this.keyDeserializer.open(keyIn); + } + this.valDeserializer = + getDeserializer(serializationFactory, getValueClass()); + this.valDeserializer.open(valIn); + } + } + + @SuppressWarnings("unchecked") + private Deserializer getDeserializer(SerializationFactory sf, Class c) { + return sf.getDeserializer(c); + } + + /** Close the file. */ + public synchronized void close() throws IOException { + // Return the decompressors to the pool + CodecPool.returnDecompressor(keyLenDecompressor); + CodecPool.returnDecompressor(keyDecompressor); + CodecPool.returnDecompressor(valLenDecompressor); + CodecPool.returnDecompressor(valDecompressor); + keyLenDecompressor = keyDecompressor = null; + valLenDecompressor = valDecompressor = null; + + if (keyDeserializer != null) { + keyDeserializer.close(); + } + if (valDeserializer != null) { + valDeserializer.close(); + } + + // Close the input-stream + in.close(); + } + + /** Returns the name of the key class. */ + public String getKeyClassName() { + return keyClassName; + } + + /** Returns the class of keys in this file. */ + public synchronized Class getKeyClass() { + if (null == keyClass) { + try { + keyClass = WritableName.getClass(getKeyClassName(), conf); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return keyClass; + } + + /** Returns the name of the value class. */ + public String getValueClassName() { + return valClassName; + } + + /** Returns the class of values in this file. */ + public synchronized Class getValueClass() { + if (null == valClass) { + try { + valClass = WritableName.getClass(getValueClassName(), conf); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return valClass; + } + + /** Returns true if values are compressed. */ + public boolean isCompressed() { return decompress; } + + /** Returns true if records are block-compressed. */ + public boolean isBlockCompressed() { return blockCompressed; } + + /** Returns the compression codec of data in this file. */ + public CompressionCodec getCompressionCodec() { return codec; } + + /** Returns the metadata object of the file */ + public Metadata getMetadata() { + return this.metadata; + } + + /** Returns the configuration used for this file. 
*/ + Configuration getConf() { return conf; } + + /** Read a compressed buffer */ + private synchronized void readBuffer(DataInputBuffer buffer, + CompressionInputStream filter) throws IOException { + // Read data into a temporary buffer + DataOutputBuffer dataBuffer = new DataOutputBuffer(); + + try { + int dataBufferLength = WritableUtils.readVInt(in); + dataBuffer.write(in, dataBufferLength); + + // Set up 'buffer' connected to the input-stream + buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength()); + } finally { + dataBuffer.close(); + } + + // Reset the codec + filter.resetState(); + } + + /** Read the next 'compressed' block */ + private synchronized void readBlock() throws IOException { + // Check if we need to throw away a whole block of + // 'values' due to 'lazy decompression' + if (lazyDecompress && !valuesDecompressed) { + in.seek(WritableUtils.readVInt(in)+in.getPos()); + in.seek(WritableUtils.readVInt(in)+in.getPos()); + } + + // Reset internal states + noBufferedKeys = 0; noBufferedValues = 0; noBufferedRecords = 0; + valuesDecompressed = false; + + //Process sync + if (sync != null) { + in.readInt(); + in.readFully(syncCheck); // read syncCheck + if (!Arrays.equals(sync, syncCheck)) // check it + throw new IOException("File is corrupt!"); + } + syncSeen = true; + + // Read number of records in this block + noBufferedRecords = WritableUtils.readVInt(in); + + // Read key lengths and keys + readBuffer(keyLenBuffer, keyLenInFilter); + readBuffer(keyBuffer, keyInFilter); + noBufferedKeys = noBufferedRecords; + + // Read value lengths and values + if (!lazyDecompress) { + readBuffer(valLenBuffer, valLenInFilter); + readBuffer(valBuffer, valInFilter); + noBufferedValues = noBufferedRecords; + valuesDecompressed = true; + } + } + + /** + * Position valLenIn/valIn to the 'value' + * corresponding to the 'current' key + */ + private synchronized void seekToCurrentValue() throws IOException { + if (!blockCompressed) { + if (decompress) { + valInFilter.resetState(); + } + valBuffer.reset(); + } else { + // Check if this is the first value in the 'block' to be read + if (lazyDecompress && !valuesDecompressed) { + // Read the value lengths and values + readBuffer(valLenBuffer, valLenInFilter); + readBuffer(valBuffer, valInFilter); + noBufferedValues = noBufferedRecords; + valuesDecompressed = true; + } + + // Calculate the no. of bytes to skip + // Note: 'current' key has already been read! + int skipValBytes = 0; + int currentKey = noBufferedKeys + 1; + for (int i=noBufferedValues; i > currentKey; --i) { + skipValBytes += WritableUtils.readVInt(valLenIn); + --noBufferedValues; + } + + // Skip to the 'val' corresponding to 'current' key + if (skipValBytes > 0) { + if (valIn.skipBytes(skipValBytes) != skipValBytes) { + throw new IOException("Failed to seek to " + currentKey + + "(th) value!"); + } + } + } + } + + /** + * Get the 'value' corresponding to the last read 'key'. + * @param val : The 'value' to be read. 
+ * @throws IOException + */ + public synchronized void getCurrentValue(Writable val) + throws IOException { + if (val instanceof Configurable) { + ((Configurable) val).setConf(this.conf); + } + + // Position stream to 'current' value + seekToCurrentValue(); + + if (!blockCompressed) { + val.readFields(valIn); + + if (valIn.read() > 0) { + LOG.info("available bytes: " + valIn.available()); + throw new IOException(val+" read "+(valBuffer.getPosition()-keyLength) + + " bytes, should read " + + (valBuffer.getLength()-keyLength)); + } + } else { + // Get the value + int valLength = WritableUtils.readVInt(valLenIn); + val.readFields(valIn); + + // Read another compressed 'value' + --noBufferedValues; + + // Sanity check + if (valLength < 0) { + LOG.debug(val + " is a zero-length value"); + } + } + + } + + /** + * Get the 'value' corresponding to the last read 'key'. + * @param val : The 'value' to be read. + * @throws IOException + */ + public synchronized Object getCurrentValue(Object val) + throws IOException { + if (val instanceof Configurable) { + ((Configurable) val).setConf(this.conf); + } + + // Position stream to 'current' value + seekToCurrentValue(); + + if (!blockCompressed) { + val = deserializeValue(val); + + if (valIn.read() > 0) { + LOG.info("available bytes: " + valIn.available()); + throw new IOException(val+" read "+(valBuffer.getPosition()-keyLength) + + " bytes, should read " + + (valBuffer.getLength()-keyLength)); + } + } else { + // Get the value + int valLength = WritableUtils.readVInt(valLenIn); + val = deserializeValue(val); + + // Read another compressed 'value' + --noBufferedValues; + + // Sanity check + if (valLength < 0) { + LOG.debug(val + " is a zero-length value"); + } + } + return val; + + } + + @SuppressWarnings("unchecked") + private Object deserializeValue(Object val) throws IOException { + return valDeserializer.deserialize(val); + } + + /** Read the next key in the file into key, skipping its + * value. True if another entry exists, and false at end of file. */ + public synchronized boolean next(Writable key) throws IOException { + if (key.getClass() != getKeyClass()) + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + + if (!blockCompressed) { + outBuf.reset(); + + keyLength = next(outBuf); + if (keyLength < 0) + return false; + + valBuffer.reset(outBuf.getData(), outBuf.getLength()); + + key.readFields(valBuffer); + valBuffer.mark(0); + if (valBuffer.getPosition() != keyLength) + throw new IOException(key + " read " + valBuffer.getPosition() + + " bytes, should read " + keyLength); + } else { + //Reset syncSeen + syncSeen = false; + + if (noBufferedKeys == 0) { + try { + readBlock(); + } catch (EOFException eof) { + return false; + } + } + + int keyLength = WritableUtils.readVInt(keyLenIn); + + // Sanity check + if (keyLength < 0) { + return false; + } + + //Read another compressed 'key' + key.readFields(keyIn); + --noBufferedKeys; + } + + return true; + } + + /** Read the next key/value pair in the file into key and + * val. Returns true if such a pair exists and false when at + * end of file */ + public synchronized boolean next(Writable key, Writable val) + throws IOException { + if (val.getClass() != getValueClass()) + throw new IOException("wrong value class: "+val+" is not "+valClass); + + boolean more = next(key); + + if (more) { + getCurrentValue(val); + } + + return more; + } + + /** + * Read and return the next record length, potentially skipping over + * a sync block. 
+ * @return the length of the next record or -1 if there is no next record + * @throws IOException + */ + private synchronized int readRecordLength() throws IOException { + if (in.getPos() >= end) { + return -1; + } + int length = in.readInt(); + if (version > 1 && sync != null && + length == SYNC_ESCAPE) { // process a sync entry + in.readFully(syncCheck); // read syncCheck + if (!Arrays.equals(sync, syncCheck)) // check it + throw new IOException("File is corrupt!"); + syncSeen = true; + if (in.getPos() >= end) { + return -1; + } + length = in.readInt(); // re-read length + } else { + syncSeen = false; + } + + return length; + } + + /** Read the next key/value pair in the file into buffer. + * Returns the length of the key read, or -1 if at end of file. The length + * of the value may be computed by calling buffer.getLength() before and + * after calls to this method. */ + /** @deprecated Call {@link #nextRaw(DataOutputBuffer,SequenceFile.ValueBytes)}. */ + public synchronized int next(DataOutputBuffer buffer) throws IOException { + // Unsupported for block-compressed sequence files + if (blockCompressed) { + throw new IOException("Unsupported call for block-compressed" + + " SequenceFiles - use SequenceFile.Reader.next(DataOutputStream, ValueBytes)"); + } + try { + int length = readRecordLength(); + if (length == -1) { + return -1; + } + int keyLength = in.readInt(); + buffer.write(in, length); + return keyLength; + } catch (ChecksumException e) { // checksum failure + handleChecksumException(e); + return next(buffer); + } + } + + public ValueBytes createValueBytes() { + ValueBytes val = null; + if (!decompress || blockCompressed) { + val = new UncompressedBytes(); + } else { + val = new CompressedBytes(codec); + } + return val; + } + + /** + * Read 'raw' records. + * @param key - The buffer into which the key is read + * @param val - The 'raw' value + * @return Returns the total record length or -1 for end of file + * @throws IOException + */ + public synchronized int nextRaw(DataOutputBuffer key, ValueBytes val) + throws IOException { + if (!blockCompressed) { + int length = readRecordLength(); + if (length == -1) { + return -1; + } + int keyLength = in.readInt(); + int valLength = length - keyLength; + key.write(in, keyLength); + if (decompress) { + CompressedBytes value = (CompressedBytes)val; + value.reset(in, valLength); + } else { + UncompressedBytes value = (UncompressedBytes)val; + value.reset(in, valLength); + } + + return length; + } else { + //Reset syncSeen + syncSeen = false; + + // Read 'key' + if (noBufferedKeys == 0) { + if (in.getPos() >= end) + return -1; + + try { + readBlock(); + } catch (EOFException eof) { + return -1; + } + } + int keyLength = WritableUtils.readVInt(keyLenIn); + if (keyLength < 0) { + throw new IOException("zero length key found!"); + } + key.write(keyIn, keyLength); + --noBufferedKeys; + + // Read raw 'value' + seekToCurrentValue(); + int valLength = WritableUtils.readVInt(valLenIn); + UncompressedBytes rawValue = (UncompressedBytes)val; + rawValue.reset(valIn, valLength); + --noBufferedValues; + + return (keyLength+valLength); + } + + } + + /** + * Read 'raw' keys. 
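A sketch of driving the raw interface above to copy records between SequenceFiles without deserializing keys or values (reuses the conf/fs setup from the earlier sketch; src and dst are hypothetical paths). The ValueBytes produced by the reader must be compatible with the writer's compression layout, so the simple safe case is an uncompressed destination:

    SequenceFile.Reader in = new SequenceFile.Reader(fs, src, conf);
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, dst,
        in.getKeyClass(), in.getValueClass(), SequenceFile.CompressionType.NONE);
    DataOutputBuffer rawKey = new DataOutputBuffer();
    SequenceFile.ValueBytes rawValue = in.createValueBytes();
    while (in.nextRaw(rawKey, rawValue) != -1) {        // -1 signals end of file
      out.appendRaw(rawKey.getData(), 0, rawKey.getLength(), rawValue);
      rawKey.reset();                                   // nextRaw() appends into the key buffer
    }
    out.close();
    in.close();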
+ * @param key - The buffer into which the key is read + * @return Returns the key length or -1 for end of file + * @throws IOException + */ + public synchronized int nextRawKey(DataOutputBuffer key) + throws IOException { + if (!blockCompressed) { + recordLength = readRecordLength(); + if (recordLength == -1) { + return -1; + } + keyLength = in.readInt(); + key.write(in, keyLength); + return keyLength; + } else { + //Reset syncSeen + syncSeen = false; + + // Read 'key' + if (noBufferedKeys == 0) { + if (in.getPos() >= end) + return -1; + + try { + readBlock(); + } catch (EOFException eof) { + return -1; + } + } + int keyLength = WritableUtils.readVInt(keyLenIn); + if (keyLength < 0) { + throw new IOException("zero length key found!"); + } + key.write(keyIn, keyLength); + --noBufferedKeys; + + return keyLength; + } + + } + + /** Read the next key in the file, skipping its + * value. Return null at end of file. */ + public synchronized Object next(Object key) throws IOException { + if (key != null && key.getClass() != getKeyClass()) { + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + } + + if (!blockCompressed) { + outBuf.reset(); + + keyLength = next(outBuf); + if (keyLength < 0) + return null; + + valBuffer.reset(outBuf.getData(), outBuf.getLength()); + + key = deserializeKey(key); + valBuffer.mark(0); + if (valBuffer.getPosition() != keyLength) + throw new IOException(key + " read " + valBuffer.getPosition() + + " bytes, should read " + keyLength); + } else { + //Reset syncSeen + syncSeen = false; + + if (noBufferedKeys == 0) { + try { + readBlock(); + } catch (EOFException eof) { + return null; + } + } + + int keyLength = WritableUtils.readVInt(keyLenIn); + + // Sanity check + if (keyLength < 0) { + return null; + } + + //Read another compressed 'key' + key = deserializeKey(key); + --noBufferedKeys; + } + + return key; + } + + @SuppressWarnings("unchecked") + private Object deserializeKey(Object key) throws IOException { + return keyDeserializer.deserialize(key); + } + + /** + * Read 'raw' values. + * @param val - The 'raw' value + * @return Returns the value length + * @throws IOException + */ + public synchronized int nextRawValue(ValueBytes val) + throws IOException { + + // Position stream to current value + seekToCurrentValue(); + + if (!blockCompressed) { + int valLength = recordLength - keyLength; + if (decompress) { + CompressedBytes value = (CompressedBytes)val; + value.reset(in, valLength); + } else { + UncompressedBytes value = (UncompressedBytes)val; + value.reset(in, valLength); + } + + return valLength; + } else { + int valLength = WritableUtils.readVInt(valLenIn); + UncompressedBytes rawValue = (UncompressedBytes)val; + rawValue.reset(valIn, valLength); + --noBufferedValues; + return valLength; + } + + } + + private void handleChecksumException(ChecksumException e) + throws IOException { + if (this.conf.getBoolean("io.skip.checksum.errors", false)) { + LOG.warn("Bad checksum at "+getPosition()+". Skipping entries."); + sync(getPosition()+this.conf.getInt("io.bytes.per.checksum", 512)); + } else { + throw e; + } + } + + /** disables sync. often invoked for tmp files */ + synchronized void ignoreSync() { + sync = null; + } + + /** Set the current byte position in the input file. + * + *
<p>
The position passed must be a position returned by {@link + * SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + * position, use {@link SequenceFile.Reader#sync(long)}. + */ + public synchronized void seek(long position) throws IOException { + in.seek(position); + if (blockCompressed) { // trigger block read + noBufferedKeys = 0; + valuesDecompressed = true; + } + } + + /** Seek to the next sync mark past a given position.*/ + public synchronized void sync(long position) throws IOException { + if (position+SYNC_SIZE >= end) { + seek(end); + return; + } + + try { + seek(position+4); // skip escape + in.readFully(syncCheck); + int syncLen = sync.length; + for (int i = 0; in.getPos() < end; i++) { + int j = 0; + for (; j < syncLen; j++) { + if (sync[j] != syncCheck[(i+j)%syncLen]) + break; + } + if (j == syncLen) { + in.seek(in.getPos() - SYNC_SIZE); // position before sync + return; + } + syncCheck[i%syncLen] = in.readByte(); + } + } catch (ChecksumException e) { // checksum failure + handleChecksumException(e); + } + } + + /** Returns true iff the previous call to next passed a sync mark.*/ + public synchronized boolean syncSeen() { return syncSeen; } + + /** Return the current byte position in the input file. */ + public synchronized long getPosition() throws IOException { + return in.getPos(); + } + + /** Returns the name of the file. */ + public String toString() { + return file.toString(); + } + + } + + /** Sorts key/value pairs in a sequence-format file. + * + *
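A sketch of the seek/sync contract above for reading an arbitrary byte range of a file (start and end are hypothetical offsets): sync(start) advances to the first sync mark after start, after which records can be read until the position passes end, the pattern used by split-based readers:

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    reader.sync(start);                                 // position at the next sync mark after 'start'
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.getPosition() < end && reader.next(key, value)) {
      // process (key, value)
    }
    reader.close();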
<p>
For best performance, applications should make sure that the {@link + * Writable#readFields(DataInput)} implementation of their keys is + * very efficient. In particular, it should avoid allocating memory. + */ + public static class Sorter { + + private RawComparator comparator; + + private MergeSort mergeSort; //the implementation of merge sort + + private Path[] inFiles; // when merging or sorting + + private Path outFile; + + private int memory; // bytes + private int factor; // merged per pass + + private FileSystem fs = null; + + private Class keyClass; + private Class valClass; + + private Configuration conf; + private Metadata metadata; + + private Progressable progressable = null; + + /** Sort and merge files containing the named classes. */ + public Sorter(FileSystem fs, Class keyClass, + Class valClass, Configuration conf) { + this(fs, WritableComparator.get(keyClass), keyClass, valClass, conf); + } + + /** Sort and merge using an arbitrary {@link RawComparator}. */ + public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, + Class valClass, Configuration conf) { + this(fs, comparator, keyClass, valClass, conf, new Metadata()); + } + + /** Sort and merge using an arbitrary {@link RawComparator}. */ + public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, + Class valClass, Configuration conf, Metadata metadata) { + this.fs = fs; + this.comparator = comparator; + this.keyClass = keyClass; + this.valClass = valClass; + this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024; + this.factor = conf.getInt("io.sort.factor", 100); + this.conf = conf; + this.metadata = metadata; + } + + /** Set the number of streams to merge at once.*/ + public void setFactor(int factor) { this.factor = factor; } + + /** Get the number of streams to merge at once.*/ + public int getFactor() { return factor; } + + /** Set the total amount of buffer memory, in bytes.*/ + public void setMemory(int memory) { this.memory = memory; } + + /** Get the total amount of buffer memory, in bytes.*/ + public int getMemory() { return memory; } + + /** Set the progressable object in order to report progress. */ + public void setProgressable(Progressable progressable) { + this.progressable = progressable; + } + + /** + * Perform a file sort from a set of input files into an output file. + * @param inFiles the files to be sorted + * @param outFile the sorted output file + * @param deleteInput should the input files be deleted as they are read? + */ + public void sort(Path[] inFiles, Path outFile, + boolean deleteInput) throws IOException { + if (fs.exists(outFile)) { + throw new IOException("already exists: " + outFile); + } + + this.inFiles = inFiles; + this.outFile = outFile; + + int segments = sortPass(deleteInput); + if (segments > 1) { + mergePass(outFile.getParent()); + } + } + + /** + * Perform a file sort from a set of input files and return an iterator. + * @param inFiles the files to be sorted + * @param tempDir the directory where temp files are created during sort + * @param deleteInput should the input files be deleted as they are read? 
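A minimal sketch of constructing and running the Sorter described above (input and sortedOut are hypothetical paths; the Text/IntWritable key and value classes are assumptions):

    SequenceFile.Sorter sorter =
        new SequenceFile.Sorter(fs, Text.class, IntWritable.class, conf);
    sorter.setFactor(100);                               // streams merged per pass (io.sort.factor)
    sorter.setMemory(64 * 1024 * 1024);                  // sort buffer in bytes (io.sort.mb)
    sorter.sort(new Path[] { input }, sortedOut, false); // false = keep the input files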
+ * @return iterator the RawKeyValueIterator + */ + public RawKeyValueIterator sortAndIterate(Path[] inFiles, Path tempDir, + boolean deleteInput) throws IOException { + Path outFile = new Path(tempDir + Path.SEPARATOR + "all.2"); + if (fs.exists(outFile)) { + throw new IOException("already exists: " + outFile); + } + this.inFiles = inFiles; + //outFile will basically be used as prefix for temp files in the cases + //where sort outputs multiple sorted segments. For the single segment + //case, the outputFile itself will contain the sorted data for that + //segment + this.outFile = outFile; + + int segments = sortPass(deleteInput); + if (segments > 1) + return merge(outFile.suffix(".0"), outFile.suffix(".0.index"), + tempDir); + else if (segments == 1) + return merge(new Path[]{outFile}, true, tempDir); + else return null; + } + + /** + * The backwards compatible interface to sort. + * @param inFile the input file to sort + * @param outFile the sorted output file + */ + public void sort(Path inFile, Path outFile) throws IOException { + sort(new Path[]{inFile}, outFile, false); + } + + private int sortPass(boolean deleteInput) throws IOException { + LOG.debug("running sort pass"); + SortPass sortPass = new SortPass(); // make the SortPass + sortPass.setProgressable(progressable); + mergeSort = new MergeSort(sortPass.new SeqFileComparator()); + try { + return sortPass.run(deleteInput); // run it + } finally { + sortPass.close(); // close it + } + } + + private class SortPass { + private int memoryLimit = memory/4; + private int recordLimit = 1000000; + + private DataOutputBuffer rawKeys = new DataOutputBuffer(); + private byte[] rawBuffer; + + private int[] keyOffsets = new int[1024]; + private int[] pointers = new int[keyOffsets.length]; + private int[] pointersCopy = new int[keyOffsets.length]; + private int[] keyLengths = new int[keyOffsets.length]; + private ValueBytes[] rawValues = new ValueBytes[keyOffsets.length]; + + private ArrayList segmentLengths = new ArrayList(); + + private Reader in = null; + private FSDataOutputStream out = null; + private FSDataOutputStream indexOut = null; + private Path outName; + + private Progressable progressable = null; + + public int run(boolean deleteInput) throws IOException { + int segments = 0; + int currentFile = 0; + boolean atEof = (currentFile >= inFiles.length); + boolean isCompressed = false; + boolean isBlockCompressed = false; + CompressionCodec codec = null; + segmentLengths.clear(); + if (atEof) { + return 0; + } + + // Initialize + in = new Reader(fs, inFiles[currentFile], conf); + isCompressed = in.isCompressed(); + isBlockCompressed = in.isBlockCompressed(); + codec = in.getCompressionCodec(); + + for (int i=0; i < rawValues.length; ++i) { + rawValues[i] = null; + } + + while (!atEof) { + int count = 0; + int bytesProcessed = 0; + rawKeys.reset(); + while (!atEof && + bytesProcessed < memoryLimit && count < recordLimit) { + + // Read a record into buffer + // Note: Attempt to re-use 'rawValue' as far as possible + int keyOffset = rawKeys.getLength(); + ValueBytes rawValue = + (count == keyOffsets.length || rawValues[count] == null) ? 
+ in.createValueBytes() : + rawValues[count]; + int recordLength = in.nextRaw(rawKeys, rawValue); + if (recordLength == -1) { + in.close(); + if (deleteInput) { + fs.delete(inFiles[currentFile], true); + } + currentFile += 1; + atEof = currentFile >= inFiles.length; + if (!atEof) { + in = new Reader(fs, inFiles[currentFile], conf); + } else { + in = null; + } + continue; + } + + int keyLength = rawKeys.getLength() - keyOffset; + + if (count == keyOffsets.length) + grow(); + + keyOffsets[count] = keyOffset; // update pointers + pointers[count] = count; + keyLengths[count] = keyLength; + rawValues[count] = rawValue; + + bytesProcessed += recordLength; + count++; + } + + // buffer is full -- sort & flush it + LOG.debug("flushing segment " + segments); + rawBuffer = rawKeys.getData(); + sort(count); + // indicate we're making progress + if (progressable != null) { + progressable.progress(); + } + flush(count, bytesProcessed, isCompressed, isBlockCompressed, codec, + segments==0 && atEof); + segments++; + } + return segments; + } + + public void close() throws IOException { + if (in != null) { + in.close(); + } + if (out != null) { + out.close(); + } + if (indexOut != null) { + indexOut.close(); + } + } + + private void grow() { + int newLength = keyOffsets.length * 3 / 2; + keyOffsets = grow(keyOffsets, newLength); + pointers = grow(pointers, newLength); + pointersCopy = new int[newLength]; + keyLengths = grow(keyLengths, newLength); + rawValues = grow(rawValues, newLength); + } + + private int[] grow(int[] old, int newLength) { + int[] result = new int[newLength]; + System.arraycopy(old, 0, result, 0, old.length); + return result; + } + + private ValueBytes[] grow(ValueBytes[] old, int newLength) { + ValueBytes[] result = new ValueBytes[newLength]; + System.arraycopy(old, 0, result, 0, old.length); + for (int i=old.length; i < newLength; ++i) { + result[i] = null; + } + return result; + } + + private void flush(int count, int bytesProcessed, boolean isCompressed, + boolean isBlockCompressed, CompressionCodec codec, boolean done) + throws IOException { + if (out == null) { + outName = done ? outFile : outFile.suffix(".0"); + out = fs.create(outName); + if (!done) { + indexOut = fs.create(outName.suffix(".index")); + } + } + + long segmentStart = out.getPos(); + Writer writer = createWriter(conf, out, keyClass, valClass, + isCompressed, isBlockCompressed, codec, + done ? 
metadata : new Metadata()); + + if (!done) { + writer.sync = null; // disable sync on temp files + } + + for (int i = 0; i < count; i++) { // write in sorted order + int p = pointers[i]; + writer.appendRaw(rawBuffer, keyOffsets[p], keyLengths[p], rawValues[p]); + } + writer.close(); + + if (!done) { + // Save the segment length + WritableUtils.writeVLong(indexOut, segmentStart); + WritableUtils.writeVLong(indexOut, (out.getPos()-segmentStart)); + indexOut.flush(); + } + } + + private void sort(int count) { + System.arraycopy(pointers, 0, pointersCopy, 0, count); + mergeSort.mergeSort(pointersCopy, pointers, 0, count); + } + class SeqFileComparator implements Comparator { + public int compare(IntWritable I, IntWritable J) { + return comparator.compare(rawBuffer, keyOffsets[I.get()], + keyLengths[I.get()], rawBuffer, + keyOffsets[J.get()], keyLengths[J.get()]); + } + } + + /** set the progressable object in order to report progress */ + public void setProgressable(Progressable progressable) + { + this.progressable = progressable; + } + + } // SequenceFile.Sorter.SortPass + + /** The interface to iterate over raw keys/values of SequenceFiles. */ + public static interface RawKeyValueIterator { + /** Gets the current raw key + * @return DataOutputBuffer + * @throws IOException + */ + DataOutputBuffer getKey() throws IOException; + /** Gets the current raw value + * @return ValueBytes + * @throws IOException + */ + ValueBytes getValue() throws IOException; + /** Sets up the current key and value (for getKey and getValue) + * @return true if there exists a key/value, false otherwise + * @throws IOException + */ + boolean next() throws IOException; + /** closes the iterator so that the underlying streams can be closed + * @throws IOException + */ + void close() throws IOException; + /** Gets the Progress object; this has a float (0.0 - 1.0) + * indicating the bytes processed by the iterator so far + */ + Progress getProgress(); + } + + /** + * Merges the list of segments of type SegmentDescriptor + * @param segments the list of SegmentDescriptors + * @param tmpDir the directory to write temporary files into + * @return RawKeyValueIterator + * @throws IOException + */ + public RawKeyValueIterator merge(List segments, + Path tmpDir) + throws IOException { + // pass in object to report progress, if present + MergeQueue mQueue = new MergeQueue(segments, tmpDir, progressable); + return mQueue.merge(); + } + + /** + * Merges the contents of files passed in Path[] using a max factor value + * that is already set + * @param inNames the array of path names + * @param deleteInputs true if the input files should be deleted when + * unnecessary + * @param tmpDir the directory to write temporary files into + * @return RawKeyValueIteratorMergeQueue + * @throws IOException + */ + public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, + Path tmpDir) + throws IOException { + return merge(inNames, deleteInputs, + (inNames.length < factor) ? 
inNames.length : factor, + tmpDir); + } + + /** + * Merges the contents of files passed in Path[] + * @param inNames the array of path names + * @param deleteInputs true if the input files should be deleted when + * unnecessary + * @param factor the factor that will be used as the maximum merge fan-in + * @param tmpDir the directory to write temporary files into + * @return RawKeyValueIteratorMergeQueue + * @throws IOException + */ + public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, + int factor, Path tmpDir) + throws IOException { + //get the segments from inNames + ArrayList a = new ArrayList (); + for (int i = 0; i < inNames.length; i++) { + SegmentDescriptor s = new SegmentDescriptor(0, + fs.getFileStatus(inNames[i]).getLen(), inNames[i]); + s.preserveInput(!deleteInputs); + s.doSync(); + a.add(s); + } + this.factor = factor; + MergeQueue mQueue = new MergeQueue(a, tmpDir, progressable); + return mQueue.merge(); + } + + /** + * Merges the contents of files passed in Path[] + * @param inNames the array of path names + * @param tempDir the directory for creating temp files during merge + * @param deleteInputs true if the input files should be deleted when + * unnecessary + * @return RawKeyValueIteratorMergeQueue + * @throws IOException + */ + public RawKeyValueIterator merge(Path [] inNames, Path tempDir, + boolean deleteInputs) + throws IOException { + //outFile will basically be used as prefix for temp files for the + //intermediate merge outputs + this.outFile = new Path(tempDir + Path.SEPARATOR + "merged"); + //get the segments from inNames + ArrayList a = new ArrayList (); + for (int i = 0; i < inNames.length; i++) { + SegmentDescriptor s = new SegmentDescriptor(0, + fs.getFileStatus(inNames[i]).getLen(), inNames[i]); + s.preserveInput(!deleteInputs); + s.doSync(); + a.add(s); + } + factor = (inNames.length < factor) ? inNames.length : factor; + // pass in object to report progress, if present + MergeQueue mQueue = new MergeQueue(a, tempDir, progressable); + return mQueue.merge(); + } + + /** + * Clones the attributes (like compression of the input file and creates a + * corresponding Writer + * @param inputFile the path of the input file whose attributes should be + * cloned + * @param outputFile the path of the output file + * @param prog the Progressable to report status during the file write + * @return Writer + * @throws IOException + */ + public Writer cloneFileAttributes(Path inputFile, Path outputFile, + Progressable prog) + throws IOException { + FileSystem srcFileSys = inputFile.getFileSystem(conf); + Reader reader = new Reader(srcFileSys, inputFile, 4096, conf, true); + boolean compress = reader.isCompressed(); + boolean blockCompress = reader.isBlockCompressed(); + CompressionCodec codec = reader.getCompressionCodec(); + reader.close(); + + Writer writer = createWriter(outputFile.getFileSystem(conf), conf, + outputFile, keyClass, valClass, compress, + blockCompress, codec, prog, + new Metadata()); + return writer; + } + + /** + * Writes records from RawKeyValueIterator into a file represented by the + * passed writer + * @param records the RawKeyValueIterator + * @param writer the Writer created earlier + * @throws IOException + */ + public void writeFile(RawKeyValueIterator records, Writer writer) + throws IOException { + while(records.next()) { + writer.appendRaw(records.getKey().getData(), 0, + records.getKey().getLength(), records.getValue()); + } + writer.sync(); + } + + /** Merge the provided files. 
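A sketch of combining the merge methods above (sortedInputs, tmpDir and mergedFile are hypothetical): obtain a RawKeyValueIterator over pre-sorted inputs, open a Writer cloned from the first input's attributes, and stream the merged records into it. This mirrors what merge(Path[], Path) below does internally:

    SequenceFile.Sorter.RawKeyValueIterator merged =
        sorter.merge(sortedInputs, false, tmpDir);      // keep inputs, temp files under tmpDir
    SequenceFile.Writer out =
        sorter.cloneFileAttributes(sortedInputs[0], mergedFile, null);
    sorter.writeFile(merged, out);                      // appendRaw() every merged record
    out.close();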
+ * @param inFiles the array of input path names + * @param outFile the final output file + * @throws IOException + */ + public void merge(Path[] inFiles, Path outFile) throws IOException { + if (fs.exists(outFile)) { + throw new IOException("already exists: " + outFile); + } + RawKeyValueIterator r = merge(inFiles, false, outFile.getParent()); + Writer writer = cloneFileAttributes(inFiles[0], outFile, null); + + writeFile(r, writer); + + writer.close(); + } + + /** sort calls this to generate the final merged output */ + private int mergePass(Path tmpDir) throws IOException { + LOG.debug("running merge pass"); + Writer writer = cloneFileAttributes( + outFile.suffix(".0"), outFile, null); + RawKeyValueIterator r = merge(outFile.suffix(".0"), + outFile.suffix(".0.index"), tmpDir); + writeFile(r, writer); + + writer.close(); + return 0; + } + + /** Used by mergePass to merge the output of the sort + * @param inName the name of the input file containing sorted segments + * @param indexIn the offsets of the sorted segments + * @param tmpDir the relative directory to store intermediate results in + * @return RawKeyValueIterator + * @throws IOException + */ + private RawKeyValueIterator merge(Path inName, Path indexIn, Path tmpDir) + throws IOException { + //get the segments from indexIn + //we create a SegmentContainer so that we can track segments belonging to + //inName and delete inName as soon as we see that we have looked at all + //the contained segments during the merge process & hence don't need + //them anymore + SegmentContainer container = new SegmentContainer(inName, indexIn); + MergeQueue mQueue = new MergeQueue(container.getSegmentList(), tmpDir, progressable); + return mQueue.merge(); + } + + /** This class implements the core of the merge logic */ + private class MergeQueue extends PriorityQueue + implements RawKeyValueIterator { + private boolean compress; + private boolean blockCompress; + private DataOutputBuffer rawKey = new DataOutputBuffer(); + private ValueBytes rawValue; + private long totalBytesProcessed; + private float progPerByte; + private Progress mergeProgress = new Progress(); + private Path tmpDir; + private Progressable progress = null; //handle to the progress reporting object + private SegmentDescriptor minSegment; + + //a TreeMap used to store the segments sorted by size (segment offset and + //segment path name is used to break ties between segments of same sizes) + private Map sortedSegmentSizes = + new TreeMap(); + + @SuppressWarnings("unchecked") + public void put(SegmentDescriptor stream) throws IOException { + if (size() == 0) { + compress = stream.in.isCompressed(); + blockCompress = stream.in.isBlockCompressed(); + } else if (compress != stream.in.isCompressed() || + blockCompress != stream.in.isBlockCompressed()) { + throw new IOException("All merged files must be compressed or not."); + } + super.put(stream); + } + + /** + * A queue of file segments to merge + * @param segments the file segments to merge + * @param tmpDir a relative local directory to save intermediate files in + * @param progress the reference to the Progressable object + */ + public MergeQueue(List segments, + Path tmpDir, Progressable progress) { + int size = segments.size(); + for (int i = 0; i < size; i++) { + sortedSegmentSizes.put(segments.get(i), null); + } + this.tmpDir = tmpDir; + this.progress = progress; + } + protected boolean lessThan(Object a, Object b) { + // indicate we're making progress + if (progress != null) { + progress.progress(); + } + SegmentDescriptor msa 
= (SegmentDescriptor)a; + SegmentDescriptor msb = (SegmentDescriptor)b; + return comparator.compare(msa.getKey().getData(), 0, + msa.getKey().getLength(), msb.getKey().getData(), 0, + msb.getKey().getLength()) < 0; + } + public void close() throws IOException { + SegmentDescriptor ms; // close inputs + while ((ms = (SegmentDescriptor)pop()) != null) { + ms.cleanup(); + } + minSegment = null; + } + public DataOutputBuffer getKey() throws IOException { + return rawKey; + } + public ValueBytes getValue() throws IOException { + return rawValue; + } + public boolean next() throws IOException { + if (size() == 0) + return false; + if (minSegment != null) { + //minSegment is non-null for all invocations of next except the first + //one. For the first invocation, the priority queue is ready for use + //but for the subsequent invocations, first adjust the queue + adjustPriorityQueue(minSegment); + if (size() == 0) { + minSegment = null; + return false; + } + } + minSegment = (SegmentDescriptor)top(); + long startPos = minSegment.in.getPosition(); // Current position in stream + //save the raw key reference + rawKey = minSegment.getKey(); + //load the raw value. Re-use the existing rawValue buffer + if (rawValue == null) { + rawValue = minSegment.in.createValueBytes(); + } + minSegment.nextRawValue(rawValue); + long endPos = minSegment.in.getPosition(); // End position after reading value + updateProgress(endPos - startPos); + return true; + } + + public Progress getProgress() { + return mergeProgress; + } + + private void adjustPriorityQueue(SegmentDescriptor ms) throws IOException{ + long startPos = ms.in.getPosition(); // Current position in stream + boolean hasNext = ms.nextRawKey(); + long endPos = ms.in.getPosition(); // End position after reading key + updateProgress(endPos - startPos); + if (hasNext) { + adjustTop(); + } else { + pop(); + ms.cleanup(); + } + } + + private void updateProgress(long bytesProcessed) { + totalBytesProcessed += bytesProcessed; + if (progPerByte > 0) { + mergeProgress.set(totalBytesProcessed * progPerByte); + } + } + + /** This is the single level merge that is called multiple times + * depending on the factor size and the number of segments + * @return RawKeyValueIterator + * @throws IOException + */ + public RawKeyValueIterator merge() throws IOException { + //create the MergeStreams from the sorted map created in the constructor + //and dump the final output to a file + int numSegments = sortedSegmentSizes.size(); + int origFactor = factor; + int passNo = 1; + LocalDirAllocator lDirAlloc = new LocalDirAllocator("io.seqfile.local.dir"); + do { + //get the factor for this pass of merge + factor = getPassFactor(passNo, numSegments); + List segmentsToMerge = + new ArrayList(); + int segmentsConsidered = 0; + int numSegmentsToConsider = factor; + while (true) { + //extract the smallest 'factor' number of segment pointers from the + //TreeMap. 
Call cleanup on the empty segments (no key/value data) + SegmentDescriptor[] mStream = + getSegmentDescriptors(numSegmentsToConsider); + for (int i = 0; i < mStream.length; i++) { + if (mStream[i].nextRawKey()) { + segmentsToMerge.add(mStream[i]); + segmentsConsidered++; + // Count the fact that we read some bytes in calling nextRawKey() + updateProgress(mStream[i].in.getPosition()); + } + else { + mStream[i].cleanup(); + numSegments--; //we ignore this segment for the merge + } + } + //if we have the desired number of segments + //or looked at all available segments, we break + if (segmentsConsidered == factor || + sortedSegmentSizes.size() == 0) { + break; + } + + numSegmentsToConsider = factor - segmentsConsidered; + } + //feed the streams to the priority queue + initialize(segmentsToMerge.size()); clear(); + for (int i = 0; i < segmentsToMerge.size(); i++) { + put(segmentsToMerge.get(i)); + } + //if we have lesser number of segments remaining, then just return the + //iterator, else do another single level merge + if (numSegments <= factor) { + //calculate the length of the remaining segments. Required for + //calculating the merge progress + long totalBytes = 0; + for (int i = 0; i < segmentsToMerge.size(); i++) { + totalBytes += segmentsToMerge.get(i).segmentLength; + } + if (totalBytes != 0) //being paranoid + progPerByte = 1.0f / (float)totalBytes; + //reset factor to what it originally was + factor = origFactor; + return this; + } else { + //we want to spread the creation of temp files on multiple disks if + //available under the space constraints + long approxOutputSize = 0; + for (SegmentDescriptor s : segmentsToMerge) { + approxOutputSize += s.segmentLength + + ChecksumFileSystem.getApproxChkSumLength( + s.segmentLength); + } + Path tmpFilename = + new Path(tmpDir, "intermediate").suffix("." + passNo); + + Path outputFile = lDirAlloc.getLocalPathForWrite( + tmpFilename.toString(), + approxOutputSize, conf); + LOG.debug("writing intermediate results to " + outputFile); + Writer writer = cloneFileAttributes( + fs.makeQualified(segmentsToMerge.get(0).segmentPathName), + fs.makeQualified(outputFile), null); + writer.sync = null; //disable sync for temp files + writeFile(this, writer); + writer.close(); + + //we finished one single level merge; now clean up the priority + //queue + this.close(); + + SegmentDescriptor tempSegment = + new SegmentDescriptor(0, + fs.getFileStatus(outputFile).getLen(), outputFile); + //put the segment back in the TreeMap + sortedSegmentSizes.put(tempSegment, null); + numSegments = sortedSegmentSizes.size(); + passNo++; + } + //we are worried about only the first pass merge factor. So reset the + //factor to what it originally was + factor = origFactor; + } while(true); + } + + //Hadoop-591 + public int getPassFactor(int passNo, int numSegments) { + if (passNo > 1 || numSegments <= factor || factor == 1) + return factor; + int mod = (numSegments - 1) % (factor - 1); + if (mod == 0) + return factor; + return mod + 1; + } + + /** Return (& remove) the requested number of segment descriptors from the + * sorted map. 
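As a worked example of getPassFactor above: with 13 sorted segments and a factor of 10, the first pass merges only (13 - 1) % (10 - 1) + 1 = 4 segments (the smallest ones, since the TreeMap is ordered by segment length), leaving 9 originals plus 1 intermediate = 10 segments, so the second and final pass is a full 10-way merge. Merging 10 segments up front would instead rewrite more data to an intermediate file and leave the last pass with a fan-in of only 4.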
+ */ + public SegmentDescriptor[] getSegmentDescriptors(int numDescriptors) { + if (numDescriptors > sortedSegmentSizes.size()) + numDescriptors = sortedSegmentSizes.size(); + SegmentDescriptor[] SegmentDescriptors = + new SegmentDescriptor[numDescriptors]; + Iterator iter = sortedSegmentSizes.keySet().iterator(); + int i = 0; + while (i < numDescriptors) { + SegmentDescriptors[i++] = (SegmentDescriptor)iter.next(); + iter.remove(); + } + return SegmentDescriptors; + } + } // SequenceFile.Sorter.MergeQueue + + /** This class defines a merge segment. This class can be subclassed to + * provide a customized cleanup method implementation. In this + * implementation, cleanup closes the file handle and deletes the file + */ + public class SegmentDescriptor implements Comparable { + + long segmentOffset; //the start of the segment in the file + long segmentLength; //the length of the segment + Path segmentPathName; //the path name of the file containing the segment + boolean ignoreSync = true; //set to true for temp files + private Reader in = null; + private DataOutputBuffer rawKey = null; //this will hold the current key + private boolean preserveInput = false; //delete input segment files? + + /** Constructs a segment + * @param segmentOffset the offset of the segment in the file + * @param segmentLength the length of the segment + * @param segmentPathName the path name of the file containing the segment + */ + public SegmentDescriptor (long segmentOffset, long segmentLength, + Path segmentPathName) { + this.segmentOffset = segmentOffset; + this.segmentLength = segmentLength; + this.segmentPathName = segmentPathName; + } + + /** Do the sync checks */ + public void doSync() {ignoreSync = false;} + + /** Whether to delete the files when no longer needed */ + public void preserveInput(boolean preserve) { + preserveInput = preserve; + } + + public boolean shouldPreserveInput() { + return preserveInput; + } + + public int compareTo(Object o) { + SegmentDescriptor that = (SegmentDescriptor)o; + if (this.segmentLength != that.segmentLength) { + return (this.segmentLength < that.segmentLength ? -1 : 1); + } + if (this.segmentOffset != that.segmentOffset) { + return (this.segmentOffset < that.segmentOffset ? -1 : 1); + } + return (this.segmentPathName.toString()). 
+ compareTo(that.segmentPathName.toString()); + } + + public boolean equals(Object o) { + if (!(o instanceof SegmentDescriptor)) { + return false; + } + SegmentDescriptor that = (SegmentDescriptor)o; + if (this.segmentLength == that.segmentLength && + this.segmentOffset == that.segmentOffset && + this.segmentPathName.toString().equals( + that.segmentPathName.toString())) { + return true; + } + return false; + } + + public int hashCode() { + return 37 * 17 + (int) (segmentOffset^(segmentOffset>>>32)); + } + + /** Fills up the rawKey object with the key returned by the Reader + * @return true if there is a key returned; false, otherwise + * @throws IOException + */ + public boolean nextRawKey() throws IOException { + if (in == null) { + int bufferSize = conf.getInt("io.file.buffer.size", 4096); + if (fs.getUri().getScheme().startsWith("ramfs")) { + bufferSize = conf.getInt("io.bytes.per.checksum", 512); + } + Reader reader = new Reader(fs, segmentPathName, + bufferSize, segmentOffset, + segmentLength, conf, false); + + //sometimes we ignore syncs especially for temp merge files + if (ignoreSync) reader.ignoreSync(); + + if (reader.getKeyClass() != keyClass) + throw new IOException("wrong key class: " + reader.getKeyClass() + + " is not " + keyClass); + if (reader.getValueClass() != valClass) + throw new IOException("wrong value class: "+reader.getValueClass()+ + " is not " + valClass); + this.in = reader; + rawKey = new DataOutputBuffer(); + } + rawKey.reset(); + int keyLength = + in.nextRawKey(rawKey); + return (keyLength >= 0); + } + + /** Fills up the passed rawValue with the value corresponding to the key + * read earlier + * @param rawValue + * @return the length of the value + * @throws IOException + */ + public int nextRawValue(ValueBytes rawValue) throws IOException { + int valLength = in.nextRawValue(rawValue); + return valLength; + } + + /** Returns the stored rawKey */ + public DataOutputBuffer getKey() { + return rawKey; + } + + /** closes the underlying reader */ + private void close() throws IOException { + this.in.close(); + this.in = null; + } + + /** The default cleanup. Subclasses can override this with a custom + * cleanup + */ + public void cleanup() throws IOException { + close(); + if (!preserveInput) { + fs.delete(segmentPathName, true); + } + } + } // SequenceFile.Sorter.SegmentDescriptor + + /** This class provisions multiple segments contained within a single + * file + */ + private class LinkedSegmentsDescriptor extends SegmentDescriptor { + + SegmentContainer parentContainer = null; + + /** Constructs a segment + * @param segmentOffset the offset of the segment in the file + * @param segmentLength the length of the segment + * @param segmentPathName the path name of the file containing the segment + * @param parent the parent SegmentContainer that holds the segment + */ + public LinkedSegmentsDescriptor (long segmentOffset, long segmentLength, + Path segmentPathName, SegmentContainer parent) { + super(segmentOffset, segmentLength, segmentPathName); + this.parentContainer = parent; + } + /** The default cleanup. Subclasses can override this with a custom + * cleanup + */ + public void cleanup() throws IOException { + super.close(); + if (super.shouldPreserveInput()) return; + parentContainer.cleanup(); + } + + public boolean equals(Object o) { + if (!(o instanceof LinkedSegmentsDescriptor)) { + return false; + } + return super.equals(o); + } + } //SequenceFile.Sorter.LinkedSegmentsDescriptor + + /** The class that defines a container for segments to be merged. 
Primarily + * required to delete temp files as soon as all the contained segments + * have been looked at */ + private class SegmentContainer { + private int numSegmentsCleanedUp = 0; //track the no. of segment cleanups + private int numSegmentsContained; //# of segments contained + private Path inName; //input file from where segments are created + + //the list of segments read from the file + private ArrayList segments = + new ArrayList (); + /** This constructor is there primarily to serve the sort routine that + * generates a single output file with an associated index file */ + public SegmentContainer(Path inName, Path indexIn) throws IOException { + //get the segments from indexIn + FSDataInputStream fsIndexIn = fs.open(indexIn); + long end = fs.getFileStatus(indexIn).getLen(); + while (fsIndexIn.getPos() < end) { + long segmentOffset = WritableUtils.readVLong(fsIndexIn); + long segmentLength = WritableUtils.readVLong(fsIndexIn); + Path segmentName = inName; + segments.add(new LinkedSegmentsDescriptor(segmentOffset, + segmentLength, segmentName, this)); + } + fsIndexIn.close(); + fs.delete(indexIn, true); + numSegmentsContained = segments.size(); + this.inName = inName; + } + + public List getSegmentList() { + return segments; + } + public void cleanup() throws IOException { + numSegmentsCleanedUp++; + if (numSegmentsCleanedUp == numSegmentsContained) { + fs.delete(inName, true); + } + } + } //SequenceFile.Sorter.SegmentContainer + + } // SequenceFile.Sorter + +} // SequenceFile diff --git a/src/java/org/apache/hadoop/io/SetFile.java b/src/java/org/apache/hadoop/io/SetFile.java new file mode 100644 index 00000000000..a0cb84922aa --- /dev/null +++ b/src/java/org/apache/hadoop/io/SetFile.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +import org.apache.hadoop.fs.*; +import org.apache.hadoop.conf.*; + +/** A file-based set of keys. */ +public class SetFile extends MapFile { + + protected SetFile() {} // no public ctor + + /** + * Write a new set file. + */ + public static class Writer extends MapFile.Writer { + + /** Create the named set for keys of the named class. + * @deprecated pass a Configuration too + */ + public Writer(FileSystem fs, String dirName, + Class keyClass) throws IOException { + super(new Configuration(), fs, dirName, keyClass, NullWritable.class); + } + + /** Create a set naming the element class and compression type. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + SequenceFile.CompressionType compress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), compress); + } + + /** Create a set naming the element comparator and compression type. 
*/ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, + SequenceFile.CompressionType compress) throws IOException { + super(conf, fs, dirName, comparator, NullWritable.class, compress); + } + + /** Append a key to a set. The key must be strictly greater than the + * previous key added to the set. */ + public void append(WritableComparable key) throws IOException{ + append(key, NullWritable.get()); + } + } + + /** Provide access to an existing set file. */ + public static class Reader extends MapFile.Reader { + + /** Construct a set reader for the named set.*/ + public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException { + super(fs, dirName, conf); + } + + /** Construct a set reader for the named set using the named comparator.*/ + public Reader(FileSystem fs, String dirName, WritableComparator comparator, Configuration conf) + throws IOException { + super(fs, dirName, comparator, conf); + } + + // javadoc inherited + public boolean seek(WritableComparable key) + throws IOException { + return super.seek(key); + } + + /** Read the next key in a set into key. Returns + * true if such a key exists and false when at the end of the set. */ + public boolean next(WritableComparable key) + throws IOException { + return next(key, NullWritable.get()); + } + + /** Read the matching key from a set into key. + * Returns key, or null if no match exists. */ + public WritableComparable get(WritableComparable key) + throws IOException { + if (seek(key)) { + next(key); + return key; + } else + return null; + } + } + +} diff --git a/src/java/org/apache/hadoop/io/SortedMapWritable.java b/src/java/org/apache/hadoop/io/SortedMapWritable.java new file mode 100644 index 00000000000..53a28dddd3a --- /dev/null +++ b/src/java/org/apache/hadoop/io/SortedMapWritable.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A Writable SortedMap. + */ +public class SortedMapWritable extends AbstractMapWritable + implements SortedMap { + + private SortedMap instance; + + /** default constructor. */ + public SortedMapWritable() { + super(); + this.instance = new TreeMap(); + } + + /** + * Copy constructor. 
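A usage sketch of the SetFile API above (the directory name is hypothetical): keys must be appended in strictly increasing order, and membership is tested by get() returning non-null:

    SetFile.Writer writer = new SetFile.Writer(conf, fs, "/tmp/example.set",
        WritableComparator.get(Text.class), SequenceFile.CompressionType.NONE);
    writer.append(new Text("alpha"));
    writer.append(new Text("beta"));                    // must sort after "alpha"
    writer.close();

    SetFile.Reader reader = new SetFile.Reader(fs, "/tmp/example.set", conf);
    boolean present = reader.get(new Text("beta")) != null;  // true
    reader.close();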
+ * + * @param other the map to copy from + */ + public SortedMapWritable(SortedMapWritable other) { + this(); + copy(other); + } + + /** {@inheritDoc} */ + public Comparator comparator() { + // Returning null means we use the natural ordering of the keys + return null; + } + + /** {@inheritDoc} */ + public WritableComparable firstKey() { + return instance.firstKey(); + } + + /** {@inheritDoc} */ + public SortedMap + headMap(WritableComparable toKey) { + + return instance.headMap(toKey); + } + + /** {@inheritDoc} */ + public WritableComparable lastKey() { + return instance.lastKey(); + } + + /** {@inheritDoc} */ + public SortedMap + subMap(WritableComparable fromKey, WritableComparable toKey) { + + return instance.subMap(fromKey, toKey); + } + + /** {@inheritDoc} */ + public SortedMap + tailMap(WritableComparable fromKey) { + + return instance.tailMap(fromKey); + } + + /** {@inheritDoc} */ + public void clear() { + instance.clear(); + } + + /** {@inheritDoc} */ + public boolean containsKey(Object key) { + return instance.containsKey(key); + } + + /** {@inheritDoc} */ + public boolean containsValue(Object value) { + return instance.containsValue(value); + } + + /** {@inheritDoc} */ + public Set> entrySet() { + return instance.entrySet(); + } + + /** {@inheritDoc} */ + public Writable get(Object key) { + return instance.get(key); + } + + /** {@inheritDoc} */ + public boolean isEmpty() { + return instance.isEmpty(); + } + + /** {@inheritDoc} */ + public Set keySet() { + return instance.keySet(); + } + + /** {@inheritDoc} */ + public Writable put(WritableComparable key, Writable value) { + addToMap(key.getClass()); + addToMap(value.getClass()); + return instance.put(key, value); + } + + /** {@inheritDoc} */ + public void putAll(Map t) { + for (Map.Entry e: + t.entrySet()) { + + instance.put(e.getKey(), e.getValue()); + } + } + + /** {@inheritDoc} */ + public Writable remove(Object key) { + return instance.remove(key); + } + + /** {@inheritDoc} */ + public int size() { + return instance.size(); + } + + /** {@inheritDoc} */ + public Collection values() { + return instance.values(); + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + // Read the number of entries in the map + + int entries = in.readInt(); + + // Then read each key/value pair + + for (int i = 0; i < entries; i++) { + WritableComparable key = + (WritableComparable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + key.readFields(in); + + Writable value = (Writable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + value.readFields(in); + instance.put(key, value); + } + } + + /** {@inheritDoc} */ + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + // Write out the number of entries in the map + + out.writeInt(instance.size()); + + // Then write out each key/value pair + + for (Map.Entry e: instance.entrySet()) { + out.writeByte(getId(e.getKey().getClass())); + e.getKey().write(out); + out.writeByte(getId(e.getValue().getClass())); + e.getValue().write(out); + } + } +} diff --git a/src/java/org/apache/hadoop/io/Stringifier.java b/src/java/org/apache/hadoop/io/Stringifier.java new file mode 100644 index 00000000000..e8dba8e05ec --- /dev/null +++ b/src/java/org/apache/hadoop/io/Stringifier.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
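A round-trip sketch of the SortedMapWritable just defined: it behaves like a TreeMap of Writables and records the class of every key and value, so the map can be rebuilt from its serialized form:

    SortedMapWritable map = new SortedMapWritable();
    map.put(new IntWritable(2), new Text("two"));
    map.put(new IntWritable(1), new Text("one"));

    DataOutputBuffer out = new DataOutputBuffer();
    map.write(out);                                     // class ids, entry count, then entries

    SortedMapWritable copy = new SortedMapWritable();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    copy.readFields(in);
    System.out.println(copy.firstKey());                // prints 1 -- TreeMap ordering preserved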
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; + +/** + * Stringifier interface offers two methods to convert an object + * to a string representation and restore the object given its + * string representation. + * @param the class of the objects to stringify + */ +public interface Stringifier extends java.io.Closeable { + + /** + * Converts the object to a string representation + * @param obj the object to convert + * @return the string representation of the object + * @throws IOException if the object cannot be converted + */ + public String toString(T obj) throws IOException; + + /** + * Restores the object from its string representation. + * @param str the string representation of the object + * @return restored object + * @throws IOException if the object cannot be restored + */ + public T fromString(String str) throws IOException; + + + /** + * Closes this object. + * @throws IOException if an I/O error occurs + * */ + public void close() throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/Text.java b/src/java/org/apache/hadoop/io/Text.java new file mode 100644 index 00000000000..19faa8768d6 --- /dev/null +++ b/src/java/org/apache/hadoop/io/Text.java @@ -0,0 +1,594 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.MalformedInputException; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** This class stores text using standard UTF8 encoding. It provides methods + * to serialize, deserialize, and compare texts at byte level. 
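A sketch of the Stringifier contract just defined, using the DefaultStringifier implementation from the same io package (which Base64-encodes the object's serialized bytes):

    DefaultStringifier<IntWritable> stringifier =
        new DefaultStringifier<IntWritable>(conf, IntWritable.class);
    String encoded = stringifier.toString(new IntWritable(42));   // object -> string
    IntWritable restored = stringifier.fromString(encoded);       // string -> object
    stringifier.close();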
The length is + * an integer and is serialized using zero-compressed format.
<p>
In + * addition, it provides methods for string traversal without converting the + * byte array to a string.
<p>
Also includes utilities for + * serializing/deserialing a string, coding/decoding a string, checking if a + * byte array contains valid UTF8 code, calculating the length of an encoded + * string. + */ +public class Text extends BinaryComparable + implements WritableComparable { + private static final Log LOG= LogFactory.getLog(Text.class); + + private static ThreadLocal ENCODER_FACTORY = + new ThreadLocal() { + protected CharsetEncoder initialValue() { + return Charset.forName("UTF-8").newEncoder(). + onMalformedInput(CodingErrorAction.REPORT). + onUnmappableCharacter(CodingErrorAction.REPORT); + } + }; + + private static ThreadLocal DECODER_FACTORY = + new ThreadLocal() { + protected CharsetDecoder initialValue() { + return Charset.forName("UTF-8").newDecoder(). + onMalformedInput(CodingErrorAction.REPORT). + onUnmappableCharacter(CodingErrorAction.REPORT); + } + }; + + private static final byte [] EMPTY_BYTES = new byte[0]; + + private byte[] bytes; + private int length; + + public Text() { + bytes = EMPTY_BYTES; + } + + /** Construct from a string. + */ + public Text(String string) { + set(string); + } + + /** Construct from another text. */ + public Text(Text utf8) { + set(utf8); + } + + /** Construct from a byte array. + */ + public Text(byte[] utf8) { + set(utf8); + } + + /** + * Returns the raw bytes; however, only data up to {@link #getLength()} is + * valid. + */ + public byte[] getBytes() { + return bytes; + } + + /** Returns the number of bytes in the byte array */ + public int getLength() { + return length; + } + + /** + * Returns the Unicode Scalar Value (32-bit integer value) + * for the character at position. Note that this + * method avoids using the converter or doing String instatiation + * @return the Unicode scalar value at position or -1 + * if the position is invalid or points to a + * trailing byte + */ + public int charAt(int position) { + if (position > this.length) return -1; // too long + if (position < 0) return -1; // duh. + + ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position); + return bytesToCodePoint(bb.slice()); + } + + public int find(String what) { + return find(what, 0); + } + + /** + * Finds any occurence of what in the backing + * buffer, starting as position start. The starting + * position is measured in bytes and the return value is in + * terms of byte position in the buffer. The backing buffer is + * not converted to a string for this operation. + * @return byte position of the first occurence of the search + * string in the UTF-8 buffer or -1 if not found + */ + public int find(String what, int start) { + try { + ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length); + ByteBuffer tgt = encode(what); + byte b = tgt.get(); + src.position(start); + + while (src.hasRemaining()) { + if (b == src.get()) { // matching first byte + src.mark(); // save position in loop + tgt.mark(); // save position in target + boolean found = true; + int pos = src.position()-1; + while (tgt.hasRemaining()) { + if (!src.hasRemaining()) { // src expired first + tgt.reset(); + src.reset(); + found = false; + break; + } + if (!(tgt.get() == src.get())) { + tgt.reset(); + src.reset(); + found = false; + break; // no match + } + } + if (found) return pos; + } + } + return -1; // not found + } catch (CharacterCodingException e) { + // can't get here + e.printStackTrace(); + return -1; + } + } + /** Set to contain the contents of a string. 
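A short sketch of the byte-level accessors above; note that positions and return values are in terms of bytes, not characters:

    Text t = new Text("hadoop");
    int pos = t.find("do");            // 2 -- byte offset of the first occurrence
    int cp  = t.charAt(0);             // 104 -- Unicode code point of 'h'
    byte[] raw = t.getBytes();         // valid only up to t.getLength() bytes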
+ */ + public void set(String string) { + try { + ByteBuffer bb = encode(string, true); + bytes = bb.array(); + length = bb.limit(); + }catch(CharacterCodingException e) { + throw new RuntimeException("Should not have happened " + e.toString()); + } + } + + /** Set to a utf8 byte array + */ + public void set(byte[] utf8) { + set(utf8, 0, utf8.length); + } + + /** copy a text. */ + public void set(Text other) { + set(other.getBytes(), 0, other.getLength()); + } + + /** + * Set the Text to range of bytes + * @param utf8 the data to copy from + * @param start the first position of the new string + * @param len the number of bytes of the new string + */ + public void set(byte[] utf8, int start, int len) { + setCapacity(len, false); + System.arraycopy(utf8, start, bytes, 0, len); + this.length = len; + } + + /** + * Append a range of bytes to the end of the given text + * @param utf8 the data to copy from + * @param start the first position to append from utf8 + * @param len the number of bytes to append + */ + public void append(byte[] utf8, int start, int len) { + setCapacity(length + len, true); + System.arraycopy(utf8, start, bytes, length, len); + length += len; + } + + /** + * Clear the string to empty. + */ + public void clear() { + length = 0; + } + + /* + * Sets the capacity of this Text object to at least + * len bytes. If the current buffer is longer, + * then the capacity and existing content of the buffer are + * unchanged. If len is larger + * than the current capacity, the Text object's capacity is + * increased to match. + * @param len the number of bytes we need + * @param keepData should the old data be kept + */ + private void setCapacity(int len, boolean keepData) { + if (bytes == null || bytes.length < len) { + byte[] newBytes = new byte[len]; + if (bytes != null && keepData) { + System.arraycopy(bytes, 0, newBytes, 0, length); + } + bytes = newBytes; + } + } + + /** + * Convert text back to string + * @see java.lang.Object#toString() + */ + public String toString() { + try { + return decode(bytes, 0, length); + } catch (CharacterCodingException e) { + throw new RuntimeException("Should not have happened " + e.toString()); + } + } + + /** deserialize + */ + public void readFields(DataInput in) throws IOException { + int newLength = WritableUtils.readVInt(in); + setCapacity(newLength, false); + in.readFully(bytes, 0, newLength); + length = newLength; + } + + /** Skips over one Text in the input. */ + public static void skip(DataInput in) throws IOException { + int length = WritableUtils.readVInt(in); + WritableUtils.skipFully(in, length); + } + + /** serialize + * write this object to out + * length uses zero-compressed encoding + * @see Writable#write(DataOutput) + */ + public void write(DataOutput out) throws IOException { + WritableUtils.writeVInt(out, length); + out.write(bytes, 0, length); + } + + /** Returns true iff o is a Text with the same contents. */ + public boolean equals(Object o) { + if (o instanceof Text) + return super.equals(o); + return false; + } + + public int hashCode() { + return super.hashCode(); + } + + /** A WritableComparator optimized for Text keys. 
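A sketch of why the raw comparator defined just below matters: two serialized Text keys can be ordered directly on their bytes, without deserializing either one:

    DataOutputBuffer b1 = new DataOutputBuffer();
    DataOutputBuffer b2 = new DataOutputBuffer();
    new Text("apple").write(b1);                        // vint length + UTF-8 bytes
    new Text("banana").write(b2);
    WritableComparator cmp = WritableComparator.get(Text.class);
    int c = cmp.compare(b1.getData(), 0, b1.getLength(),
                        b2.getData(), 0, b2.getLength());   // < 0: "apple" sorts first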
*/ + public static class Comparator extends WritableComparator { + public Comparator() { + super(Text.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + int n1 = WritableUtils.decodeVIntSize(b1[s1]); + int n2 = WritableUtils.decodeVIntSize(b2[s2]); + return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2); + } + } + + static { + // register this comparator + WritableComparator.define(Text.class, new Comparator()); + } + + /// STATIC UTILITIES FROM HERE DOWN + /** + * Converts the provided byte array to a String using the + * UTF-8 encoding. If the input is malformed, + * replace by a default value. + */ + public static String decode(byte[] utf8) throws CharacterCodingException { + return decode(ByteBuffer.wrap(utf8), true); + } + + public static String decode(byte[] utf8, int start, int length) + throws CharacterCodingException { + return decode(ByteBuffer.wrap(utf8, start, length), true); + } + + /** + * Converts the provided byte array to a String using the + * UTF-8 encoding. If replace is true, then + * malformed input is replaced with the + * substitution character, which is U+FFFD. Otherwise the + * method throws a MalformedInputException. + */ + public static String decode(byte[] utf8, int start, int length, boolean replace) + throws CharacterCodingException { + return decode(ByteBuffer.wrap(utf8, start, length), replace); + } + + private static String decode(ByteBuffer utf8, boolean replace) + throws CharacterCodingException { + CharsetDecoder decoder = DECODER_FACTORY.get(); + if (replace) { + decoder.onMalformedInput( + java.nio.charset.CodingErrorAction.REPLACE); + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } + String str = decoder.decode(utf8).toString(); + // set decoder back to its default value: REPORT + if (replace) { + decoder.onMalformedInput(CodingErrorAction.REPORT); + decoder.onUnmappableCharacter(CodingErrorAction.REPORT); + } + return str; + } + + /** + * Converts the provided String to bytes using the + * UTF-8 encoding. If the input is malformed, + * invalid chars are replaced by a default value. + * @return ByteBuffer: bytes stores at ByteBuffer.array() + * and length is ByteBuffer.limit() + */ + + public static ByteBuffer encode(String string) + throws CharacterCodingException { + return encode(string, true); + } + + /** + * Converts the provided String to bytes using the + * UTF-8 encoding. If replace is true, then + * malformed input is replaced with the + * substitution character, which is U+FFFD. Otherwise the + * method throws a MalformedInputException. 
+ * @return ByteBuffer: bytes stores at ByteBuffer.array() + * and length is ByteBuffer.limit() + */ + public static ByteBuffer encode(String string, boolean replace) + throws CharacterCodingException { + CharsetEncoder encoder = ENCODER_FACTORY.get(); + if (replace) { + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } + ByteBuffer bytes = + encoder.encode(CharBuffer.wrap(string.toCharArray())); + if (replace) { + encoder.onMalformedInput(CodingErrorAction.REPORT); + encoder.onUnmappableCharacter(CodingErrorAction.REPORT); + } + return bytes; + } + + /** Read a UTF8 encoded string from in + */ + public static String readString(DataInput in) throws IOException { + int length = WritableUtils.readVInt(in); + byte [] bytes = new byte[length]; + in.readFully(bytes, 0, length); + return decode(bytes); + } + + /** Write a UTF8 encoded string to out + */ + public static int writeString(DataOutput out, String s) throws IOException { + ByteBuffer bytes = encode(s); + int length = bytes.limit(); + WritableUtils.writeVInt(out, length); + out.write(bytes.array(), 0, length); + return length; + } + + ////// states for validateUTF8 + + private static final int LEAD_BYTE = 0; + + private static final int TRAIL_BYTE_1 = 1; + + private static final int TRAIL_BYTE = 2; + + /** + * Check if a byte array contains valid utf-8 + * @param utf8 byte array + * @throws MalformedInputException if the byte array contains invalid utf-8 + */ + public static void validateUTF8(byte[] utf8) throws MalformedInputException { + validateUTF8(utf8, 0, utf8.length); + } + + /** + * Check to see if a byte array is valid utf-8 + * @param utf8 the array of bytes + * @param start the offset of the first byte in the array + * @param len the length of the byte sequence + * @throws MalformedInputException if the byte array contains invalid bytes + */ + public static void validateUTF8(byte[] utf8, int start, int len) + throws MalformedInputException { + int count = start; + int leadByte = 0; + int length = 0; + int state = LEAD_BYTE; + while (count < start+len) { + int aByte = ((int) utf8[count] & 0xFF); + + switch (state) { + case LEAD_BYTE: + leadByte = aByte; + length = bytesFromUTF8[aByte]; + + switch (length) { + case 0: // check for ASCII + if (leadByte > 0x7F) + throw new MalformedInputException(count); + break; + case 1: + if (leadByte < 0xC2 || leadByte > 0xDF) + throw new MalformedInputException(count); + state = TRAIL_BYTE_1; + break; + case 2: + if (leadByte < 0xE0 || leadByte > 0xEF) + throw new MalformedInputException(count); + state = TRAIL_BYTE_1; + break; + case 3: + if (leadByte < 0xF0 || leadByte > 0xF4) + throw new MalformedInputException(count); + state = TRAIL_BYTE_1; + break; + default: + // too long! Longest valid UTF-8 is 4 bytes (lead + three) + // or if < 0 we got a trail byte in the lead byte position + throw new MalformedInputException(count); + } // switch (length) + break; + + case TRAIL_BYTE_1: + if (leadByte == 0xF0 && aByte < 0x90) + throw new MalformedInputException(count); + if (leadByte == 0xF4 && aByte > 0x8F) + throw new MalformedInputException(count); + if (leadByte == 0xE0 && aByte < 0xA0) + throw new MalformedInputException(count); + if (leadByte == 0xED && aByte > 0x9F) + throw new MalformedInputException(count); + // falls through to regular trail-byte test!! 
+ case TRAIL_BYTE: + if (aByte < 0x80 || aByte > 0xBF) + throw new MalformedInputException(count); + if (--length == 0) { + state = LEAD_BYTE; + } else { + state = TRAIL_BYTE; + } + break; + } // switch (state) + count++; + } + } + + /** + * Magic numbers for UTF-8. These are the number of bytes + * that follow a given lead byte. Trailing bytes + * have the value -1. The values 4 and 5 are presented in + * this table, even though valid UTF-8 cannot include the + * five and six byte sequences. + */ + static final int[] bytesFromUTF8 = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + // trail bytes + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; + + /** + * Returns the next code point at the current position in + * the buffer. The buffer's position will be incremented. + * Any mark set on this buffer will be changed by this method! + */ + public static int bytesToCodePoint(ByteBuffer bytes) { + bytes.mark(); + byte b = bytes.get(); + bytes.reset(); + int extraBytesToRead = bytesFromUTF8[(b & 0xFF)]; + if (extraBytesToRead < 0) return -1; // trailing byte! + int ch = 0; + + switch (extraBytesToRead) { + case 5: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += (bytes.get() & 0xFF); ch <<= 6; + case 2: ch += (bytes.get() & 0xFF); ch <<= 6; + case 1: ch += (bytes.get() & 0xFF); ch <<= 6; + case 0: ch += (bytes.get() & 0xFF); + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + return ch; + } + + + static final int offsetsFromUTF8[] = + { 0x00000000, 0x00003080, + 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 }; + + /** + * For the given string, returns the number of UTF-8 bytes + * required to encode the string. + * @param string text to encode + * @return number of UTF-8 bytes required to encode + */ + public static int utf8Length(String string) { + CharacterIterator iter = new StringCharacterIterator(string); + char ch = iter.first(); + int size = 0; + while (ch != CharacterIterator.DONE) { + if ((ch >= 0xD800) && (ch < 0xDC00)) { + // surrogate pair? 
+ char trail = iter.next(); + if ((trail > 0xDBFF) && (trail < 0xE000)) { + // valid pair + size += 4; + } else { + // invalid pair + size += 3; + iter.previous(); // rewind one + } + } else if (ch < 0x80) { + size++; + } else if (ch < 0x800) { + size += 2; + } else { + // ch < 0x10000, that is, the largest char value + size += 3; + } + ch = iter.next(); + } + return size; + } +} diff --git a/src/java/org/apache/hadoop/io/TwoDArrayWritable.java b/src/java/org/apache/hadoop/io/TwoDArrayWritable.java new file mode 100644 index 00000000000..23463a2a124 --- /dev/null +++ b/src/java/org/apache/hadoop/io/TwoDArrayWritable.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.lang.reflect.Array; + +/** A Writable for 2D arrays containing a matrix of instances of a class. */ +public class TwoDArrayWritable implements Writable { + private Class valueClass; + private Writable[][] values; + + public TwoDArrayWritable(Class valueClass) { + this.valueClass = valueClass; + } + + public TwoDArrayWritable(Class valueClass, Writable[][] values) { + this(valueClass); + this.values = values; + } + + public Object toArray() { + int dimensions[] = {values.length, 0}; + Object result = Array.newInstance(valueClass, dimensions); + for (int i = 0; i < values.length; i++) { + Object resultRow = Array.newInstance(valueClass, values[i].length); + Array.set(result, i, resultRow); + for (int j = 0; j < values[i].length; j++) { + Array.set(resultRow, j, values[i][j]); + } + } + return result; + } + + public void set(Writable[][] values) { this.values = values; } + + public Writable[][] get() { return values; } + + public void readFields(DataInput in) throws IOException { + // construct matrix + values = new Writable[in.readInt()][]; + for (int i = 0; i < values.length; i++) { + values[i] = new Writable[in.readInt()]; + } + + // construct values + for (int i = 0; i < values.length; i++) { + for (int j = 0; j < values[i].length; j++) { + Writable value; // construct value + try { + value = (Writable)valueClass.newInstance(); + } catch (InstantiationException e) { + throw new RuntimeException(e.toString()); + } catch (IllegalAccessException e) { + throw new RuntimeException(e.toString()); + } + value.readFields(in); // read a value + values[i][j] = value; // store it in values + } + } + } + + public void write(DataOutput out) throws IOException { + out.writeInt(values.length); // write values + for (int i = 0; i < values.length; i++) { + out.writeInt(values[i].length); + } + for (int i = 0; i < values.length; i++) { + for (int j = 0; j < values[i].length; j++) { + values[i][j].write(out); + } + } + } +} + diff --git a/src/java/org/apache/hadoop/io/UTF8.java 
b/src/java/org/apache/hadoop/io/UTF8.java new file mode 100644 index 00000000000..d9f45f7e6b4 --- /dev/null +++ b/src/java/org/apache/hadoop/io/UTF8.java @@ -0,0 +1,286 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; + + +import org.apache.commons.logging.*; + +/** A WritableComparable for strings that uses the UTF8 encoding. + * + *

Also includes utilities for efficiently reading and writing UTF-8. + * + * @deprecated replaced by Text + */ +public class UTF8 implements WritableComparable { + private static final Log LOG= LogFactory.getLog(UTF8.class); + private static final DataOutputBuffer OBUF = new DataOutputBuffer(); + private static final DataInputBuffer IBUF = new DataInputBuffer(); + + private static final byte[] EMPTY_BYTES = new byte[0]; + + private byte[] bytes = EMPTY_BYTES; + private int length; + + public UTF8() { + //set(""); + } + + /** Construct from a given string. */ + public UTF8(String string) { + set(string); + } + + /** Construct from a given string. */ + public UTF8(UTF8 utf8) { + set(utf8); + } + + /** The raw bytes. */ + public byte[] getBytes() { + return bytes; + } + + /** The number of bytes in the encoded string. */ + public int getLength() { + return length; + } + + /** Set to contain the contents of a string. */ + public void set(String string) { + if (string.length() > 0xffff/3) { // maybe too long + LOG.warn("truncating long string: " + string.length() + + " chars, starting with " + string.substring(0, 20)); + string = string.substring(0, 0xffff/3); + } + + length = utf8Length(string); // compute length + if (length > 0xffff) // double-check length + throw new RuntimeException("string too long!"); + + if (bytes == null || length > bytes.length) // grow buffer + bytes = new byte[length]; + + try { // avoid sync'd allocations + synchronized (OBUF) { + OBUF.reset(); + writeChars(OBUF, string, 0, string.length()); + System.arraycopy(OBUF.getData(), 0, bytes, 0, length); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** Set to contain the contents of a string. */ + public void set(UTF8 other) { + length = other.length; + if (bytes == null || length > bytes.length) // grow buffer + bytes = new byte[length]; + System.arraycopy(other.bytes, 0, bytes, 0, length); + } + + public void readFields(DataInput in) throws IOException { + length = in.readUnsignedShort(); + if (bytes == null || bytes.length < length) + bytes = new byte[length]; + in.readFully(bytes, 0, length); + } + + /** Skips over one UTF8 in the input. */ + public static void skip(DataInput in) throws IOException { + int length = in.readUnsignedShort(); + WritableUtils.skipFully(in, length); + } + + public void write(DataOutput out) throws IOException { + out.writeShort(length); + out.write(bytes, 0, length); + } + + /** Compare two UTF8s. */ + public int compareTo(Object o) { + UTF8 that = (UTF8)o; + return WritableComparator.compareBytes(bytes, 0, length, + that.bytes, 0, that.length); + } + + /** Convert to a String. */ + public String toString() { + StringBuffer buffer = new StringBuffer(length); + try { + synchronized (IBUF) { + IBUF.reset(bytes, length); + readChars(IBUF, buffer, length); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return buffer.toString(); + } + + /** Returns true iff o is a UTF8 with the same contents. */ + public boolean equals(Object o) { + if (!(o instanceof UTF8)) + return false; + UTF8 that = (UTF8)o; + if (this.length != that.length) + return false; + else + return WritableComparator.compareBytes(bytes, 0, length, + that.bytes, 0, that.length) == 0; + } + + public int hashCode() { + return WritableComparator.hashBytes(bytes, length); + } + + /** A WritableComparator optimized for UTF8 keys. 
*/ + public static class Comparator extends WritableComparator { + public Comparator() { + super(UTF8.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + int n1 = readUnsignedShort(b1, s1); + int n2 = readUnsignedShort(b2, s2); + return compareBytes(b1, s1+2, n1, b2, s2+2, n2); + } + } + + static { // register this comparator + WritableComparator.define(UTF8.class, new Comparator()); + } + + /// STATIC UTILITIES FROM HERE DOWN + + /// These are probably not used much anymore, and might be removed... + + /** Convert a string to a UTF-8 encoded byte array. + * @see String#getBytes(String) + */ + public static byte[] getBytes(String string) { + byte[] result = new byte[utf8Length(string)]; + try { // avoid sync'd allocations + synchronized (OBUF) { + OBUF.reset(); + writeChars(OBUF, string, 0, string.length()); + System.arraycopy(OBUF.getData(), 0, result, 0, OBUF.getLength()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return result; + } + + /** Read a UTF-8 encoded string. + * + * @see DataInput#readUTF() + */ + public static String readString(DataInput in) throws IOException { + int bytes = in.readUnsignedShort(); + StringBuffer buffer = new StringBuffer(bytes); + readChars(in, buffer, bytes); + return buffer.toString(); + } + + private static void readChars(DataInput in, StringBuffer buffer, int nBytes) + throws IOException { + synchronized (OBUF) { + OBUF.reset(); + OBUF.write(in, nBytes); + byte[] bytes = OBUF.getData(); + int i = 0; + while (i < nBytes) { + byte b = bytes[i++]; + if ((b & 0x80) == 0) { + buffer.append((char)(b & 0x7F)); + } else if ((b & 0xE0) != 0xE0) { + buffer.append((char)(((b & 0x1F) << 6) + | (bytes[i++] & 0x3F))); + } else { + buffer.append((char)(((b & 0x0F) << 12) + | ((bytes[i++] & 0x3F) << 6) + | (bytes[i++] & 0x3F))); + } + } + } + } + + /** Write a UTF-8 encoded string. + * + * @see DataOutput#writeUTF(String) + */ + public static int writeString(DataOutput out, String s) throws IOException { + if (s.length() > 0xffff/3) { // maybe too long + LOG.warn("truncating long string: " + s.length() + + " chars, starting with " + s.substring(0, 20)); + s = s.substring(0, 0xffff/3); + } + + int len = utf8Length(s); + if (len > 0xffff) // double-check length + throw new IOException("string too long!"); + + out.writeShort(len); + writeChars(out, s, 0, s.length()); + return len; + } + + /** Returns the number of bytes required to write this. 
*/ + private static int utf8Length(String string) { + int stringLength = string.length(); + int utf8Length = 0; + for (int i = 0; i < stringLength; i++) { + int c = string.charAt(i); + if ((c >= 0x0001) && (c <= 0x007F)) { + utf8Length++; + } else if (c > 0x07FF) { + utf8Length += 3; + } else { + utf8Length += 2; + } + } + return utf8Length; + } + + private static void writeChars(DataOutput out, + String s, int start, int length) + throws IOException { + final int end = start + length; + for (int i = start; i < end; i++) { + int code = s.charAt(i); + if (code >= 0x01 && code <= 0x7F) { + out.writeByte((byte)code); + } else if (code <= 0x07FF) { + out.writeByte((byte)(0xC0 | ((code >> 6) & 0x1F))); + out.writeByte((byte)(0x80 | code & 0x3F)); + } else { + out.writeByte((byte)(0xE0 | ((code >> 12) & 0X0F))); + out.writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); + out.writeByte((byte)(0x80 | (code & 0x3F))); + } + } + } + +} diff --git a/src/java/org/apache/hadoop/io/VIntWritable.java b/src/java/org/apache/hadoop/io/VIntWritable.java new file mode 100644 index 00000000000..a8af11bcfff --- /dev/null +++ b/src/java/org/apache/hadoop/io/VIntWritable.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for integer values stored in variable-length format. + * Such values take between one and five bytes. Smaller values take fewer bytes. + * + * @see org.apache.hadoop.io.WritableUtils#readVInt(DataInput) + */ +public class VIntWritable implements WritableComparable { + private int value; + + public VIntWritable() {} + + public VIntWritable(int value) { set(value); } + + /** Set the value of this VIntWritable. */ + public void set(int value) { this.value = value; } + + /** Return the value of this VIntWritable. */ + public int get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = WritableUtils.readVInt(in); + } + + public void write(DataOutput out) throws IOException { + WritableUtils.writeVInt(out, value); + } + + /** Returns true iff o is a VIntWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof VIntWritable)) + return false; + VIntWritable other = (VIntWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return value; + } + + /** Compares two VIntWritables. */ + public int compareTo(Object o) { + int thisValue = this.value; + int thatValue = ((VIntWritable)o).value; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 
0 : 1)); + } + + public String toString() { + return Integer.toString(value); + } + +} + diff --git a/src/java/org/apache/hadoop/io/VLongWritable.java b/src/java/org/apache/hadoop/io/VLongWritable.java new file mode 100644 index 00000000000..14d8602275f --- /dev/null +++ b/src/java/org/apache/hadoop/io/VLongWritable.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for longs in a variable-length format. Such values take + * between one and five bytes. Smaller values take fewer bytes. + * + * @see org.apache.hadoop.io.WritableUtils#readVLong(DataInput) + */ +public class VLongWritable implements WritableComparable { + private long value; + + public VLongWritable() {} + + public VLongWritable(long value) { set(value); } + + /** Set the value of this LongWritable. */ + public void set(long value) { this.value = value; } + + /** Return the value of this LongWritable. */ + public long get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = WritableUtils.readVLong(in); + } + + public void write(DataOutput out) throws IOException { + WritableUtils.writeVLong(out, value); + } + + /** Returns true iff o is a VLongWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof VLongWritable)) + return false; + VLongWritable other = (VLongWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)value; + } + + /** Compares two VLongWritables. */ + public int compareTo(Object o) { + long thisValue = this.value; + long thatValue = ((VLongWritable)o).value; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + + public String toString() { + return Long.toString(value); + } + +} + diff --git a/src/java/org/apache/hadoop/io/VersionMismatchException.java b/src/java/org/apache/hadoop/io/VersionMismatchException.java new file mode 100644 index 00000000000..5f57908fd5a --- /dev/null +++ b/src/java/org/apache/hadoop/io/VersionMismatchException.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; + +/** Thrown by {@link VersionedWritable#readFields(DataInput)} when the + * version of an object being read does not match the current implementation + * version as returned by {@link VersionedWritable#getVersion()}. */ +public class VersionMismatchException extends IOException { + + private byte expectedVersion; + private byte foundVersion; + + public VersionMismatchException(byte expectedVersionIn, byte foundVersionIn){ + expectedVersion = expectedVersionIn; + foundVersion = foundVersionIn; + } + + /** Returns a string representation of this object. */ + public String toString(){ + return "A record version mismatch occured. Expecting v" + + expectedVersion + ", found v" + foundVersion; + } +} diff --git a/src/java/org/apache/hadoop/io/VersionedWritable.java b/src/java/org/apache/hadoop/io/VersionedWritable.java new file mode 100644 index 00000000000..3ca4fe919ab --- /dev/null +++ b/src/java/org/apache/hadoop/io/VersionedWritable.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataOutput; +import java.io.DataInput; +import java.io.IOException; + +/** A base class for Writables that provides version checking. + * + *

This is useful when a class may evolve, so that instances written by the + * old version of the class may still be processed by the new version. To + * handle this situation, {@link #readFields(DataInput)} + * implementations should catch {@link VersionMismatchException}. + */ +public abstract class VersionedWritable implements Writable { + + /** Return the version number of the current implementation. */ + public abstract byte getVersion(); + + // javadoc from Writable + public void write(DataOutput out) throws IOException { + out.writeByte(getVersion()); // store version + } + + // javadoc from Writable + public void readFields(DataInput in) throws IOException { + byte version = in.readByte(); // read version + if (version != getVersion()) + throw new VersionMismatchException(getVersion(), version); + } + + +} diff --git a/src/java/org/apache/hadoop/io/Writable.java b/src/java/org/apache/hadoop/io/Writable.java new file mode 100644 index 00000000000..b61e5b5c34a --- /dev/null +++ b/src/java/org/apache/hadoop/io/Writable.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataOutput; +import java.io.DataInput; +import java.io.IOException; + +/** + * A serializable object which implements a simple, efficient, serialization + * protocol, based on {@link DataInput} and {@link DataOutput}. + * + *

Any key or value type in the Hadoop Map-Reduce + * framework implements this interface.

+ * + *

Implementations typically implement a static read(DataInput) + * method which constructs a new instance, calls {@link #readFields(DataInput)} + * and returns the instance.

+ * + *

Example:

+ *

+ *     public class MyWritable implements Writable {
+ *       // Some data     
+ *       private int counter;
+ *       private long timestamp;
+ *       
+ *       public void write(DataOutput out) throws IOException {
+ *         out.writeInt(counter);
+ *         out.writeLong(timestamp);
+ *       }
+ *       
+ *       public void readFields(DataInput in) throws IOException {
+ *         counter = in.readInt();
+ *         timestamp = in.readLong();
+ *       }
+ *       
+ *       public static MyWritable read(DataInput in) throws IOException {
+ *         MyWritable w = new MyWritable();
+ *         w.readFields(in);
+ *         return w;
+ *       }
+ *     }
+ * 
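A usage sketch to complement the example above (PairWritable, RoundTrip and the buffer names are illustrative only, not classes added by this patch): an object implementing this interface can be written to any DataOutput and restored from any DataInput.

    import java.io.*;
    import org.apache.hadoop.io.Writable;

    // Hypothetical record type, written only for this sketch.
    class PairWritable implements Writable {
      int counter;
      long timestamp;
      public void write(DataOutput out) throws IOException {
        out.writeInt(counter);
        out.writeLong(timestamp);
      }
      public void readFields(DataInput in) throws IOException {
        counter = in.readInt();
        timestamp = in.readLong();
      }
    }

    public class RoundTrip {
      public static void main(String[] args) throws IOException {
        PairWritable w = new PairWritable();
        w.counter = 42;
        w.timestamp = System.currentTimeMillis();

        // Serialize to a byte array through a plain DataOutputStream.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        w.write(new DataOutputStream(bytes));

        // Deserialize into a second (or a re-used) instance.
        PairWritable copy = new PairWritable();
        copy.readFields(new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.counter);   // 42
      }
    }

The same readFields() call can be issued repeatedly against one instance, which is the storage re-use pattern the readFields javadoc below recommends.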

+ */ +public interface Writable { + /** + * Serialize the fields of this object to out. + * + * @param out DataOutput to serialize this object into. + * @throws IOException + */ + void write(DataOutput out) throws IOException; + + /** + * Deserialize the fields of this object from in. + * + *

For efficiency, implementations should attempt to re-use storage in the + * existing object where possible.

+ * + * @param in DataInput to deserialize this object from. + * @throws IOException + */ + void readFields(DataInput in) throws IOException; +} diff --git a/src/java/org/apache/hadoop/io/WritableComparable.java b/src/java/org/apache/hadoop/io/WritableComparable.java new file mode 100644 index 00000000000..b8aaf731cc5 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableComparable.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** + * A {@link Writable} which is also {@link Comparable}. + * + *

WritableComparables can be compared to each other, typically + * via Comparators. Any type which is to be used as a + * key in the Hadoop Map-Reduce framework should implement this + * interface.

+ * + *

Example:

+ *

+ *     public class MyWritableComparable implements WritableComparable {
+ *       // Some data
+ *       private int counter;
+ *       private long timestamp;
+ *       
+ *       public void write(DataOutput out) throws IOException {
+ *         out.writeInt(counter);
+ *         out.writeLong(timestamp);
+ *       }
+ *       
+ *       public void readFields(DataInput in) throws IOException {
+ *         counter = in.readInt();
+ *         timestamp = in.readLong();
+ *       }
+ *       
+ *       public int compareTo(MyWritableComparable w) {
+ *         int thisValue = this.counter;
+ *         int thatValue = w.counter;
+ *         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ *       }
+ *     }
+ * 
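A brief, hedged usage sketch (not part of the patch): because WritableComparable extends Comparable, two keys of the same concrete type, for example the IntWritable referenced in the example above, can be ordered directly with compareTo().

    import org.apache.hadoop.io.IntWritable;

    public class NaturalOrderingSketch {
      public static void main(String[] args) {
        // IntWritable (added elsewhere in this patch) implements WritableComparable.
        IntWritable a = new IntWritable(1);
        IntWritable b = new IntWritable(2);
        System.out.println(a.compareTo(b));   // negative: a orders before b
        System.out.println(b.compareTo(b));   // 0: equal keys
      }
    }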

+ */ +public interface WritableComparable extends Writable, Comparable { +} diff --git a/src/java/org/apache/hadoop/io/WritableComparator.java b/src/java/org/apache/hadoop/io/WritableComparator.java new file mode 100644 index 00000000000..b0b08b4126c --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableComparator.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.util.*; + +import org.apache.hadoop.util.ReflectionUtils; + +/** A Comparator for {@link WritableComparable}s. + * + *

This base implementation uses the natural ordering. To define alternate + * orderings, override {@link #compare(WritableComparable,WritableComparable)}. + * + *
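A hedged sketch of the raw-byte optimization discussed just below, in the same style as the Text.Comparator and UTF8.Comparator classes earlier in this patch; FixedKey and FixedKeyComparator are illustrative names only.

    import java.io.*;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;

    // Hypothetical key type, used only for this sketch; serialized as one 4-byte int.
    class FixedKey implements WritableComparable {
      int value;
      public void write(DataOutput out) throws IOException { out.writeInt(value); }
      public void readFields(DataInput in) throws IOException { value = in.readInt(); }
      public int compareTo(Object o) {
        FixedKey that = (FixedKey) o;
        return (value < that.value ? -1 : (value == that.value ? 0 : 1));
      }
    }

    // Compares the serialized forms directly instead of deserializing two keys.
    public class FixedKeyComparator extends WritableComparator {
      public FixedKeyComparator() { super(FixedKey.class); }

      public int compare(byte[] b1, int s1, int l1,
                         byte[] b2, int s2, int l2) {
        int v1 = readInt(b1, s1);   // static helper provided by WritableComparator
        int v2 = readInt(b2, s2);
        return (v1 < v2 ? -1 : (v1 == v2 ? 0 : 1));
      }

      static {                      // register, as the comparators in this patch do
        WritableComparator.define(FixedKey.class, new FixedKeyComparator());
      }
    }

Once registered with define(), WritableComparator.get(FixedKey.class) returns this optimized instance instead of falling back to deserialize-and-compareTo.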

One may optimize compare-intensive operations by overriding + * {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + * provided to assist in optimized implementations of this method. + */ +public class WritableComparator implements RawComparator { + + private static HashMap comparators = + new HashMap(); // registry + + /** Get a comparator for a {@link WritableComparable} implementation. */ + public static synchronized WritableComparator get(Class c) { + WritableComparator comparator = comparators.get(c); + if (comparator == null) + comparator = new WritableComparator(c, true); + return comparator; + } + + /** Register an optimized comparator for a {@link WritableComparable} + * implementation. */ + public static synchronized void define(Class c, + WritableComparator comparator) { + comparators.put(c, comparator); + } + + + private final Class keyClass; + private final WritableComparable key1; + private final WritableComparable key2; + private final DataInputBuffer buffer; + + /** Construct for a {@link WritableComparable} implementation. */ + protected WritableComparator(Class keyClass) { + this(keyClass, false); + } + + protected WritableComparator(Class keyClass, + boolean createInstances) { + this.keyClass = keyClass; + if (createInstances) { + key1 = newKey(); + key2 = newKey(); + buffer = new DataInputBuffer(); + } else { + key1 = key2 = null; + buffer = null; + } + } + + /** Returns the WritableComparable implementation class. */ + public Class getKeyClass() { return keyClass; } + + /** Construct a new {@link WritableComparable} instance. */ + public WritableComparable newKey() { + return ReflectionUtils.newInstance(keyClass, null); + } + + /** Optimization hook. Override this to make SequenceFile.Sorter's scream. + * + *

The default implementation reads the data into two {@link + * WritableComparable}s (using {@link + * Writable#readFields(DataInput)}, then calls {@link + * #compare(WritableComparable,WritableComparable)}. + */ + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + try { + buffer.reset(b1, s1, l1); // parse key1 + key1.readFields(buffer); + + buffer.reset(b2, s2, l2); // parse key2 + key2.readFields(buffer); + + } catch (IOException e) { + throw new RuntimeException(e); + } + + return compare(key1, key2); // compare them + } + + /** Compare two WritableComparables. + * + *

The default implementation uses the natural ordering, calling {@link + * Comparable#compareTo(Object)}. */ + @SuppressWarnings("unchecked") + public int compare(WritableComparable a, WritableComparable b) { + return a.compareTo(b); + } + + public int compare(Object a, Object b) { + return compare((WritableComparable)a, (WritableComparable)b); + } + + /** Lexicographic order of binary data. */ + public static int compareBytes(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + int end1 = s1 + l1; + int end2 = s2 + l2; + for (int i = s1, j = s2; i < end1 && j < end2; i++, j++) { + int a = (b1[i] & 0xff); + int b = (b2[j] & 0xff); + if (a != b) { + return a - b; + } + } + return l1 - l2; + } + + /** Compute hash for binary data. */ + public static int hashBytes(byte[] bytes, int offset, int length) { + int hash = 1; + for (int i = offset; i < offset + length; i++) + hash = (31 * hash) + (int)bytes[i]; + return hash; + } + + /** Compute hash for binary data. */ + public static int hashBytes(byte[] bytes, int length) { + return hashBytes(bytes, 0, length); + } + + /** Parse an unsigned short from a byte array. */ + public static int readUnsignedShort(byte[] bytes, int start) { + return (((bytes[start] & 0xff) << 8) + + ((bytes[start+1] & 0xff))); + } + + /** Parse an integer from a byte array. */ + public static int readInt(byte[] bytes, int start) { + return (((bytes[start ] & 0xff) << 24) + + ((bytes[start+1] & 0xff) << 16) + + ((bytes[start+2] & 0xff) << 8) + + ((bytes[start+3] & 0xff))); + + } + + /** Parse a float from a byte array. */ + public static float readFloat(byte[] bytes, int start) { + return Float.intBitsToFloat(readInt(bytes, start)); + } + + /** Parse a long from a byte array. */ + public static long readLong(byte[] bytes, int start) { + return ((long)(readInt(bytes, start)) << 32) + + (readInt(bytes, start+4) & 0xFFFFFFFFL); + } + + /** Parse a double from a byte array. */ + public static double readDouble(byte[] bytes, int start) { + return Double.longBitsToDouble(readLong(bytes, start)); + } + + /** + * Reads a zero-compressed encoded long from a byte array and returns it. + * @param bytes byte array with decode long + * @param start starting index + * @throws java.io.IOException + * @return deserialized long + */ + public static long readVLong(byte[] bytes, int start) throws IOException { + int len = bytes[start]; + if (len >= -112) { + return len; + } + boolean isNegative = (len < -120); + len = isNegative ? -(len + 120) : -(len + 112); + if (start+1+len>bytes.length) + throw new IOException( + "Not enough number of bytes for a zero-compressed integer"); + long i = 0; + for (int idx = 0; idx < len; idx++) { + i = i << 8; + i = i | (bytes[start+1+idx] & 0xFF); + } + return (isNegative ? (i ^ -1L) : i); + } + + /** + * Reads a zero-compressed encoded integer from a byte array and returns it. + * @param bytes byte array with the encoded integer + * @param start start index + * @throws java.io.IOException + * @return deserialized integer + */ + public static int readVInt(byte[] bytes, int start) throws IOException { + return (int) readVLong(bytes, start); + } +} diff --git a/src/java/org/apache/hadoop/io/WritableFactories.java b/src/java/org/apache/hadoop/io/WritableFactories.java new file mode 100644 index 00000000000..92569bd7bae --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableFactories.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.ReflectionUtils; +import java.util.HashMap; + +/** Factories for non-public writables. Defining a factory permits {@link + * ObjectWritable} to be able to construct instances of non-public classes. */ +public class WritableFactories { + private static final HashMap CLASS_TO_FACTORY = + new HashMap(); + + private WritableFactories() {} // singleton + + /** Define a factory for a class. */ + public static synchronized void setFactory(Class c, WritableFactory factory) { + CLASS_TO_FACTORY.put(c, factory); + } + + /** Define a factory for a class. */ + public static synchronized WritableFactory getFactory(Class c) { + return CLASS_TO_FACTORY.get(c); + } + + /** Create a new instance of a class with a defined factory. */ + public static Writable newInstance(Class c, Configuration conf) { + WritableFactory factory = WritableFactories.getFactory(c); + if (factory != null) { + Writable result = factory.newInstance(); + if (result instanceof Configurable) { + ((Configurable) result).setConf(conf); + } + return result; + } else { + return ReflectionUtils.newInstance(c, conf); + } + } + + /** Create a new instance of a class with a defined factory. */ + public static Writable newInstance(Class c) { + return newInstance(c, null); + } + +} + diff --git a/src/java/org/apache/hadoop/io/WritableFactory.java b/src/java/org/apache/hadoop/io/WritableFactory.java new file mode 100644 index 00000000000..736485eef57 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableFactory.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** A factory for a class of Writable. + * @see WritableFactories + */ +public interface WritableFactory { + /** Return a new instance. 
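A hedged sketch of registering a factory with the registry above; PooledRecord is an illustrative class, not one added by this patch.

    import java.io.*;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableFactories;
    import org.apache.hadoop.io.WritableFactory;

    // Hypothetical writable without a no-arg constructor.
    public class PooledRecord implements Writable {
      private long id;

      public PooledRecord(long id) { this.id = id; }

      public void write(DataOutput out) throws IOException { out.writeLong(id); }
      public void readFields(DataInput in) throws IOException { id = in.readLong(); }

      static {
        // With a factory registered, WritableFactories.newInstance(PooledRecord.class)
        // can still produce an instance for framework code such as ObjectWritable.
        WritableFactories.setFactory(PooledRecord.class, new WritableFactory() {
          public Writable newInstance() { return new PooledRecord(0L); }
        });
      }
    }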
*/ + Writable newInstance(); +} + diff --git a/src/java/org/apache/hadoop/io/WritableName.java b/src/java/org/apache/hadoop/io/WritableName.java new file mode 100644 index 00000000000..6b6c1480b55 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableName.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.util.HashMap; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; + +/** Utility to permit renaming of Writable implementation classes without + * invalidiating files that contain their class name. + */ +public class WritableName { + private static HashMap> NAME_TO_CLASS = + new HashMap>(); + private static HashMap, String> CLASS_TO_NAME = + new HashMap, String>(); + + static { // define important types + WritableName.setName(NullWritable.class, "null"); + WritableName.setName(LongWritable.class, "long"); + WritableName.setName(UTF8.class, "UTF8"); + WritableName.setName(MD5Hash.class, "MD5Hash"); + } + + private WritableName() {} // no public ctor + + /** Set the name that a class should be known as to something other than the + * class name. */ + public static synchronized void setName(Class writableClass, String name) { + CLASS_TO_NAME.put(writableClass, name); + NAME_TO_CLASS.put(name, writableClass); + } + + /** Add an alternate name for a class. */ + public static synchronized void addName(Class writableClass, String name) { + NAME_TO_CLASS.put(name, writableClass); + } + + /** Return the name for a class. Default is {@link Class#getName()}. */ + public static synchronized String getName(Class writableClass) { + String name = CLASS_TO_NAME.get(writableClass); + if (name != null) + return name; + return writableClass.getName(); + } + + /** Return the class for a name. Default is {@link Class#forName(String)}.*/ + public static synchronized Class getClass(String name, Configuration conf + ) throws IOException { + Class writableClass = NAME_TO_CLASS.get(name); + if (writableClass != null) + return writableClass.asSubclass(Writable.class); + try { + return conf.getClassByName(name); + } catch (ClassNotFoundException e) { + IOException newE = new IOException("WritableName can't load class: " + name); + newE.initCause(e); + throw newE; + } + } + +} diff --git a/src/java/org/apache/hadoop/io/WritableUtils.java b/src/java/org/apache/hadoop/io/WritableUtils.java new file mode 100644 index 00000000000..e49ea9240c2 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableUtils.java @@ -0,0 +1,418 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReflectionUtils; + +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +public final class WritableUtils { + + public static byte[] readCompressedByteArray(DataInput in) throws IOException { + int length = in.readInt(); + if (length == -1) return null; + byte[] buffer = new byte[length]; + in.readFully(buffer); // could/should use readFully(buffer,0,length)? + GZIPInputStream gzi = new GZIPInputStream(new ByteArrayInputStream(buffer, 0, buffer.length)); + byte[] outbuf = new byte[length]; + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int len; + while((len=gzi.read(outbuf, 0, outbuf.length)) != -1){ + bos.write(outbuf, 0, len); + } + byte[] decompressed = bos.toByteArray(); + bos.close(); + gzi.close(); + return decompressed; + } + + public static void skipCompressedByteArray(DataInput in) throws IOException { + int length = in.readInt(); + if (length != -1) { + skipFully(in, length); + } + } + + public static int writeCompressedByteArray(DataOutput out, byte[] bytes) throws IOException { + if (bytes != null) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + GZIPOutputStream gzout = new GZIPOutputStream(bos); + gzout.write(bytes, 0, bytes.length); + gzout.close(); + byte[] buffer = bos.toByteArray(); + int len = buffer.length; + out.writeInt(len); + out.write(buffer, 0, len); + /* debug only! Once we have confidence, can lose this. */ + return ((bytes.length != 0) ? (100*buffer.length)/bytes.length : 0); + } else { + out.writeInt(-1); + return -1; + } + } + + + /* Ugly utility, maybe someone else can do this better */ + public static String readCompressedString(DataInput in) throws IOException { + byte[] bytes = readCompressedByteArray(in); + if (bytes == null) return null; + return new String(bytes, "UTF-8"); + } + + + public static int writeCompressedString(DataOutput out, String s) throws IOException { + return writeCompressedByteArray(out, (s != null) ? s.getBytes("UTF-8") : null); + } + + /* + * + * Write a String as a Network Int n, followed by n Bytes + * Alternative to 16 bit read/writeUTF. + * Encoding standard is... ? + * + */ + public static void writeString(DataOutput out, String s) throws IOException { + if (s != null) { + byte[] buffer = s.getBytes("UTF-8"); + int len = buffer.length; + out.writeInt(len); + out.write(buffer, 0, len); + } else { + out.writeInt(-1); + } + } + + /* + * Read a String as a Network Int n, followed by n Bytes + * Alternative to 16 bit read/writeUTF. + * Encoding standard is... ? + * + */ + public static String readString(DataInput in) throws IOException{ + int length = in.readInt(); + if (length == -1) return null; + byte[] buffer = new byte[length]; + in.readFully(buffer); // could/should use readFully(buffer,0,length)? 
+ return new String(buffer,"UTF-8"); + } + + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array Strings. + * Could be generalised using introspection. + * + */ + public static void writeStringArray(DataOutput out, String[] s) throws IOException{ + out.writeInt(s.length); + for(int i = 0; i < s.length; i++) { + writeString(out, s[i]); + } + } + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array of + * compressed Strings. Handles also null arrays and null values. + * Could be generalised using introspection. + * + */ + public static void writeCompressedStringArray(DataOutput out, String[] s) throws IOException{ + if (s == null) { + out.writeInt(-1); + return; + } + out.writeInt(s.length); + for(int i = 0; i < s.length; i++) { + writeCompressedString(out, s[i]); + } + } + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array Strings. + * Could be generalised using introspection. Actually this bit couldn't... + * + */ + public static String[] readStringArray(DataInput in) throws IOException { + int len = in.readInt(); + if (len == -1) return null; + String[] s = new String[len]; + for(int i = 0; i < len; i++) { + s[i] = readString(in); + } + return s; + } + + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array Strings. + * Could be generalised using introspection. Handles null arrays and null values. + * + */ + public static String[] readCompressedStringArray(DataInput in) throws IOException { + int len = in.readInt(); + if (len == -1) return null; + String[] s = new String[len]; + for(int i = 0; i < len; i++) { + s[i] = readCompressedString(in); + } + return s; + } + + + /* + * + * Test Utility Method Display Byte Array. + * + */ + public static void displayByteArray(byte[] record){ + int i; + for(i=0;i < record.length -1; i++){ + if (i % 16 == 0) { System.out.println(); } + System.out.print(Integer.toHexString(record[i] >> 4 & 0x0F)); + System.out.print(Integer.toHexString(record[i] & 0x0F)); + System.out.print(","); + } + System.out.print(Integer.toHexString(record[i] >> 4 & 0x0F)); + System.out.print(Integer.toHexString(record[i] & 0x0F)); + System.out.println(); + } + + /** + * Make a copy of a writable object using serialization to a buffer. + * @param orig The object to copy + * @return The copied object + */ + public static T clone(T orig, Configuration conf) { + try { + @SuppressWarnings("unchecked") // Unchecked cast from Class to Class + T newInst = ReflectionUtils.newInstance((Class) orig.getClass(), conf); + ReflectionUtils.copy(conf, orig, newInst); + return newInst; + } catch (IOException e) { + throw new RuntimeException("Error writing/reading clone buffer", e); + } + } + + /** + * Make a copy of the writable object using serialiation to a buffer + * @param dst the object to copy from + * @param src the object to copy into, which is destroyed + * @throws IOException + * @deprecated use ReflectionUtils.cloneInto instead. + */ + @Deprecated + public static void cloneInto(Writable dst, Writable src) throws IOException { + ReflectionUtils.cloneWritableInto(dst, src); + } + + /** + * Serializes an integer to a binary stream with zero-compressed encoding. + * For -120 <= i <= 127, only one byte is used with the actual value. + * For other values of i, the first byte value indicates whether the + * integer is positive or negative, and the number of bytes that follow. 
+ * If the first byte value v is between -121 and -124, the following integer + * is positive, with number of bytes that follow are -(v+120). + * If the first byte value v is between -125 and -128, the following integer + * is negative, with number of bytes that follow are -(v+124). Bytes are + * stored in the high-non-zero-byte-first order. + * + * @param stream Binary output stream + * @param i Integer to be serialized + * @throws java.io.IOException + */ + public static void writeVInt(DataOutput stream, int i) throws IOException { + writeVLong(stream, i); + } + + /** + * Serializes a long to a binary stream with zero-compressed encoding. + * For -112 <= i <= 127, only one byte is used with the actual value. + * For other values of i, the first byte value indicates whether the + * long is positive or negative, and the number of bytes that follow. + * If the first byte value v is between -113 and -120, the following long + * is positive, with number of bytes that follow are -(v+112). + * If the first byte value v is between -121 and -128, the following long + * is negative, with number of bytes that follow are -(v+120). Bytes are + * stored in the high-non-zero-byte-first order. + * + * @param stream Binary output stream + * @param i Long to be serialized + * @throws java.io.IOException + */ + public static void writeVLong(DataOutput stream, long i) throws IOException { + if (i >= -112 && i <= 127) { + stream.writeByte((byte)i); + return; + } + + int len = -112; + if (i < 0) { + i ^= -1L; // take one's complement' + len = -120; + } + + long tmp = i; + while (tmp != 0) { + tmp = tmp >> 8; + len--; + } + + stream.writeByte((byte)len); + + len = (len < -120) ? -(len + 120) : -(len + 112); + + for (int idx = len; idx != 0; idx--) { + int shiftbits = (idx - 1) * 8; + long mask = 0xFFL << shiftbits; + stream.writeByte((byte)((i & mask) >> shiftbits)); + } + } + + + /** + * Reads a zero-compressed encoded long from input stream and returns it. + * @param stream Binary input stream + * @throws java.io.IOException + * @return deserialized long from stream. + */ + public static long readVLong(DataInput stream) throws IOException { + byte firstByte = stream.readByte(); + int len = decodeVIntSize(firstByte); + if (len == 1) { + return firstByte; + } + long i = 0; + for (int idx = 0; idx < len-1; idx++) { + byte b = stream.readByte(); + i = i << 8; + i = i | (b & 0xFF); + } + return (isNegativeVInt(firstByte) ? (i ^ -1L) : i); + } + + /** + * Reads a zero-compressed encoded integer from input stream and returns it. + * @param stream Binary input stream + * @throws java.io.IOException + * @return deserialized integer from stream. 
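A short usage sketch for the zero-compressed encoding described above (stream and class names are illustrative): writeVInt/readVInt round-trip values, and getVIntSize reports the encoded length without writing anything.

    import java.io.*;
    import org.apache.hadoop.io.WritableUtils;

    public class VIntSketch {
      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);

        WritableUtils.writeVInt(out, 100);       // fits in a single byte
        WritableUtils.writeVInt(out, 100000);    // one length byte plus three data bytes
        out.flush();

        System.out.println(WritableUtils.getVIntSize(100));      // 1
        System.out.println(WritableUtils.getVIntSize(100000));   // 4

        DataInputStream in = new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(WritableUtils.readVInt(in));          // 100
        System.out.println(WritableUtils.readVInt(in));          // 100000
      }
    }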
+  /**
+   * Reads a zero-compressed encoded integer from input stream and returns it.
+   * @param stream Binary input stream
+   * @throws java.io.IOException
+   * @return deserialized integer from stream.
+   */
+  public static int readVInt(DataInput stream) throws IOException {
+    return (int) readVLong(stream);
+  }
+
+  /**
+   * Given the first byte of a vint/vlong, determine the sign
+   * @param value the first byte
+   * @return is the value negative
+   */
+  public static boolean isNegativeVInt(byte value) {
+    return value < -120 || (value >= -112 && value < 0);
+  }
+
+  /**
+   * Parse the first byte of a vint/vlong to determine the number of bytes
+   * @param value the first byte of the vint/vlong
+   * @return the total number of bytes (1 to 9)
+   */
+  public static int decodeVIntSize(byte value) {
+    if (value >= -112) {
+      return 1;
+    } else if (value < -120) {
+      return -119 - value;
+    }
+    return -111 - value;
+  }
+
+  /**
+   * Get the encoded length if an integer is stored in a variable-length format
+   * @return the encoded length
+   */
+  public static int getVIntSize(long i) {
+    if (i >= -112 && i <= 127) {
+      return 1;
+    }
+
+    if (i < 0) {
+      i ^= -1L; // take one's complement
+    }
+    // find the number of bytes with non-leading zeros
+    int dataBits = Long.SIZE - Long.numberOfLeadingZeros(i);
+    // find the number of data bytes + length byte
+    return (dataBits + 7) / 8 + 1;
+  }
+
+  /**
+   * Read an Enum value from DataInput. Enums are read and written
+   * using String values.
+   * @param <T> Enum type
+   * @param in DataInput to read from
+   * @param enumType Class type of Enum
+   * @return Enum represented by String read from DataInput
+   * @throws IOException
+   */
+  public static <T extends Enum<T>> T readEnum(DataInput in, Class<T> enumType)
+    throws IOException{
+    return T.valueOf(enumType, Text.readString(in));
+  }
+
+  /**
+   * Writes the String value of an enum to DataOutput.
+   * @param out DataOutput stream
+   * @param enumVal enum value
+   * @throws IOException
+   */
+  public static void writeEnum(DataOutput out, Enum<?> enumVal)
+    throws IOException{
+    Text.writeString(out, enumVal.name());
+  }
+
+  /**
+   * Skip len number of bytes in the given input stream.
+   * @param in input stream
+   * @param len number of bytes to skip
+   * @throws IOException when fewer than len bytes could be skipped
+   */
+  public static void skipFully(DataInput in, int len) throws IOException {
+    int total = 0;
+    int cur = 0;
+
+    while ((total < len) && ((cur = in.skipBytes(len - total)) > 0)) {
+      total += cur;
+    }
+
+    if (total < len) {
+      throw new IOException("Not able to skip " + len + " bytes, possibly " +
+                            "due to end of input.");
+    }
+  }
+}
+
+  public Class<? extends Compressor> getCompressorType() {
+    return BZip2DummyCompressor.class;
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public Compressor createCompressor() {
+    return new BZip2DummyCompressor();
+  }
+
+  /**
+   * Creates CompressionInputStream to be used to read off uncompressed data.
+   *
+   * @param in
+   *            The InputStream
+   * @return Returns CompressionInputStream for BZip2
+   * @throws java.io.IOException
+   *             Throws IOException
+   */
+  public CompressionInputStream createInputStream(InputStream in)
+      throws IOException {
+    return new BZip2CompressionInputStream(in);
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public CompressionInputStream createInputStream(InputStream in,
+      Decompressor decompressor) throws IOException {
+    return createInputStream(in);
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public Class<? extends Decompressor> getDecompressorType() {
+    return BZip2DummyDecompressor.class;
+  }
+
+  /**
+   * This functionality is currently not supported.
+ * + * @throws java.lang.UnsupportedOperationException + * Throws UnsupportedOperationException + */ + public Decompressor createDecompressor() { + return new BZip2DummyDecompressor(); + } + + /** + * .bz2 is recognized as the default extension for compressed BZip2 files + * + * @return A String telling the default bzip2 file extension + */ + public String getDefaultExtension() { + return ".bz2"; + } + + private static class BZip2CompressionOutputStream extends CompressionOutputStream { + + // class data starts here// + private CBZip2OutputStream output; + private boolean needsReset; + // class data ends here// + + public BZip2CompressionOutputStream(OutputStream out) + throws IOException { + super(out); + needsReset = true; + } + + private void writeStreamHeader() throws IOException { + if (super.out != null) { + // The compressed bzip2 stream should start with the + // identifying characters BZ. Caller of CBZip2OutputStream + // i.e. this class must write these characters. + out.write(HEADER.getBytes()); + } + } + + public void finish() throws IOException { + if (needsReset) { + // In the case that nothing is written to this stream, we still need to + // write out the header before closing, otherwise the stream won't be + // recognized by BZip2CompressionInputStream. + internalReset(); + } + this.output.finish(); + needsReset = true; + } + + private void internalReset() throws IOException { + if (needsReset) { + needsReset = false; + writeStreamHeader(); + this.output = new CBZip2OutputStream(out); + } + } + + public void resetState() throws IOException { + // Cannot write to out at this point because out might not be ready + // yet, as in SequenceFile.Writer implementation. + needsReset = true; + } + + public void write(int b) throws IOException { + if (needsReset) { + internalReset(); + } + this.output.write(b); + } + + public void write(byte[] b, int off, int len) throws IOException { + if (needsReset) { + internalReset(); + } + this.output.write(b, off, len); + } + + public void close() throws IOException { + if (needsReset) { + // In the case that nothing is written to this stream, we still need to + // write out the header before closing, otherwise the stream won't be + // recognized by BZip2CompressionInputStream. + internalReset(); + } + this.output.flush(); + this.output.close(); + needsReset = true; + } + + }// end of class BZip2CompressionOutputStream + + private static class BZip2CompressionInputStream extends CompressionInputStream { + + // class data starts here// + private CBZip2InputStream input; + boolean needsReset; + // class data ends here// + + public BZip2CompressionInputStream(InputStream in) throws IOException { + + super(in); + needsReset = true; + } + + private BufferedInputStream readStreamHeader() throws IOException { + // We are flexible enough to allow the compressed stream not to + // start with the header of BZ. So it works fine either we have + // the header or not. 
+ BufferedInputStream bufferedIn = null; + if (super.in != null) { + bufferedIn = new BufferedInputStream(super.in); + bufferedIn.mark(HEADER_LEN); + byte[] headerBytes = new byte[HEADER_LEN]; + int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN); + if (actualRead != -1) { + String header = new String(headerBytes); + if (header.compareTo(HEADER) != 0) { + bufferedIn.reset(); + } + } + } + + if (bufferedIn == null) { + throw new IOException("Failed to read bzip2 stream."); + } + + return bufferedIn; + + }// end of method + + public void close() throws IOException { + if (!needsReset) { + input.close(); + needsReset = true; + } + } + + public int read(byte[] b, int off, int len) throws IOException { + if (needsReset) { + internalReset(); + } + return this.input.read(b, off, len); + + } + + private void internalReset() throws IOException { + if (needsReset) { + needsReset = false; + BufferedInputStream bufferedIn = readStreamHeader(); + input = new CBZip2InputStream(bufferedIn); + } + } + + public void resetState() throws IOException { + // Cannot read from bufferedIn at this point because bufferedIn might not be ready + // yet, as in SequenceFile.Reader implementation. + needsReset = true; + } + + public int read() throws IOException { + if (needsReset) { + internalReset(); + } + return this.input.read(); + } + + }// end of BZip2CompressionInputStream + +} diff --git a/src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java b/src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java new file mode 100644 index 00000000000..b1fb21f0eaf --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * A {@link org.apache.hadoop.io.compress.CompressorStream} which works + * with 'block-based' based compression algorithms, as opposed to + * 'stream-based' compression algorithms. + * + * It should be noted that this wrapper does not guarantee that blocks will + * be sized for the compressor. If the + * {@link org.apache.hadoop.io.compress.Compressor} requires buffering to + * effect meaningful compression, it is responsible for it. + */ +public class BlockCompressorStream extends CompressorStream { + + // The 'maximum' size of input data to be compressed, to account + // for the overhead of the compression algorithm. + private final int MAX_INPUT_SIZE; + + /** + * Create a {@link BlockCompressorStream}. 
+ * + * @param out stream + * @param compressor compressor to be used + * @param bufferSize size of buffer + * @param compressionOverhead maximum 'overhead' of the compression + * algorithm with given bufferSize + */ + public BlockCompressorStream(OutputStream out, Compressor compressor, + int bufferSize, int compressionOverhead) { + super(out, compressor, bufferSize); + MAX_INPUT_SIZE = bufferSize - compressionOverhead; + } + + /** + * Create a {@link BlockCompressorStream} with given output-stream and + * compressor. + * Use default of 512 as bufferSize and compressionOverhead of + * (1% of bufferSize + 12 bytes) = 18 bytes (zlib algorithm). + * + * @param out stream + * @param compressor compressor to be used + */ + public BlockCompressorStream(OutputStream out, Compressor compressor) { + this(out, compressor, 512, 18); + } + + /** + * Write the data provided to the compression codec, compressing no more + * than the buffer size less the compression overhead as specified during + * construction for each block. + * + * Each block contains the uncompressed length for the block, followed by + * one or more length-prefixed blocks of compressed data. + */ + public void write(byte[] b, int off, int len) throws IOException { + // Sanity checks + if (compressor.finished()) { + throw new IOException("write beyond end of stream"); + } + if (b == null) { + throw new NullPointerException(); + } else if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length)) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + long limlen = compressor.getBytesRead(); + if (len + limlen > MAX_INPUT_SIZE && limlen > 0) { + // Adding this segment would exceed the maximum size. + // Flush data if we have it. + finish(); + compressor.reset(); + } + + if (len > MAX_INPUT_SIZE) { + // The data we're given exceeds the maximum size. Any data + // we had have been flushed, so we write out this chunk in segments + // not exceeding the maximum size until it is exhausted. + rawWriteInt(len); + do { + int bufLen = Math.min(len, MAX_INPUT_SIZE); + + compressor.setInput(b, off, bufLen); + compressor.finish(); + while (!compressor.finished()) { + compress(); + } + compressor.reset(); + off += bufLen; + len -= bufLen; + } while (len > 0); + return; + } + + // Give data to the compressor + compressor.setInput(b, off, len); + if (!compressor.needsInput()) { + // compressor buffer size might be smaller than the maximum + // size, so we permit it to flush if required. 
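    // For orientation, the framing that write() produces for each logical block
    // looks like this (a sketch of the layout, not a formal spec):
    //
    //   [ 4-byte uncompressed length of the block ]              <- rawWriteInt
    //   [ 4-byte length of compressed chunk #1 ][ chunk #1 bytes ]
    //   [ 4-byte length of compressed chunk #2 ][ chunk #2 bytes ]   (only if needed)
    //   ...
    //
    // BlockDecompressorStream (next file) reads the leading int, then keeps
    // consuming length-prefixed chunks until that many uncompressed bytes
    // have been produced.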
+ rawWriteInt((int)compressor.getBytesRead()); + do { + compress(); + } while (!compressor.needsInput()); + } + } + + public void finish() throws IOException { + if (!compressor.finished()) { + rawWriteInt((int)compressor.getBytesRead()); + compressor.finish(); + while (!compressor.finished()) { + compress(); + } + } + } + + protected void compress() throws IOException { + int len = compressor.compress(buffer, 0, buffer.length); + if (len > 0) { + // Write out the compressed chunk + rawWriteInt(len); + out.write(buffer, 0, len); + } + } + + private void rawWriteInt(int v) throws IOException { + out.write((v >>> 24) & 0xFF); + out.write((v >>> 16) & 0xFF); + out.write((v >>> 8) & 0xFF); + out.write((v >>> 0) & 0xFF); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java b/src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java new file mode 100644 index 00000000000..96636e7a4ff --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +/** + * A {@link org.apache.hadoop.io.compress.DecompressorStream} which works + * with 'block-based' based compression algorithms, as opposed to + * 'stream-based' compression algorithms. + * + */ +public class BlockDecompressorStream extends DecompressorStream { + private int originalBlockSize = 0; + private int noUncompressedBytes = 0; + + /** + * Create a {@link BlockDecompressorStream}. + * + * @param in input stream + * @param decompressor decompressor to use + * @param bufferSize size of buffer + */ + public BlockDecompressorStream(InputStream in, Decompressor decompressor, + int bufferSize) { + super(in, decompressor, bufferSize); + } + + /** + * Create a {@link BlockDecompressorStream}. 
+ * + * @param in input stream + * @param decompressor decompressor to use + */ + public BlockDecompressorStream(InputStream in, Decompressor decompressor) { + super(in, decompressor); + } + + protected BlockDecompressorStream(InputStream in) { + super(in); + } + + protected int decompress(byte[] b, int off, int len) throws IOException { + // Check if we are the beginning of a block + if (noUncompressedBytes == originalBlockSize) { + // Get original data size + try { + originalBlockSize = rawReadInt(); + } catch (IOException ioe) { + return -1; + } + noUncompressedBytes = 0; + } + + int n = 0; + while ((n = decompressor.decompress(b, off, len)) == 0) { + if (decompressor.finished() || decompressor.needsDictionary()) { + if (noUncompressedBytes >= originalBlockSize) { + eof = true; + return -1; + } + } + if (decompressor.needsInput()) { + getCompressedData(); + } + } + + // Note the no. of decompressed bytes read from 'current' block + noUncompressedBytes += n; + + return n; + } + + protected void getCompressedData() throws IOException { + checkStream(); + + // Get the size of the compressed chunk + int len = rawReadInt(); + + // Read len bytes from underlying stream + if (len > buffer.length) { + buffer = new byte[len]; + } + int n = 0, off = 0; + while (n < len) { + int count = in.read(buffer, off + n, len - n); + if (count < 0) { + throw new EOFException(); + } + n += count; + } + + // Send the read data to the decompressor + decompressor.setInput(buffer, 0, len); + } + + public void resetState() throws IOException { + super.resetState(); + } + + private int rawReadInt() throws IOException { + int b1 = in.read(); + int b2 = in.read(); + int b3 = in.read(); + int b4 = in.read(); + if ((b1 | b2 | b3 | b4) < 0) + throw new EOFException(); + return ((b1 << 24) + (b2 << 16) + (b3 << 8) + (b4 << 0)); + } +} diff --git a/src/java/org/apache/hadoop/io/compress/CodecPool.java b/src/java/org/apache/hadoop/io/compress/CodecPool.java new file mode 100644 index 00000000000..8960b41ef3d --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CodecPool.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A global compressor/decompressor pool used to save and reuse + * (possibly native) compression/decompression codecs. 
+ */
+public class CodecPool {
+  private static final Log LOG = LogFactory.getLog(CodecPool.class);
+
+  /**
+   * A global compressor pool used to save the expensive
+   * construction/destruction of (possibly native) compression codecs.
+   */
+  private static final Map<Class<Compressor>, List<Compressor>> compressorPool =
+    new HashMap<Class<Compressor>, List<Compressor>>();
+
+  /**
+   * A global decompressor pool used to save the expensive
+   * construction/destruction of (possibly native) decompression codecs.
+   */
+  private static final Map<Class<Decompressor>, List<Decompressor>> decompressorPool =
+    new HashMap<Class<Decompressor>, List<Decompressor>>();
+
+  private static <T> T borrow(Map<Class<T>, List<T>> pool,
+                              Class<? extends T> codecClass) {
+    T codec = null;
+
+    // Check if an appropriate codec is available
+    synchronized (pool) {
+      if (pool.containsKey(codecClass)) {
+        List<T> codecList = pool.get(codecClass);
+
+        if (codecList != null) {
+          synchronized (codecList) {
+            if (!codecList.isEmpty()) {
+              codec = codecList.remove(codecList.size()-1);
+            }
+          }
+        }
+      }
+    }
+
+    return codec;
+  }
+
+  private static <T> void payback(Map<Class<T>, List<T>> pool, T codec) {
+    if (codec != null) {
+      Class<T> codecClass = ReflectionUtils.getClass(codec);
+      synchronized (pool) {
+        if (!pool.containsKey(codecClass)) {
+          pool.put(codecClass, new ArrayList<T>());
+        }
+
+        List<T> codecList = pool.get(codecClass);
+        synchronized (codecList) {
+          codecList.add(codec);
+        }
+      }
+    }
+  }
+
+  /**
+   * Get a {@link Compressor} for the given {@link CompressionCodec} from the
+   * pool or a new one.
+   *
+   * @param codec the CompressionCodec for which to get the
+   *              Compressor
+   * @return Compressor for the given
+   *         CompressionCodec from the pool or a new one
+   */
+  public static Compressor getCompressor(CompressionCodec codec) {
+    Compressor compressor = borrow(compressorPool, codec.getCompressorType());
+    if (compressor == null) {
+      compressor = codec.createCompressor();
+      LOG.info("Got brand-new compressor");
+    } else {
+      LOG.debug("Got recycled compressor");
+    }
+    return compressor;
+  }
+
+  /**
+   * Get a {@link Decompressor} for the given {@link CompressionCodec} from the
+   * pool or a new one.
+   *
+   * @param codec the CompressionCodec for which to get the
+   *              Decompressor
+   * @return Decompressor for the given
+   *         CompressionCodec from the pool or a new one
+   */
+  public static Decompressor getDecompressor(CompressionCodec codec) {
+    Decompressor decompressor = borrow(decompressorPool, codec.getDecompressorType());
+    if (decompressor == null) {
+      decompressor = codec.createDecompressor();
+      LOG.info("Got brand-new decompressor");
+    } else {
+      LOG.debug("Got recycled decompressor");
+    }
+    return decompressor;
+  }
+
+  /**
+   * Return the {@link Compressor} to the pool.
+   *
+   * @param compressor the Compressor to be returned to the pool
+   */
+  public static void returnCompressor(Compressor compressor) {
+    if (compressor == null) {
+      return;
+    }
+    compressor.reset();
+    payback(compressorPool, compressor);
+  }
+
+  /**
+   * Return the {@link Decompressor} to the pool.
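A sketch of the intended borrow/return pattern (conf and fileOut stand in for whatever Configuration and output stream the caller already has; DefaultCodec is the codec defined later in this patch):

    CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);
    Compressor compressor = CodecPool.getCompressor(codec);
    try {
      CompressionOutputStream out = codec.createOutputStream(fileOut, compressor);
      // ... write data, then out.finish() and out.close() ...
    } finally {
      CodecPool.returnCompressor(compressor);  // reset() and hand back to the pool
    }
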
+ * + * @param decompressor the Decompressor to be returned to the + * pool + */ + public static void returnDecompressor(Decompressor decompressor) { + if (decompressor == null) { + return; + } + decompressor.reset(); + payback(decompressorPool, decompressor); + } +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionCodec.java b/src/java/org/apache/hadoop/io/compress/CompressionCodec.java new file mode 100644 index 00000000000..9d9ccd4e632 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionCodec.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * This class encapsulates a streaming compression/decompression pair. + */ +public interface CompressionCodec { + + /** + * Create a {@link CompressionOutputStream} that will write to the given + * {@link OutputStream}. + * + * @param out the location for the final output stream + * @return a stream the user can write uncompressed data to have it compressed + * @throws IOException + */ + CompressionOutputStream createOutputStream(OutputStream out) + throws IOException; + + /** + * Create a {@link CompressionOutputStream} that will write to the given + * {@link OutputStream} with the given {@link Compressor}. + * + * @param out the location for the final output stream + * @param compressor compressor to use + * @return a stream the user can write uncompressed data to have it compressed + * @throws IOException + */ + CompressionOutputStream createOutputStream(OutputStream out, + Compressor compressor) + throws IOException; + + /** + * Get the type of {@link Compressor} needed by this {@link CompressionCodec}. + * + * @return the type of compressor needed by this codec. + */ + Class getCompressorType(); + + /** + * Create a new {@link Compressor} for use by this {@link CompressionCodec}. + * + * @return a new compressor for use by this codec + */ + Compressor createCompressor(); + + /** + * Create a stream decompressor that will read from the given input stream. + * + * @param in the stream to read compressed bytes from + * @return a stream to read uncompressed bytes from + * @throws IOException + */ + CompressionInputStream createInputStream(InputStream in) throws IOException; + + /** + * Create a {@link CompressionInputStream} that will read from the given + * {@link InputStream} with the given {@link Decompressor}. 
+ * + * @param in the stream to read compressed bytes from + * @param decompressor decompressor to use + * @return a stream to read uncompressed bytes from + * @throws IOException + */ + CompressionInputStream createInputStream(InputStream in, + Decompressor decompressor) + throws IOException; + + + /** + * Get the type of {@link Decompressor} needed by this {@link CompressionCodec}. + * + * @return the type of decompressor needed by this codec. + */ + Class getDecompressorType(); + + /** + * Create a new {@link Decompressor} for use by this {@link CompressionCodec}. + * + * @return a new decompressor for use by this codec + */ + Decompressor createDecompressor(); + + /** + * Get the default filename extension for this kind of compression. + * @return the extension including the '.' + */ + String getDefaultExtension(); +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java b/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java new file mode 100644 index 00000000000..dae2e68e1c3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import java.util.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A factory that will find the correct codec for a given filename. + */ +public class CompressionCodecFactory { + + public static final Log LOG = + LogFactory.getLog(CompressionCodecFactory.class.getName()); + + /** + * A map from the reversed filename suffixes to the codecs. + * This is probably overkill, because the maps should be small, but it + * automatically supports finding the longest matching suffix. + */ + private SortedMap codecs = null; + + private void addCodec(CompressionCodec codec) { + String suffix = codec.getDefaultExtension(); + codecs.put(new StringBuffer(suffix).reverse().toString(), codec); + } + + /** + * Print the extension map out as a string. 
+   */
+  public String toString() {
+    StringBuffer buf = new StringBuffer();
+    Iterator<Map.Entry<String, CompressionCodec>> itr =
+      codecs.entrySet().iterator();
+    buf.append("{ ");
+    if (itr.hasNext()) {
+      Map.Entry<String, CompressionCodec> entry = itr.next();
+      buf.append(entry.getKey());
+      buf.append(": ");
+      buf.append(entry.getValue().getClass().getName());
+      while (itr.hasNext()) {
+        entry = itr.next();
+        buf.append(", ");
+        buf.append(entry.getKey());
+        buf.append(": ");
+        buf.append(entry.getValue().getClass().getName());
+      }
+    }
+    buf.append(" }");
+    return buf.toString();
+  }
+
+  /**
+   * Get the list of codecs listed in the configuration
+   * @param conf the configuration to look in
+   * @return a list of the CompressionCodec classes or null if the attribute
+   *         was not set
+   */
+  public static List<Class<? extends CompressionCodec>> getCodecClasses(Configuration conf) {
+    String codecsString = conf.get("io.compression.codecs");
+    if (codecsString != null) {
+      List<Class<? extends CompressionCodec>> result
+        = new ArrayList<Class<? extends CompressionCodec>>();
+      StringTokenizer codecSplit = new StringTokenizer(codecsString, ",");
+      while (codecSplit.hasMoreElements()) {
+        String codecSubstring = codecSplit.nextToken();
+        if (codecSubstring.length() != 0) {
+          try {
+            Class<?> cls = conf.getClassByName(codecSubstring);
+            if (!CompressionCodec.class.isAssignableFrom(cls)) {
+              throw new IllegalArgumentException("Class " + codecSubstring +
+                                                 " is not a CompressionCodec");
+            }
+            result.add(cls.asSubclass(CompressionCodec.class));
+          } catch (ClassNotFoundException ex) {
+            throw new IllegalArgumentException("Compression codec " +
+                                               codecSubstring + " not found.",
+                                               ex);
+          }
+        }
+      }
+      return result;
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Sets a list of codec classes in the configuration.
+   * @param conf the configuration to modify
+   * @param classes the list of classes to set
+   */
+  public static void setCodecClasses(Configuration conf,
+                                     List<Class> classes) {
+    StringBuffer buf = new StringBuffer();
+    Iterator<Class> itr = classes.iterator();
+    if (itr.hasNext()) {
+      Class cls = itr.next();
+      buf.append(cls.getName());
+      while(itr.hasNext()) {
+        buf.append(',');
+        buf.append(itr.next().getName());
+      }
+    }
+    conf.set("io.compression.codecs", buf.toString());
+  }
+
+  /**
+   * Find the codecs specified in the config value io.compression.codecs
+   * and register them. Defaults to gzip and deflate.
+   */
+  public CompressionCodecFactory(Configuration conf) {
+    codecs = new TreeMap<String, CompressionCodec>();
+    List<Class<? extends CompressionCodec>> codecClasses = getCodecClasses(conf);
+    if (codecClasses == null) {
+      addCodec(new GzipCodec());
+      addCodec(new DefaultCodec());
+    } else {
+      Iterator<Class<? extends CompressionCodec>> itr = codecClasses.iterator();
+      while (itr.hasNext()) {
+        CompressionCodec codec = ReflectionUtils.newInstance(itr.next(), conf);
+        addCodec(codec);
+      }
+    }
+  }
+
+  /**
+   * Find the relevant compression codec for the given file based on its
+   * filename suffix.
+   * @param file the filename to check
+   * @return the codec object
+   */
+  public CompressionCodec getCodec(Path file) {
+    CompressionCodec result = null;
+    if (codecs != null) {
+      String filename = file.getName();
+      String reversedFilename = new StringBuffer(filename).reverse().toString();
+      SortedMap<String, CompressionCodec> subMap =
+        codecs.headMap(reversedFilename);
+      if (!subMap.isEmpty()) {
+        String potentialSuffix = subMap.lastKey();
+        if (reversedFilename.startsWith(potentialSuffix)) {
+          result = codecs.get(potentialSuffix);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Removes a suffix from a filename, if it has it.
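For illustration, resolving a codec from a file name with the default registrations above (the path is made up):

    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    CompressionCodec codec = factory.getCodec(new Path("/data/part-00000.gz"));
    // codec is the gzip codec; a name with no registered suffix yields null
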
+ * @param filename the filename to strip + * @param suffix the suffix to remove + * @return the shortened filename + */ + public static String removeSuffix(String filename, String suffix) { + if (filename.endsWith(suffix)) { + return filename.substring(0, filename.length() - suffix.length()); + } + return filename; + } + + /** + * A little test program. + * @param args + */ + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + boolean encode = false; + for(int i=0; i < args.length; ++i) { + if ("-in".equals(args[i])) { + encode = true; + } else if ("-out".equals(args[i])) { + encode = false; + } else { + CompressionCodec codec = factory.getCodec(new Path(args[i])); + if (codec == null) { + System.out.println("Codec for " + args[i] + " not found."); + } else { + if (encode) { + CompressionOutputStream out = + codec.createOutputStream(new java.io.FileOutputStream(args[i])); + byte[] buffer = new byte[100]; + String inFilename = removeSuffix(args[i], + codec.getDefaultExtension()); + java.io.InputStream in = new java.io.FileInputStream(inFilename); + int len = in.read(buffer); + while (len > 0) { + out.write(buffer, 0, len); + len = in.read(buffer); + } + in.close(); + out.close(); + } else { + CompressionInputStream in = + codec.createInputStream(new java.io.FileInputStream(args[i])); + byte[] buffer = new byte[100]; + int len = in.read(buffer); + while (len > 0) { + System.out.write(buffer, 0, len); + len = in.read(buffer); + } + in.close(); + } + } + } + } + } +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionInputStream.java b/src/java/org/apache/hadoop/io/compress/CompressionInputStream.java new file mode 100644 index 00000000000..aabdd2b5e4d --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionInputStream.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A compression input stream. + * + *

Implementations are assumed to be buffered. This permits clients to + * reposition the underlying input stream then call {@link #resetState()}, + * without having to also synchronize client buffers. + */ +public abstract class CompressionInputStream extends InputStream { + /** + * The input stream to be compressed. + */ + protected final InputStream in; + + /** + * Create a compression input stream that reads + * the decompressed bytes from the given stream. + * + * @param in The input stream to be compressed. + */ + protected CompressionInputStream(InputStream in) { + this.in = in; + } + + public void close() throws IOException { + in.close(); + } + + /** + * Read bytes from the stream. + * Made abstract to prevent leakage to underlying stream. + */ + public abstract int read(byte[] b, int off, int len) throws IOException; + + /** + * Reset the decompressor to its initial state and discard any buffered data, + * as the underlying stream may have been repositioned. + */ + public abstract void resetState() throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java b/src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java new file mode 100644 index 00000000000..3b0420f11b5 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * A compression output stream. + */ +public abstract class CompressionOutputStream extends OutputStream { + /** + * The output stream to be compressed. + */ + protected final OutputStream out; + + /** + * Create a compression output stream that writes + * the compressed bytes to the given stream. + * @param out + */ + protected CompressionOutputStream(OutputStream out) { + this.out = out; + } + + public void close() throws IOException { + finish(); + out.close(); + } + + public void flush() throws IOException { + out.flush(); + } + + /** + * Write compressed bytes to the stream. + * Made abstract to prevent leakage to underlying stream. + */ + public abstract void write(byte[] b, int off, int len) throws IOException; + + /** + * Finishes writing compressed data to the output stream + * without closing the underlying stream. + */ + public abstract void finish() throws IOException; + + /** + * Reset the compression to the initial state. + * Does not reset the underlying stream. 
+ */ + public abstract void resetState() throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/compress/Compressor.java b/src/java/org/apache/hadoop/io/compress/Compressor.java new file mode 100644 index 00000000000..66bc4bfeeda --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/Compressor.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; + +/** + * Specification of a stream-based 'compressor' which can be + * plugged into a {@link CompressionOutputStream} to compress data. + * This is modelled after {@link java.util.zip.Deflater} + * + */ +public interface Compressor { + /** + * Sets input data for compression. + * This should be called whenever #needsInput() returns + * true indicating that more input data is required. + * + * @param b Input data + * @param off Start offset + * @param len Length + */ + public void setInput(byte[] b, int off, int len); + + /** + * Returns true if the input data buffer is empty and + * #setInput() should be called to provide more input. + * + * @return true if the input data buffer is empty and + * #setInput() should be called in order to provide more input. + */ + public boolean needsInput(); + + /** + * Sets preset dictionary for compression. A preset dictionary + * is used when the history buffer can be predetermined. + * + * @param b Dictionary data bytes + * @param off Start offset + * @param len Length + */ + public void setDictionary(byte[] b, int off, int len); + + /** + * Return number of uncompressed bytes input so far. + */ + public long getBytesRead(); + + /** + * Return number of compressed bytes output so far. + */ + public long getBytesWritten(); + + /** + * When called, indicates that compression should end + * with the current contents of the input buffer. + */ + public void finish(); + + /** + * Returns true if the end of the compressed + * data output stream has been reached. + * @return true if the end of the compressed + * data output stream has been reached. + */ + public boolean finished(); + + /** + * Fills specified buffer with compressed data. Returns actual number + * of bytes of compressed data. A return value of 0 indicates that + * needsInput() should be called in order to determine if more input + * data is required. + * + * @param b Buffer for the compressed data + * @param off Start offset of the data + * @param len Size of the buffer + * @return The actual number of bytes of compressed data. + */ + public int compress(byte[] b, int off, int len) throws IOException; + + /** + * Resets compressor so that a new set of input data can be processed. + */ + public void reset(); + + /** + * Closes the compressor and discards any unprocessed input. 
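The interface is meant to be driven in a loop; a condensed sketch of what CompressorStream (later in this patch) does with it, where compressor, buf, out and the input slice (b, off, len) are placeholders:

    compressor.setInput(b, off, len);
    while (!compressor.needsInput()) {
      int n = compressor.compress(buf, 0, buf.length);  // 0 means "give me more input"
      if (n > 0) {
        out.write(buf, 0, n);
      }
    }
    // once all input has been supplied:
    compressor.finish();
    while (!compressor.finished()) {
      int n = compressor.compress(buf, 0, buf.length);
      if (n > 0) {
        out.write(buf, 0, n);
      }
    }
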
+ */ + public void end(); +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressorStream.java b/src/java/org/apache/hadoop/io/compress/CompressorStream.java new file mode 100644 index 00000000000..6917ebfd051 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressorStream.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; + +public class CompressorStream extends CompressionOutputStream { + protected Compressor compressor; + protected byte[] buffer; + protected boolean closed = false; + + public CompressorStream(OutputStream out, Compressor compressor, int bufferSize) { + super(out); + + if (out == null || compressor == null) { + throw new NullPointerException(); + } else if (bufferSize <= 0) { + throw new IllegalArgumentException("Illegal bufferSize"); + } + + this.compressor = compressor; + buffer = new byte[bufferSize]; + } + + public CompressorStream(OutputStream out, Compressor compressor) { + this(out, compressor, 512); + } + + /** + * Allow derived classes to directly set the underlying stream. + * + * @param out Underlying output stream. 
+ */ + protected CompressorStream(OutputStream out) { + super(out); + } + + public void write(byte[] b, int off, int len) throws IOException { + // Sanity checks + if (compressor.finished()) { + throw new IOException("write beyond end of stream"); + } + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + compressor.setInput(b, off, len); + while (!compressor.needsInput()) { + compress(); + } + } + + protected void compress() throws IOException { + int len = compressor.compress(buffer, 0, buffer.length); + if (len > 0) { + out.write(buffer, 0, len); + } + } + + public void finish() throws IOException { + if (!compressor.finished()) { + compressor.finish(); + while (!compressor.finished()) { + compress(); + } + } + } + + public void resetState() throws IOException { + compressor.reset(); + } + + public void close() throws IOException { + if (!closed) { + finish(); + out.close(); + closed = true; + } + } + + private byte[] oneByte = new byte[1]; + public void write(int b) throws IOException { + oneByte[0] = (byte)(b & 0xff); + write(oneByte, 0, oneByte.length); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/Decompressor.java b/src/java/org/apache/hadoop/io/compress/Decompressor.java new file mode 100644 index 00000000000..5832a4a741c --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/Decompressor.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; + +/** + * Specification of a stream-based 'de-compressor' which can be + * plugged into a {@link CompressionInputStream} to compress data. + * This is modelled after {@link java.util.zip.Inflater} + * + */ +public interface Decompressor { + /** + * Sets input data for decompression. + * This should be called whenever #needsInput() returns + * true indicating that more input data is required. + * + * @param b Input data + * @param off Start offset + * @param len Length + */ + public void setInput(byte[] b, int off, int len); + + /** + * Returns true if the input data buffer is empty and + * #setInput() should be called to provide more input. + * + * @return true if the input data buffer is empty and + * #setInput() should be called in order to provide more input. + */ + public boolean needsInput(); + + /** + * Sets preset dictionary for compression. A preset dictionary + * is used when the history buffer can be predetermined. + * + * @param b Dictionary data bytes + * @param off Start offset + * @param len Length + */ + public void setDictionary(byte[] b, int off, int len); + + /** + * Returns true if a preset dictionary is needed for decompression. 
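For orientation, the Decompressor contract is typically driven like this (a placeholder sketch of the loop DecompressorStream uses further down; buf and compressed are stand-ins):

    int n;
    while ((n = decompressor.decompress(buf, 0, buf.length)) == 0) {
      if (decompressor.finished() || decompressor.needsDictionary()) {
        break;  // end of stream, or a preset dictionary must be supplied first
      }
      if (decompressor.needsInput()) {
        decompressor.setInput(compressed, 0, compressed.length);  // feed more bytes
      }
    }
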
+ * @return true if a preset dictionary is needed for decompression + */ + public boolean needsDictionary(); + + /** + * Returns true if the end of the compressed + * data output stream has been reached. + * @return true if the end of the compressed + * data output stream has been reached. + */ + public boolean finished(); + + /** + * Fills specified buffer with uncompressed data. Returns actual number + * of bytes of uncompressed data. A return value of 0 indicates that + * #needsInput() should be called in order to determine if more input + * data is required. + * + * @param b Buffer for the compressed data + * @param off Start offset of the data + * @param len Size of the buffer + * @return The actual number of bytes of compressed data. + * @throws IOException + */ + public int decompress(byte[] b, int off, int len) throws IOException; + + /** + * Resets decompressor so that a new set of input data can be processed. + */ + public void reset(); + + /** + * Closes the decompressor and discards any unprocessed input. + */ + public void end(); +} diff --git a/src/java/org/apache/hadoop/io/compress/DecompressorStream.java b/src/java/org/apache/hadoop/io/compress/DecompressorStream.java new file mode 100644 index 00000000000..a84bea443e4 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/DecompressorStream.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.io.compress.Decompressor; + +public class DecompressorStream extends CompressionInputStream { + protected Decompressor decompressor = null; + protected byte[] buffer; + protected boolean eof = false; + protected boolean closed = false; + + public DecompressorStream(InputStream in, Decompressor decompressor, int bufferSize) { + super(in); + + if (in == null || decompressor == null) { + throw new NullPointerException(); + } else if (bufferSize <= 0) { + throw new IllegalArgumentException("Illegal bufferSize"); + } + + this.decompressor = decompressor; + buffer = new byte[bufferSize]; + } + + public DecompressorStream(InputStream in, Decompressor decompressor) { + this(in, decompressor, 512); + } + + /** + * Allow derived classes to directly set the underlying stream. + * + * @param in Underlying input stream. + */ + protected DecompressorStream(InputStream in) { + super(in); + } + + private byte[] oneByte = new byte[1]; + public int read() throws IOException { + checkStream(); + return (read(oneByte, 0, oneByte.length) == -1) ? 
-1 : (oneByte[0] & 0xff); + } + + public int read(byte[] b, int off, int len) throws IOException { + checkStream(); + + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + return decompress(b, off, len); + } + + protected int decompress(byte[] b, int off, int len) throws IOException { + int n = 0; + + while ((n = decompressor.decompress(b, off, len)) == 0) { + if (decompressor.finished() || decompressor.needsDictionary()) { + eof = true; + return -1; + } + if (decompressor.needsInput()) { + getCompressedData(); + } + } + + return n; + } + + protected void getCompressedData() throws IOException { + checkStream(); + + int n = in.read(buffer, 0, buffer.length); + if (n == -1) { + throw new EOFException("Unexpected end of input stream"); + } + + decompressor.setInput(buffer, 0, n); + } + + protected void checkStream() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + } + + public void resetState() throws IOException { + decompressor.reset(); + } + + private byte[] skipBytes = new byte[512]; + public long skip(long n) throws IOException { + // Sanity checks + if (n < 0) { + throw new IllegalArgumentException("negative skip length"); + } + checkStream(); + + // Read 'n' bytes + int skipped = 0; + while (skipped < n) { + int len = Math.min(((int)n - skipped), skipBytes.length); + len = read(skipBytes, 0, len); + if (len == -1) { + eof = true; + break; + } + skipped += len; + } + return skipped; + } + + public int available() throws IOException { + checkStream(); + return (eof) ? 0 : 1; + } + + public void close() throws IOException { + if (!closed) { + in.close(); + closed = true; + } + } + + public boolean markSupported() { + return false; + } + + public synchronized void mark(int readlimit) { + } + + public synchronized void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/DefaultCodec.java b/src/java/org/apache/hadoop/io/compress/DefaultCodec.java new file mode 100644 index 00000000000..29dc140c00c --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/DefaultCodec.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.InputStream; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.zlib.*; + +public class DefaultCodec implements Configurable, CompressionCodec { + + Configuration conf; + + public void setConf(Configuration conf) { + this.conf = conf; + } + + public Configuration getConf() { + return conf; + } + + public CompressionOutputStream createOutputStream(OutputStream out) + throws IOException { + return new CompressorStream(out, createCompressor(), + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public CompressionOutputStream createOutputStream(OutputStream out, + Compressor compressor) + throws IOException { + return new CompressorStream(out, compressor, + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public Class getCompressorType() { + return ZlibFactory.getZlibCompressorType(conf); + } + + public Compressor createCompressor() { + return ZlibFactory.getZlibCompressor(conf); + } + + public CompressionInputStream createInputStream(InputStream in) + throws IOException { + return new DecompressorStream(in, createDecompressor(), + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public CompressionInputStream createInputStream(InputStream in, + Decompressor decompressor) + throws IOException { + return new DecompressorStream(in, decompressor, + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public Class getDecompressorType() { + return ZlibFactory.getZlibDecompressorType(conf); + } + + public Decompressor createDecompressor() { + return ZlibFactory.getZlibDecompressor(conf); + } + + public String getDefaultExtension() { + return ".deflate"; + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/GzipCodec.java b/src/java/org/apache/hadoop/io/compress/GzipCodec.java new file mode 100644 index 00000000000..674dce280fe --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/GzipCodec.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.*; +import java.util.zip.GZIPOutputStream; +import java.util.zip.GZIPInputStream; + +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.zlib.*; + +/** + * This class creates gzip compressors/decompressors. + */ +public class GzipCodec extends DefaultCodec { + /** + * A bridge that wraps around a DeflaterOutputStream to make it + * a CompressionOutputStream. 
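A minimal round trip through DefaultCodec (illustrative only; note that setConf must be called before use, since the stream buffer size comes from io.file.buffer.size):

    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    ByteArrayOutputStream raw = new ByteArrayOutputStream();
    CompressionOutputStream cout = codec.createOutputStream(raw);
    cout.write("hello, world".getBytes());
    cout.close();                         // finish() the compressor, then close
    CompressionInputStream cin = codec.createInputStream(
        new ByteArrayInputStream(raw.toByteArray()));
    // reading cin yields the original bytes; the stored form uses the ".deflate" extension
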
+ */ + protected static class GzipOutputStream extends CompressorStream { + + private static class ResetableGZIPOutputStream extends GZIPOutputStream { + + public ResetableGZIPOutputStream(OutputStream out) throws IOException { + super(out); + } + + public void resetState() throws IOException { + def.reset(); + } + } + + public GzipOutputStream(OutputStream out) throws IOException { + super(new ResetableGZIPOutputStream(out)); + } + + /** + * Allow children types to put a different type in here. + * @param out the Deflater stream to use + */ + protected GzipOutputStream(CompressorStream out) { + super(out); + } + + public void close() throws IOException { + out.close(); + } + + public void flush() throws IOException { + out.flush(); + } + + public void write(int b) throws IOException { + out.write(b); + } + + public void write(byte[] data, int offset, int length) + throws IOException { + out.write(data, offset, length); + } + + public void finish() throws IOException { + ((ResetableGZIPOutputStream) out).finish(); + } + + public void resetState() throws IOException { + ((ResetableGZIPOutputStream) out).resetState(); + } + } + + protected static class GzipInputStream extends DecompressorStream { + + private static class ResetableGZIPInputStream extends GZIPInputStream { + + public ResetableGZIPInputStream(InputStream in) throws IOException { + super(in); + } + + public void resetState() throws IOException { + inf.reset(); + } + } + + public GzipInputStream(InputStream in) throws IOException { + super(new ResetableGZIPInputStream(in)); + } + + /** + * Allow subclasses to directly set the inflater stream. + */ + protected GzipInputStream(DecompressorStream in) { + super(in); + } + + public int available() throws IOException { + return in.available(); + } + + public void close() throws IOException { + in.close(); + } + + public int read() throws IOException { + return in.read(); + } + + public int read(byte[] data, int offset, int len) throws IOException { + return in.read(data, offset, len); + } + + public long skip(long offset) throws IOException { + return in.skip(offset); + } + + public void resetState() throws IOException { + ((ResetableGZIPInputStream) in).resetState(); + } + } + + public CompressionOutputStream createOutputStream(OutputStream out) + throws IOException { + return (ZlibFactory.isNativeZlibLoaded(conf)) ? + new CompressorStream(out, createCompressor(), + conf.getInt("io.file.buffer.size", 4*1024)) : + new GzipOutputStream(out); + } + + public CompressionOutputStream createOutputStream(OutputStream out, + Compressor compressor) + throws IOException { + return (compressor != null) ? + new CompressorStream(out, compressor, + conf.getInt("io.file.buffer.size", + 4*1024)) : + createOutputStream(out); + + } + + public Compressor createCompressor() { + return (ZlibFactory.isNativeZlibLoaded(conf)) + ? new GzipZlibCompressor() + : null; + } + + public Class getCompressorType() { + return ZlibFactory.isNativeZlibLoaded(conf) + ? GzipZlibCompressor.class + : BuiltInZlibDeflater.class; + } + + public CompressionInputStream createInputStream(InputStream in) + throws IOException { + return (ZlibFactory.isNativeZlibLoaded(conf)) ? + new DecompressorStream(in, createDecompressor(), + conf.getInt("io.file.buffer.size", + 4*1024)) : + new GzipInputStream(in); + } + + public CompressionInputStream createInputStream(InputStream in, + Decompressor decompressor) + throws IOException { + return (decompressor != null) ? 
+ new DecompressorStream(in, decompressor, + conf.getInt("io.file.buffer.size", + 4*1024)) : + createInputStream(in); + } + + public Decompressor createDecompressor() { + return (ZlibFactory.isNativeZlibLoaded(conf)) + ? new GzipZlibDecompressor() + : null; + } + + public Class getDecompressorType() { + return ZlibFactory.isNativeZlibLoaded(conf) + ? GzipZlibDecompressor.class + : BuiltInZlibInflater.class; + } + + public String getDefaultExtension() { + return ".gz"; + } + + static final class GzipZlibCompressor extends ZlibCompressor { + public GzipZlibCompressor() { + super(ZlibCompressor.CompressionLevel.DEFAULT_COMPRESSION, + ZlibCompressor.CompressionStrategy.DEFAULT_STRATEGY, + ZlibCompressor.CompressionHeader.GZIP_FORMAT, 64*1024); + } + } + + static final class GzipZlibDecompressor extends ZlibDecompressor { + public GzipZlibDecompressor() { + super(ZlibDecompressor.CompressionHeader.AUTODETECT_GZIP_ZLIB, 64*1024); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java new file mode 100644 index 00000000000..99dc28146d3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.hadoop.io.compress.bzip2; + +/** + * Base class for both the compress and decompress classes. Holds common arrays, + * and static data. + *

+ * This interface is public for historical purposes. You should have no need to + * use it. + *

+ */ +public interface BZip2Constants { + + int baseBlockSize = 100000; + int MAX_ALPHA_SIZE = 258; + int MAX_CODE_LEN = 23; + int RUNA = 0; + int RUNB = 1; + int N_GROUPS = 6; + int G_SIZE = 50; + int N_ITERS = 4; + int MAX_SELECTORS = (2 + (900000 / G_SIZE)); + int NUM_OVERSHOOT_BYTES = 20; + + /** + * This array really shouldn't be here. Again, for historical purposes it + * is. + * + *

+ * FIXME: This array should be in a private or package private location, + * since it could be modified by malicious code. + *

+ */ + final int[] rNums = { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335, + 708, 621, 574, 73, 654, 730, 472, 419, 436, 278, 496, 867, 210, + 399, 680, 480, 51, 878, 465, 811, 169, 869, 675, 611, 697, 867, + 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 150, 238, + 59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727, + 476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249, + 445, 515, 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641, + 801, 220, 162, 819, 984, 589, 513, 495, 799, 161, 604, 958, 533, + 221, 400, 386, 867, 600, 782, 382, 596, 414, 171, 516, 375, 682, + 485, 911, 276, 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 469, 68, 770, + 919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383, 461, + 404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604, + 560, 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970, + 447, 318, 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, + 354, 99, 820, 908, 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 411, 521, 938, + 300, 821, 78, 343, 175, 128, 250, 170, 774, 972, 275, 999, 639, + 495, 78, 352, 126, 857, 956, 358, 619, 580, 124, 737, 594, 701, + 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 944, 375, + 748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 655, + 814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, + 228, 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754, + 806, 760, 493, 403, 415, 394, 687, 700, 946, 670, 656, 610, 738, + 392, 760, 799, 887, 653, 978, 321, 576, 617, 626, 502, 894, 679, + 243, 440, 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 707, + 151, 457, 449, 797, 195, 791, 558, 945, 679, 297, 59, 87, 824, 713, + 663, 412, 693, 342, 606, 134, 108, 571, 364, 631, 212, 174, 643, + 304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 140, + 206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692, + 829, 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150, + 790, 288, 923, 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 661, 821, 976, + 991, 658, 869, 905, 758, 745, 193, 768, 550, 608, 933, 378, 286, + 215, 979, 792, 961, 61, 688, 793, 644, 986, 403, 106, 366, 905, + 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 780, 773, + 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857, + 265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, + 858, 364, 936, 638 }; +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java new file mode 100644 index 00000000000..2594717113c --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java @@ -0,0 +1,62 @@ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.IOException; + +import org.apache.hadoop.io.compress.Compressor; + +/** + * This is a dummy compressor for BZip2. 
+ */ +public class BZip2DummyCompressor implements Compressor { + + @Override + public int compress(byte[] b, int off, int len) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void end() { + throw new UnsupportedOperationException(); + } + + @Override + public void finish() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean finished() { + throw new UnsupportedOperationException(); + } + + @Override + public long getBytesRead() { + throw new UnsupportedOperationException(); + } + + @Override + public long getBytesWritten() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsInput() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + // do nothing + } + + @Override + public void setDictionary(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + + @Override + public void setInput(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java new file mode 100644 index 00000000000..15308fbd038 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java @@ -0,0 +1,52 @@ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.IOException; + +import org.apache.hadoop.io.compress.Decompressor; + +/** + * This is a dummy decompressor for BZip2. + */ +public class BZip2DummyDecompressor implements Decompressor { + + @Override + public int decompress(byte[] b, int off, int len) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void end() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean finished() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsDictionary() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsInput() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + // do nothing + } + + @Override + public void setDictionary(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + + @Override + public void setInput(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java new file mode 100644 index 00000000000..567cb5efd3f --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java @@ -0,0 +1,969 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.InputStream; +import java.io.IOException; + +/** + * An input stream that decompresses from the BZip2 format (without the file + * header chars) to be read as any other stream. + * + *

+ * The decompression requires large amounts of memory. Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * CBZip2InputStream to release the allocated memory. See + * {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + * usage. + *

+ * + *

+ * CBZip2InputStream reads bytes from the compressed source stream via
+ * the single byte {@link java.io.InputStream#read() read()} method exclusively.
+ * Thus you should consider using a buffered source stream.
+ *

+ * + *

+ * Instances of this class are not threadsafe. + *
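+ * A minimal usage sketch (illustrative only; <code>data.bz2</code> is just an
+ * example name): skip the two magic bytes "BZ", wrap the source in a
+ * buffered stream as advised above, and read as from any other stream.
+ *
+ * <pre>
+ * InputStream raw = new BufferedInputStream(new FileInputStream("data.bz2"));
+ * raw.read(); // 'B'
+ * raw.read(); // 'Z'
+ * CBZip2InputStream bzIn = new CBZip2InputStream(raw);
+ * int b;
+ * while ((b = bzIn.read()) >= 0) {
+ *   // consume decompressed bytes
+ * }
+ * bzIn.close();
+ * </pre>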

+ */ +public class CBZip2InputStream extends InputStream implements BZip2Constants { + + private static void reportCRCError() throws IOException { + + throw new IOException("BZip2 CRC error"); + + } + + private void makeMaps() { + final boolean[] inUse = this.data.inUse; + final byte[] seqToUnseq = this.data.seqToUnseq; + + int nInUseShadow = 0; + + for (int i = 0; i < 256; i++) { + if (inUse[i]) + seqToUnseq[nInUseShadow++] = (byte) i; + } + + this.nInUse = nInUseShadow; + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Index in zptr[] of original string after sorting. + */ + private int origPtr; + + /** + * always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private int blockSize100k; + + private boolean blockRandomised; + + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private InputStream in; + + private int currentChar = -1; + + private static final int EOF = 0; + private static final int START_BLOCK_STATE = 1; + private static final int RAND_PART_A_STATE = 2; + private static final int RAND_PART_B_STATE = 3; + private static final int RAND_PART_C_STATE = 4; + private static final int NO_RAND_PART_A_STATE = 5; + private static final int NO_RAND_PART_B_STATE = 6; + private static final int NO_RAND_PART_C_STATE = 7; + + private int currentState = START_BLOCK_STATE; + + private int storedBlockCRC, storedCombinedCRC; + private int computedBlockCRC, computedCombinedCRC; + + // Variables used by setup* methods exclusively + + private int su_count; + private int su_ch2; + private int su_chPrev; + private int su_i2; + private int su_j2; + private int su_rNToGo; + private int su_rTPos; + private int su_tPos; + private char su_z; + + /** + * All memory intensive stuff. This field is initialized by initBlock(). + */ + private CBZip2InputStream.Data data; + + /** + * Constructs a new CBZip2InputStream which decompresses bytes read from the + * specified stream. + * + *

+ * Although BZip2 headers are marked with the magic "BZ", this
+ * constructor expects the next byte in the stream to be the first one after
+ * the magic. Thus callers have to skip the first two bytes. Otherwise this
+ * constructor will throw an exception.
+ *

+ * + * @throws IOException + * if the stream content is malformed or an I/O error occurs. + * @throws NullPointerException + * if in == null + */ + public CBZip2InputStream(final InputStream in) throws IOException { + super(); + + this.in = in; + init(); + } + + public int read() throws IOException { + if (this.in != null) { + return read0(); + } else { + throw new IOException("stream closed"); + } + } + + public int read(final byte[] dest, final int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > dest.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > dest.length(" + dest.length + ")."); + } + if (this.in == null) { + throw new IOException("stream closed"); + } + + final int hi = offs + len; + int destOffs = offs; + for (int b; (destOffs < hi) && ((b = read0()) >= 0);) { + dest[destOffs++] = (byte) b; + } + + return (destOffs == offs) ? -1 : (destOffs - offs); + } + + private int read0() throws IOException { + final int retChar = this.currentChar; + + switch (this.currentState) { + case EOF: + return -1; + + case START_BLOCK_STATE: + throw new IllegalStateException(); + + case RAND_PART_A_STATE: + throw new IllegalStateException(); + + case RAND_PART_B_STATE: + setupRandPartB(); + break; + + case RAND_PART_C_STATE: + setupRandPartC(); + break; + + case NO_RAND_PART_A_STATE: + throw new IllegalStateException(); + + case NO_RAND_PART_B_STATE: + setupNoRandPartB(); + break; + + case NO_RAND_PART_C_STATE: + setupNoRandPartC(); + break; + + default: + throw new IllegalStateException(); + } + + return retChar; + } + + private void init() throws IOException { + int magic2 = this.in.read(); + if (magic2 != 'h') { + throw new IOException("Stream is not BZip2 formatted: expected 'h'" + + " as first byte but got '" + (char) magic2 + "'"); + } + + int blockSize = this.in.read(); + if ((blockSize < '1') || (blockSize > '9')) { + throw new IOException("Stream is not BZip2 formatted: illegal " + + "blocksize " + (char) blockSize); + } + + this.blockSize100k = blockSize - '0'; + + initBlock(); + setupBlock(); + } + + private void initBlock() throws IOException { + char magic0 = bsGetUByte(); + char magic1 = bsGetUByte(); + char magic2 = bsGetUByte(); + char magic3 = bsGetUByte(); + char magic4 = bsGetUByte(); + char magic5 = bsGetUByte(); + + if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45 + && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) { + complete(); // end of file + } else if (magic0 != 0x31 || // '1' + magic1 != 0x41 || // ')' + magic2 != 0x59 || // 'Y' + magic3 != 0x26 || // '&' + magic4 != 0x53 || // 'S' + magic5 != 0x59 // 'Y' + ) { + this.currentState = EOF; + throw new IOException("bad block header"); + } else { + this.storedBlockCRC = bsGetInt(); + this.blockRandomised = bsR(1) == 1; + + /** + * Allocate data here instead in constructor, so we do not allocate + * it if the input file is empty. + */ + if (this.data == null) { + this.data = new Data(this.blockSize100k); + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + this.crc.initialiseCRC(); + this.currentState = START_BLOCK_STATE; + } + } + + private void endBlock() throws IOException { + this.computedBlockCRC = this.crc.getFinalCRC(); + + // A bad CRC is considered a fatal error. 
+ if (this.storedBlockCRC != this.computedBlockCRC) { + // make next blocks readable without error + // (repair feature, not yet documented, not tested) + this.computedCombinedCRC = (this.storedCombinedCRC << 1) + | (this.storedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.storedBlockCRC; + + reportCRCError(); + } + + this.computedCombinedCRC = (this.computedCombinedCRC << 1) + | (this.computedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.computedBlockCRC; + } + + private void complete() throws IOException { + this.storedCombinedCRC = bsGetInt(); + this.currentState = EOF; + this.data = null; + + if (this.storedCombinedCRC != this.computedCombinedCRC) { + reportCRCError(); + } + } + + public void close() throws IOException { + InputStream inShadow = this.in; + if (inShadow != null) { + try { + if (inShadow != System.in) { + inShadow.close(); + } + } finally { + this.data = null; + this.in = null; + } + } + } + + private int bsR(final int n) throws IOException { + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + if (bsLiveShadow < n) { + final InputStream inShadow = this.in; + do { + int thech = inShadow.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + } while (bsLiveShadow < n); + + this.bsBuff = bsBuffShadow; + } + + this.bsLive = bsLiveShadow - n; + return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); + } + + private boolean bsGetBit() throws IOException { + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + if (bsLiveShadow < 1) { + int thech = this.in.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + this.bsBuff = bsBuffShadow; + } + + this.bsLive = bsLiveShadow - 1; + return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; + } + + private char bsGetUByte() throws IOException { + return (char) bsR(8); + } + + private int bsGetInt() throws IOException { + return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); + } + + /** + * Called by createHuffmanDecodingTables() exclusively. 
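+ * Builds the canonical Huffman decoding tables for one coding group:
+ * <code>perm</code> lists the symbols ordered by code length,
+ * <code>limit[i]</code> holds the largest code value of length <code>i</code>,
+ * and <code>base</code> is offset so that
+ * <code>perm[code - base[length]]</code> selects the decoded symbol.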
+ */ + private static void hbCreateDecodeTables(final int[] limit, + final int[] base, final int[] perm, final char[] length, + final int minLen, final int maxLen, final int alphaSize) { + for (int i = minLen, pp = 0; i <= maxLen; i++) { + for (int j = 0; j < alphaSize; j++) { + if (length[j] == i) { + perm[pp++] = j; + } + } + } + + for (int i = MAX_CODE_LEN; --i > 0;) { + base[i] = 0; + limit[i] = 0; + } + + for (int i = 0; i < alphaSize; i++) { + base[length[i] + 1]++; + } + + for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { + b += base[i]; + base[i] = b; + } + + for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { + final int nb = base[i + 1]; + vec += nb - b; + b = nb; + limit[i] = vec - 1; + vec <<= 1; + } + + for (int i = minLen + 1; i <= maxLen; i++) { + base[i] = ((limit[i - 1] + 1) << 1) - base[i]; + } + } + + private void recvDecodingTables() throws IOException { + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] pos = dataShadow.recvDecodingTables_pos; + final byte[] selector = dataShadow.selector; + final byte[] selectorMtf = dataShadow.selectorMtf; + + int inUse16 = 0; + + /* Receive the mapping table */ + for (int i = 0; i < 16; i++) { + if (bsGetBit()) { + inUse16 |= 1 << i; + } + } + + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + + for (int i = 0; i < 16; i++) { + if ((inUse16 & (1 << i)) != 0) { + final int i16 = i << 4; + for (int j = 0; j < 16; j++) { + if (bsGetBit()) { + inUse[i16 + j] = true; + } + } + } + } + + makeMaps(); + final int alphaSize = this.nInUse + 2; + + /* Now the selectors */ + final int nGroups = bsR(3); + final int nSelectors = bsR(15); + + for (int i = 0; i < nSelectors; i++) { + int j = 0; + while (bsGetBit()) { + j++; + } + selectorMtf[i] = (byte) j; + } + + /* Undo the MTF values for the selectors. */ + for (int v = nGroups; --v >= 0;) { + pos[v] = (byte) v; + } + + for (int i = 0; i < nSelectors; i++) { + int v = selectorMtf[i] & 0xff; + final byte tmp = pos[v]; + while (v > 0) { + // nearly all times v is zero, 4 in most other cases + pos[v] = pos[v - 1]; + v--; + } + pos[0] = tmp; + selector[i] = tmp; + } + + final char[][] len = dataShadow.temp_charArray2d; + + /* Now the coding tables */ + for (int t = 0; t < nGroups; t++) { + int curr = bsR(5); + final char[] len_t = len[t]; + for (int i = 0; i < alphaSize; i++) { + while (bsGetBit()) { + curr += bsGetBit() ? -1 : 1; + } + len_t[i] = (char) curr; + } + } + + // finally create the Huffman tables + createHuffmanDecodingTables(alphaSize, nGroups); + } + + /** + * Called by recvDecodingTables() exclusively. 
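+ * For each of the <code>nGroups</code> coding tables this finds the shortest
+ * and longest code length in use and then delegates to
+ * <code>hbCreateDecodeTables</code>, recording the shortest length in
+ * <code>minLens</code>.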
+ */ + private void createHuffmanDecodingTables(final int alphaSize, + final int nGroups) { + final Data dataShadow = this.data; + final char[][] len = dataShadow.temp_charArray2d; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final char[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final char lent = len_t[i]; + if (lent > maxLen) { + maxLen = lent; + } + if (lent < minLen) { + minLen = lent; + } + } + hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, + maxLen, alphaSize); + minLens[t] = minLen; + } + } + + private void getAndMoveToFrontDecode() throws IOException { + this.origPtr = bsR(24); + recvDecodingTables(); + + final InputStream inShadow = this.in; + final Data dataShadow = this.data; + final byte[] ll8 = dataShadow.ll8; + final int[] unzftab = dataShadow.unzftab; + final byte[] selector = dataShadow.selector; + final byte[] seqToUnseq = dataShadow.seqToUnseq; + final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + final int limitLast = this.blockSize100k * 100000; + + /* + * Setting up the unzftab entries here is not strictly necessary, but it + * does save having to do it later in a separate pass, and so saves a + * block's worth of cache misses. + */ + for (int i = 256; --i >= 0;) { + yy[i] = (char) i; + unzftab[i] = 0; + } + + int groupNo = 0; + int groupPos = G_SIZE - 1; + final int eob = this.nInUse + 1; + int nextSym = getAndMoveToFrontDecode0(0); + int bsBuffShadow = this.bsBuff; + int bsLiveShadow = this.bsLive; + int lastShadow = -1; + int zt = selector[groupNo] & 0xff; + int[] base_zt = base[zt]; + int[] limit_zt = limit[zt]; + int[] perm_zt = perm[zt]; + int minLens_zt = minLens[zt]; + + while (nextSym != eob) { + if ((nextSym == RUNA) || (nextSym == RUNB)) { + int s = -1; + + for (int n = 1; true; n <<= 1) { + if (nextSym == RUNA) { + s += n; + } else if (nextSym == RUNB) { + s += n << 1; + } else { + break; + } + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLens_zt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLiveShadow < zn) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) + & ((1 << zn) - 1); + bsLiveShadow -= zn; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException( + "unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) + | ((bsBuffShadow >> bsLiveShadow) & 1); + } + nextSym = perm_zt[zvec - base_zt[zn]]; + } + + final byte ch = seqToUnseq[yy[0]]; + unzftab[ch & 0xff] += s + 1; + + while (s-- >= 0) { + ll8[++lastShadow] = ch; + } + + if (lastShadow >= limitLast) { + throw new IOException("block overrun"); + } + } else { + if (++lastShadow >= limitLast) { + throw new 
IOException("block overrun"); + } + + final char tmp = yy[nextSym - 1]; + unzftab[seqToUnseq[tmp] & 0xff]++; + ll8[lastShadow] = seqToUnseq[tmp]; + + /* + * This loop is hammered during decompression, hence avoid + * native method call overhead of System.arraycopy for very + * small ranges to copy. + */ + if (nextSym <= 16) { + for (int j = nextSym - 1; j > 0;) { + yy[j] = yy[--j]; + } + } else { + System.arraycopy(yy, 0, yy, 1, nextSym - 1); + } + + yy[0] = tmp; + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLens_zt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLiveShadow < zn) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) + & ((1 << zn) - 1); + bsLiveShadow -= zn; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + nextSym = perm_zt[zvec - base_zt[zn]]; + } + } + + this.last = lastShadow; + this.bsLive = bsLiveShadow; + this.bsBuff = bsBuffShadow; + } + + private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { + final InputStream inShadow = this.in; + final Data dataShadow = this.data; + final int zt = dataShadow.selector[groupNo] & 0xff; + final int[] limit_zt = dataShadow.limit[zt]; + int zn = dataShadow.minLens[zt]; + int zvec = bsR(zn); + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + + this.bsLive = bsLiveShadow; + this.bsBuff = bsBuffShadow; + + return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; + } + + private void setupBlock() throws IOException { + if (this.data == null) { + return; + } + + final int[] cftab = this.data.cftab; + final int[] tt = this.data.initTT(this.last + 1); + final byte[] ll8 = this.data.ll8; + cftab[0] = 0; + System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); + + for (int i = 1, c = cftab[0]; i <= 256; i++) { + c += cftab[i]; + cftab[i] = c; + } + + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + tt[cftab[ll8[i] & 0xff]++] = i; + } + + if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { + throw new IOException("stream corrupted"); + } + + this.su_tPos = tt[this.origPtr]; + this.su_count = 0; + this.su_i2 = 0; + this.su_ch2 = 256; /* not a char and not EOF */ + + if (this.blockRandomised) { + this.su_rNToGo = 0; + this.su_rTPos = 0; + setupRandPartA(); + } else { + setupNoRandPartA(); + } + } + + private void setupRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; + this.su_tPos = this.data.tt[this.su_tPos]; + if 
(this.su_rNToGo == 0) { + this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; + } + } else { + this.su_rNToGo--; + } + this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; + this.su_i2++; + this.currentChar = su_ch2Shadow; + this.currentState = RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2Shadow); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupNoRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; + this.su_ch2 = su_ch2Shadow; + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_i2++; + this.currentChar = su_ch2Shadow; + this.currentState = NO_RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2Shadow); + } else { + this.currentState = NO_RAND_PART_A_STATE; + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.currentState = RAND_PART_A_STATE; + this.su_count = 1; + setupRandPartA(); + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + if (this.su_rNToGo == 0) { + this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; + } + } else { + this.su_rNToGo--; + } + this.su_j2 = 0; + this.currentState = RAND_PART_C_STATE; + if (this.su_rNToGo == 1) { + this.su_z ^= 1; + } + setupRandPartC(); + } else { + this.currentState = RAND_PART_A_STATE; + setupRandPartA(); + } + } + + private void setupRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + this.currentChar = this.su_ch2; + this.crc.updateCRC(this.su_ch2); + this.su_j2++; + } else { + this.currentState = RAND_PART_A_STATE; + this.su_i2++; + this.su_count = 0; + setupRandPartA(); + } + } + + private void setupNoRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.su_count = 1; + setupNoRandPartA(); + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_j2 = 0; + setupNoRandPartC(); + } else { + setupNoRandPartA(); + } + } + + private void setupNoRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + int su_ch2Shadow = this.su_ch2; + this.currentChar = su_ch2Shadow; + this.crc.updateCRC(su_ch2Shadow); + this.su_j2++; + this.currentState = NO_RAND_PART_C_STATE; + } else { + this.su_i2++; + this.su_count = 0; + setupNoRandPartA(); + } + } + + private static final class Data extends Object { + + // (with blockSize 900k) + final boolean[] inUse = new boolean[256]; // 256 byte + + final byte[] seqToUnseq = new byte[256]; // 256 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + /** + * Freq table collected to save a pass over the data during + * decompression. 
+ */ + final int[] unzftab = new int[256]; // 1024 byte + + final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[] minLens = new int[N_GROUPS]; // 24 byte + + final int[] cftab = new int[257]; // 1028 byte + final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte + final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 + // byte + final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte + // --------------- + // 60798 byte + + int[] tt; // 3600000 byte + byte[] ll8; // 900000 byte + + // --------------- + // 4560782 byte + // =============== + + Data(int blockSize100k) { + super(); + + this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize]; + } + + /** + * Initializes the {@link #tt} array. + * + * This method is called when the required length of the array is known. + * I don't initialize it at construction time to avoid unneccessary + * memory allocation when compressing small files. + */ + final int[] initTT(int length) { + int[] ttShadow = this.tt; + + // tt.length should always be >= length, but theoretically + // it can happen, if the compressor mixed small and large + // blocks. Normally only the last block will be smaller + // than others. + if ((ttShadow == null) || (ttShadow.length < length)) { + this.tt = ttShadow = new int[length]; + } + + return ttShadow; + } + + } +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java new file mode 100644 index 00000000000..392cf2c521d --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java @@ -0,0 +1,2081 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.hadoop.io.compress.bzip2; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * An output stream that compresses into the BZip2 format (without the file + * header chars) into another stream. + * + *

+ * The compression requires large amounts of memory. Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * CBZip2OutputStream to release the allocated memory. + *

+ * + *

+ * You can shrink the amount of allocated memory and maybe raise the compression + * speed by choosing a lower blocksize, which in turn may cause a lower + * compression ratio. You can avoid unnecessary memory allocation by avoiding + * using a blocksize which is bigger than the size of the input. + *

+ * + *

+ * You can compute the memory usage for compressing by the following formula: + *

+ * + *
+ * <code>400k + (9 * blocksize)</code>.
+ * 
+ * + *

+ * To get the memory required for decompression by {@link CBZip2InputStream + * CBZip2InputStream} use + *

+ * + *
+ * <code>65k + (5 * blocksize)</code>.
+ * 
+ *
+ * <table border="1">
+ * <caption>Memory usage by blocksize</caption>
+ * <tr><th>Blocksize</th><th>Compression memory usage</th><th>Decompression memory usage</th></tr>
+ * <tr><td>100k</td><td>1300k</td><td>565k</td></tr>
+ * <tr><td>200k</td><td>2200k</td><td>1065k</td></tr>
+ * <tr><td>300k</td><td>3100k</td><td>1565k</td></tr>
+ * <tr><td>400k</td><td>4000k</td><td>2065k</td></tr>
+ * <tr><td>500k</td><td>4900k</td><td>2565k</td></tr>
+ * <tr><td>600k</td><td>5800k</td><td>3065k</td></tr>
+ * <tr><td>700k</td><td>6700k</td><td>3565k</td></tr>
+ * <tr><td>800k</td><td>7600k</td><td>4065k</td></tr>
+ * <tr><td>900k</td><td>8500k</td><td>4565k</td></tr>
+ * </table>
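+ * For example, the default 900k blocksize needs roughly 400k + 9 * 900k = 8500k
+ * of memory for compression and 65k + 5 * 900k = 4565k for decompression, as in
+ * the last row above.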
+ * + *

+ * For decompression CBZip2InputStream allocates less memory if the + * bzipped input is smaller than one block. + *

+ * + *

+ * Instances of this class are not threadsafe. + *

+ * + *

+ * TODO: Update to BZip2 1.0.1 + *
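+ *
+ * A minimal usage sketch (illustrative only; <code>data.bz2</code> and
+ * <code>data</code> are example names): the caller writes the two magic bytes
+ * "BZ" and then streams the raw bytes through this class.
+ *
+ * <pre>
+ * OutputStream raw = new FileOutputStream("data.bz2");
+ * raw.write('B');
+ * raw.write('Z');
+ * CBZip2OutputStream bzOut = new CBZip2OutputStream(raw, 9); // 9 == 900k blocks
+ * bzOut.write(data); // data is the caller's byte[]
+ * bzOut.close();
+ * </pre>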

+ * + */ +public class CBZip2OutputStream extends OutputStream implements BZip2Constants { + + /** + * The minimum supported blocksize == 1. + */ + public static final int MIN_BLOCKSIZE = 1; + + /** + * The maximum supported blocksize == 9. + */ + public static final int MAX_BLOCKSIZE = 9; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int SETMASK = (1 << 21); + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int CLEARMASK = (~SETMASK); + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int GREATER_ICOST = 15; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int LESSER_ICOST = 0; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int SMALL_THRESH = 20; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int DEPTH_THRESH = 10; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int WORK_FACTOR = 30; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + *

+ * If you are ever unlucky/improbable enough to get a stack overflow whilst + * sorting, increase the following constant and try again. In practice I + * have never seen the stack go above 27 elems, so the following limit seems + * very generous. + *

+ */ + protected static final int QSORT_STACK_SIZE = 1000; + + /** + * Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, 2391484 }; + + /** + * This method is accessible by subclasses for historical purposes. If you + * don't know what it does then you don't need it. + */ + protected static void hbMakeCodeLengths(char[] len, int[] freq, + int alphaSize, int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = new int[MAX_ALPHA_SIZE * 2]; + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = (((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff)))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (char) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + private static void hbMakeCodeLengths(final byte[] len, final int[] freq, + final Data dat, final int alphaSize, final int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. 
+ */ + final int[] heap = dat.heap; + final int[] weight = dat.weight; + final int[] parent = dat.parent; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = ((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (byte) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Index in fmap[] of original string after sorting. + */ + private int origPtr; + + /** + * Always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private final int blockSize100k; + + private boolean blockRandomised; + + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private int nMTF; + + /* + * Used when sorting. If too many long comparisons happen, we stop sorting, + * randomise the block slightly, and try again. + */ + private int workDone; + private int workLimit; + private boolean firstAttempt; + + private int currentChar = -1; + private int runLength = 0; + + private int blockCRC; + private int combinedCRC; + private int allowableBlockSize; + + /** + * All memory intensive stuff. + */ + private CBZip2OutputStream.Data data; + + private OutputStream out; + + /** + * Chooses a blocksize based on the given length of the data to compress. + * + * @return The blocksize, between {@link #MIN_BLOCKSIZE} and + * {@link #MAX_BLOCKSIZE} both inclusive. 
For a negative + * inputLength this method returns MAX_BLOCKSIZE + * always. + * + * @param inputLength + * The length of the data which will be compressed by + * CBZip2OutputStream. + */ + public static int chooseBlockSize(long inputLength) { + return (inputLength > 0) ? (int) Math + .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; + } + + /** + * Constructs a new CBZip2OutputStream with a blocksize of 900k. + * + *

+ * Attention: The caller is responsible for writing the two BZip2 magic
+ * bytes "BZ" to the specified stream prior to calling this
+ * constructor.
+ *

+ * + * @param out * + * the destination stream. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws NullPointerException + * if out == null. + */ + public CBZip2OutputStream(final OutputStream out) throws IOException { + this(out, MAX_BLOCKSIZE); + } + + /** + * Constructs a new CBZip2OutputStream with specified blocksize. + * + *

+ * Attention: The caller is responsible for writing the two BZip2 magic
+ * bytes "BZ" to the specified stream prior to calling this
+ * constructor.
+ *

+ * + * + * @param out + * the destination stream. + * @param blockSize + * the blockSize as 100k units. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws IllegalArgumentException + * if (blockSize < 1) || (blockSize > 9). + * @throws NullPointerException + * if out == null. + * + * @see #MIN_BLOCKSIZE + * @see #MAX_BLOCKSIZE + */ + public CBZip2OutputStream(final OutputStream out, final int blockSize) + throws IOException { + super(); + + if (blockSize < 1) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") < 1"); + } + if (blockSize > 9) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") > 9"); + } + + this.blockSize100k = blockSize; + this.out = out; + init(); + } + + public void write(final int b) throws IOException { + if (this.out != null) { + write0(b); + } else { + throw new IOException("closed"); + } + } + + private void writeRun() throws IOException { + final int lastShadow = this.last; + + if (lastShadow < this.allowableBlockSize) { + final int currentCharShadow = this.currentChar; + final Data dataShadow = this.data; + dataShadow.inUse[currentCharShadow] = true; + final byte ch = (byte) currentCharShadow; + + int runLengthShadow = this.runLength; + this.crc.updateCRC(currentCharShadow, runLengthShadow); + + switch (runLengthShadow) { + case 1: + dataShadow.block[lastShadow + 2] = ch; + this.last = lastShadow + 1; + break; + + case 2: + dataShadow.block[lastShadow + 2] = ch; + dataShadow.block[lastShadow + 3] = ch; + this.last = lastShadow + 2; + break; + + case 3: { + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + this.last = lastShadow + 3; + } + break; + + default: { + runLengthShadow -= 4; + dataShadow.inUse[runLengthShadow] = true; + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + block[lastShadow + 5] = ch; + block[lastShadow + 6] = (byte) runLengthShadow; + this.last = lastShadow + 5; + } + break; + + } + } else { + endBlock(); + initBlock(); + writeRun(); + } + } + + /** + * Overriden to close the stream. + */ + protected void finalize() throws Throwable { + finish(); + super.finalize(); + } + + + public void finish() throws IOException { + if (out != null) { + try { + if (this.runLength > 0) { + writeRun(); + } + this.currentChar = -1; + endBlock(); + endCompression(); + } finally { + this.out = null; + this.data = null; + } + } + } + + public void close() throws IOException { + if (out != null) { + OutputStream outShadow = this.out; + finish(); + outShadow.close(); + } + } + + public void flush() throws IOException { + OutputStream outShadow = this.out; + if (outShadow != null) { + outShadow.flush(); + } + } + + private void init() throws IOException { + // write magic: done by caller who created this stream + // this.out.write('B'); + // this.out.write('Z'); + + this.data = new Data(this.blockSize100k); + + /* + * Write `magic' bytes h indicating file-format == huffmanised, followed + * by a digit indicating blockSize100k. 
+ */ + bsPutUByte('h'); + bsPutUByte('0' + this.blockSize100k); + + this.combinedCRC = 0; + initBlock(); + } + + private void initBlock() { + // blockNo++; + this.crc.initialiseCRC(); + this.last = -1; + // ch = 0; + + boolean[] inUse = this.data.inUse; + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + + /* 20 is just a paranoia constant */ + this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20; + } + + private void endBlock() throws IOException { + this.blockCRC = this.crc.getFinalCRC(); + this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); + this.combinedCRC ^= this.blockCRC; + + // empty block at end of file + if (this.last == -1) { + return; + } + + /* sort the block and establish posn of original string */ + blockSort(); + + /* + * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 + * :-). A 32 bit value does not really give a strong enough guarantee + * that the value will not appear by chance in the compressed + * datastream. Worst-case probability of this event, for a 900k block, + * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 + * bits. For a compressed file of size 100Gb -- about 100000 blocks -- + * only a 48-bit marker will do. NB: normal compression/ decompression + * donot rely on these statistical properties. They are only important + * when trying to recover blocks from damaged files. + */ + bsPutUByte(0x31); + bsPutUByte(0x41); + bsPutUByte(0x59); + bsPutUByte(0x26); + bsPutUByte(0x53); + bsPutUByte(0x59); + + /* Now the block's CRC, so it is in a known place. */ + bsPutInt(this.blockCRC); + + /* Now a single bit indicating randomisation. */ + if (this.blockRandomised) { + bsW(1, 1); + } else { + bsW(1, 0); + } + + /* Finally, block's contents proper. */ + moveToFrontCodeAndSend(); + } + + private void endCompression() throws IOException { + /* + * Now another magic 48-bit number, 0x177245385090, to indicate the end + * of the last block. (sqrt(pi), if you want to know. I did want to use + * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me + * to feel statistically comfortable. Call me paranoid.) + */ + bsPutUByte(0x17); + bsPutUByte(0x72); + bsPutUByte(0x45); + bsPutUByte(0x38); + bsPutUByte(0x50); + bsPutUByte(0x90); + + bsPutInt(this.combinedCRC); + bsFinishedWithStream(); + } + + /** + * Returns the blocksize parameter specified at construction time. 
+ */ + public final int getBlockSize() { + return this.blockSize100k; + } + + public void write(final byte[] buf, int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > buf.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > buf.length(" + buf.length + ")."); + } + if (this.out == null) { + throw new IOException("stream closed"); + } + + for (int hi = offs + len; offs < hi;) { + write0(buf[offs++]); + } + } + + private void write0(int b) throws IOException { + if (this.currentChar != -1) { + b &= 0xff; + if (this.currentChar == b) { + if (++this.runLength > 254) { + writeRun(); + this.currentChar = -1; + this.runLength = 0; + } + // else nothing to do + } else { + writeRun(); + this.runLength = 1; + this.currentChar = b; + } + } else { + this.currentChar = b & 0xff; + this.runLength++; + } + } + + private static void hbAssignCodes(final int[] code, final byte[] length, + final int minLen, final int maxLen, final int alphaSize) { + int vec = 0; + for (int n = minLen; n <= maxLen; n++) { + for (int i = 0; i < alphaSize; i++) { + if ((length[i] & 0xff) == n) { + code[i] = vec; + vec++; + } + } + vec <<= 1; + } + } + + private void bsFinishedWithStream() throws IOException { + while (this.bsLive > 0) { + int ch = this.bsBuff >> 24; + this.out.write(ch); // write 8-bit + this.bsBuff <<= 8; + this.bsLive -= 8; + } + } + + private void bsW(final int n, final int v) throws IOException { + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + + this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); + this.bsLive = bsLiveShadow + n; + } + + private void bsPutUByte(final int c) throws IOException { + bsW(8, c); + } + + private void bsPutInt(final int u) throws IOException { + bsW(8, (u >> 24) & 0xff); + bsW(8, (u >> 16) & 0xff); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); + } + + private void sendMTFValues() throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final int alphaSize = this.nInUse + 2; + + for (int t = N_GROUPS; --t >= 0;) { + byte[] len_t = len[t]; + for (int v = alphaSize; --v >= 0;) { + len_t[v] = GREATER_ICOST; + } + } + + /* Decide how many coding tables to use */ + // assert (this.nMTF > 0) : this.nMTF; + final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 + : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; + + /* Generate an initial set of coding tables */ + sendMTFValues0(nGroups, alphaSize); + + /* + * Iterate up to N_ITERS times to improve the tables. + */ + final int nSelectors = sendMTFValues1(nGroups, alphaSize); + + /* Compute MTF values for the selectors. */ + sendMTFValues2(nGroups, nSelectors); + + /* Assign actual codes for the tables. */ + sendMTFValues3(nGroups, alphaSize); + + /* Transmit the mapping table. */ + sendMTFValues4(); + + /* Now the selectors. */ + sendMTFValues5(nGroups, nSelectors); + + /* Now the coding tables. 
*/ + sendMTFValues6(nGroups, alphaSize); + + /* And finally, the block data proper */ + sendMTFValues7(nSelectors); + } + + private void sendMTFValues0(final int nGroups, final int alphaSize) { + final byte[][] len = this.data.sendMTFValues_len; + final int[] mtfFreq = this.data.mtfFreq; + + int remF = this.nMTF; + int gs = 0; + + for (int nPart = nGroups; nPart > 0; nPart--) { + final int tFreq = remF / nPart; + int ge = gs - 1; + int aFreq = 0; + + for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { + aFreq += mtfFreq[++ge]; + } + + if ((ge > gs) && (nPart != nGroups) && (nPart != 1) + && (((nGroups - nPart) & 1) != 0)) { + aFreq -= mtfFreq[ge--]; + } + + final byte[] len_np = len[nPart - 1]; + for (int v = alphaSize; --v >= 0;) { + if ((v >= gs) && (v <= ge)) { + len_np[v] = LESSER_ICOST; + } else { + len_np[v] = GREATER_ICOST; + } + } + + gs = ge + 1; + remF -= aFreq; + } + } + + private int sendMTFValues1(final int nGroups, final int alphaSize) { + final Data dataShadow = this.data; + final int[][] rfreq = dataShadow.sendMTFValues_rfreq; + final int[] fave = dataShadow.sendMTFValues_fave; + final short[] cost = dataShadow.sendMTFValues_cost; + final char[] sfmap = dataShadow.sfmap; + final byte[] selector = dataShadow.selector; + final byte[][] len = dataShadow.sendMTFValues_len; + final byte[] len_0 = len[0]; + final byte[] len_1 = len[1]; + final byte[] len_2 = len[2]; + final byte[] len_3 = len[3]; + final byte[] len_4 = len[4]; + final byte[] len_5 = len[5]; + final int nMTFShadow = this.nMTF; + + int nSelectors = 0; + + for (int iter = 0; iter < N_ITERS; iter++) { + for (int t = nGroups; --t >= 0;) { + fave[t] = 0; + int[] rfreqt = rfreq[t]; + for (int i = alphaSize; --i >= 0;) { + rfreqt[i] = 0; + } + } + + nSelectors = 0; + + for (int gs = 0; gs < this.nMTF;) { + /* Set group start & end marks. */ + + /* + * Calculate the cost of this group as coded by each of the + * coding tables. + */ + + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + + if (nGroups == N_GROUPS) { + // unrolled version of the else-block + + short cost0 = 0; + short cost1 = 0; + short cost2 = 0; + short cost3 = 0; + short cost4 = 0; + short cost5 = 0; + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + cost0 += len_0[icv] & 0xff; + cost1 += len_1[icv] & 0xff; + cost2 += len_2[icv] & 0xff; + cost3 += len_3[icv] & 0xff; + cost4 += len_4[icv] & 0xff; + cost5 += len_5[icv] & 0xff; + } + + cost[0] = cost0; + cost[1] = cost1; + cost[2] = cost2; + cost[3] = cost3; + cost[4] = cost4; + cost[5] = cost5; + + } else { + for (int t = nGroups; --t >= 0;) { + cost[t] = 0; + } + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + for (int t = nGroups; --t >= 0;) { + cost[t] += len[t][icv] & 0xff; + } + } + } + + /* + * Find the coding table which is best for this group, and + * record its identity in the selector table. + */ + int bt = -1; + for (int t = nGroups, bc = 999999999; --t >= 0;) { + final int cost_t = cost[t]; + if (cost_t < bc) { + bc = cost_t; + bt = t; + } + } + + fave[bt]++; + selector[nSelectors] = (byte) bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ + final int[] rfreq_bt = rfreq[bt]; + for (int i = gs; i <= ge; i++) { + rfreq_bt[sfmap[i]]++; + } + + gs = ge + 1; + } + + /* + * Recompute the tables based on the accumulated frequencies. 
+ */ + for (int t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); + } + } + + return nSelectors; + } + + private void sendMTFValues2(final int nGroups, final int nSelectors) { + // assert (nGroups < 8) : nGroups; + + final Data dataShadow = this.data; + byte[] pos = dataShadow.sendMTFValues2_pos; + + for (int i = nGroups; --i >= 0;) { + pos[i] = (byte) i; + } + + for (int i = 0; i < nSelectors; i++) { + final byte ll_i = dataShadow.selector[i]; + byte tmp = pos[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + } + + pos[0] = tmp; + dataShadow.selectorMtf[i] = (byte) j; + } + } + + private void sendMTFValues3(final int nGroups, final int alphaSize) { + int[][] code = this.data.sendMTFValues_code; + byte[][] len = this.data.sendMTFValues_len; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final byte[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final int l = len_t[i] & 0xff; + if (l > maxLen) { + maxLen = l; + } + if (l < minLen) { + minLen = l; + } + } + + // assert (maxLen <= 20) : maxLen; + // assert (minLen >= 1) : minLen; + + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); + } + } + + private void sendMTFValues4() throws IOException { + final boolean[] inUse = this.data.inUse; + final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; + + for (int i = 16; --i >= 0;) { + inUse16[i] = false; + final int i16 = i * 16; + for (int j = 16; --j >= 0;) { + if (inUse[i16 + j]) { + inUse16[i] = true; + } + } + } + + for (int i = 0; i < 16; i++) { + bsW(1, inUse16[i] ? 1 : 0); + } + + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < 16; i++) { + if (inUse16[i]) { + final int i16 = i * 16; + for (int j = 0; j < 16; j++) { + // inlined: bsW(1, inUse[i16 + j] ? 
1 : 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + if (inUse[i16 + j]) { + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + } + bsLiveShadow++; + } + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues5(final int nGroups, final int nSelectors) + throws IOException { + bsW(3, nGroups); + bsW(15, nSelectors); + + final OutputStream outShadow = this.out; + final byte[] selectorMtf = this.data.selectorMtf; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < nSelectors; i++) { + for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { + // inlined: bsW(1, 1); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues6(final int nGroups, final int alphaSize) + throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final OutputStream outShadow = this.out; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int t = 0; t < nGroups; t++) { + byte[] len_t = len[t]; + int curr = len_t[0] & 0xff; + + // inlined: bsW(5, curr); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= curr << (32 - bsLiveShadow - 5); + bsLiveShadow += 5; + + for (int i = 0; i < alphaSize; i++) { + int lti = len_t[i] & 0xff; + while (curr < lti) { + // inlined: bsW(2, 2); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr++; /* 10 */ + } + + while (curr > lti) { + // inlined: bsW(2, 3); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr--; /* 11 */ + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues7(final int nSelectors) throws IOException { + final Data dataShadow = this.data; + final byte[][] len = dataShadow.sendMTFValues_len; + final int[][] code = dataShadow.sendMTFValues_code; + final OutputStream outShadow = this.out; + final byte[] selector = dataShadow.selector; + final char[] sfmap = dataShadow.sfmap; + final int nMTFShadow = this.nMTF; + + int selCtr = 0; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int gs = 0; gs < nMTFShadow;) { + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + final int selector_selCtr = selector[selCtr] & 0xff; + final int[] code_selCtr = code[selector_selCtr]; + final byte[] len_selCtr = len[selector_selCtr]; + + while (gs <= ge) { + final int sfmap_i = sfmap[gs]; + + // + // inlined: bsW(len_selCtr[sfmap_i] 
& 0xff, + // code_selCtr[sfmap_i]); + // + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + final int n = len_selCtr[sfmap_i] & 0xFF; + bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); + bsLiveShadow += n; + + gs++; + } + + gs = ge + 1; + selCtr++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void moveToFrontCodeAndSend() throws IOException { + bsW(24, this.origPtr); + generateMTFValues(); + sendMTFValues(); + } + + /** + * This is the most hammered method of this class. + * + *

+ * This is the version using unrolled loops. Normally I never use such ones + * in Java code. The unrolling has shown a noticeable performance improvement + * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the + * JIT compiler of the VM. + *

+ */ + private boolean mainSimpleSort(final Data dataShadow, final int lo, + final int hi, final int d) { + final int bigN = hi - lo + 1; + if (bigN < 2) { + return this.firstAttempt && (this.workDone > this.workLimit); + } + + int hp = 0; + while (INCS[hp] < bigN) { + hp++; + } + + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final byte[] block = dataShadow.block; + final int lastShadow = this.last; + final int lastPlus1 = lastShadow + 1; + final boolean firstAttemptShadow = this.firstAttempt; + final int workLimitShadow = this.workLimit; + int workDoneShadow = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. + + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, lastShadow); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { + break HAMMER; + } + } else { + onceRunned = true; + } + + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] == block[(i2 += 6)]) { + int x = lastShadow; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] == block[i2 + 1]) { + if (quadrant[i1] == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { + i2 -= lastPlus1; + } + workDoneShadow++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + else { + if ((block[i1] & 0xff) > (block[i2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } + } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 
0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } // HAMMER + // end inline mainGTU + + fmap[j] = v; + } + + if (firstAttemptShadow && (i <= hi) + && (workDoneShadow > workLimitShadow)) { + break HP; + } + } + } + + this.workDone = workDoneShadow; + return firstAttemptShadow && (workDoneShadow > workLimitShadow); + } + + private static void vswap(int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; + } + } + + private static byte med3(byte a, byte b, byte c) { + return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c + : a); + } + + private void blockSort() { + this.workLimit = WORK_FACTOR * this.last; + this.workDone = 0; + this.blockRandomised = false; + this.firstAttempt = true; + mainSort(); + + if (this.firstAttempt && (this.workDone > this.workLimit)) { + randomiseBlock(); + this.workLimit = this.workDone = 0; + this.firstAttempt = false; + mainSort(); + } + + int[] fmap = this.data.fmap; + this.origPtr = -1; + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + if (fmap[i] == 0) { + this.origPtr = i; + break; + } + } + + // assert (this.origPtr != -1) : this.origPtr; + } + + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final Data dataShadow, final int loSt, + final int hiSt, final int dSt) { + final int[] stack_ll = dataShadow.stack_ll; + final int[] stack_hh = dataShadow.stack_hh; + final int[] stack_dd = dataShadow.stack_dd; + final int[] fmap = dataShadow.fmap; + final byte[] block = dataShadow.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(dataShadow, lo, hi, d)) { + return; + } + } else { + final int d1 = d + 1; + final int med = med3(block[fmap[lo] + d1], + block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; + + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; + + while (true) { + while (unLo <= unHi) { + final int n = ((int) block[fmap[unLo] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } + } + + while (unLo <= unHi) { + final int n = ((int) block[fmap[unHi] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } + } + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { + break; + } + } + + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) + : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) ? 
(hi - gtHi) + : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } + } + } + } + + private void mainSort() { + final Data dataShadow = this.data; + final int[] runningOrder = dataShadow.mainSort_runningOrder; + final int[] copy = dataShadow.mainSort_copy; + final boolean[] bigDone = dataShadow.mainSort_bigDone; + final int[] ftab = dataShadow.ftab; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final int lastShadow = this.last; + final int workLimitShadow = this.workLimit; + final boolean firstAttemptShadow = this.firstAttempt; + + // Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } + + /* + * In the various block-sized structures, live data runs from 0 to + * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area + * for block. + */ + for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; + } + for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) { + quadrant[i] = 0; + } + block[0] = block[lastShadow + 1]; + + // Complete the initial radix sort: + + int c1 = block[0] & 0xff; + for (int i = 0; i <= lastShadow; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (int i = 1; i <= 65536; i++) + ftab[i] += ftab[i - 1]; + + c1 = block[1] & 0xff; + for (int i = 0; i < lastShadow; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } + + fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; + + /* + * Now ftab contains the first loc of every small bucket. Calculate the + * running order, from smallest to largest big bucket. + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } + + for (int h = 364; h != 1;) { + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j + - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; + } + } + runningOrder[j] = vv; + } + } + + /* + * The main sorting loop. + */ + for (int i = 0; i <= 255; i++) { + /* + * Process big buckets, starting with the least full. + */ + final int ss = runningOrder[i]; + + // Step 1: + /* + * Complete the big bucket [ss] by quicksorting any unsorted small + * buckets [ss, j]. Hopefully previous pointer-scanning phases have + * already completed many of the small buckets [ss, j], so we don't + * have to sort them at all. + */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(dataShadow, lo, hi, 2); + if (firstAttemptShadow + && (this.workDone > workLimitShadow)) { + return; + } + } + ftab[sb] = ftab_sb | SETMASK; + } + } + + // Step 2: + // Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. 
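+ // Because the big bucket [ss] is already fully sorted, stepping each of its
+ // rotations back by one position emits the members of every small bucket
+ // [t, ss] in sorted order, so they can simply be appended via copy[].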
+ + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); + copy[c1]++; + } + } + + for (int j = 256; --j >= 0;) + ftab[(j << 8) + ss] |= SETMASK; + + // Step 3: + /* + * The ss big bucket is now done. Record this fact, and update the + * quadrant descriptors. Remember to update quadrants in the + * overshoot area too, if necessary. The "if (i < 255)" test merely + * skips this updating for the last bucket processed, since updating + * for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + lastShadow + 1] = qVal; + } + } + } + + } + } + + private void randomiseBlock() { + final boolean[] inUse = this.data.inUse; + final byte[] block = this.data.block; + final int lastShadow = this.last; + + for (int i = 256; --i >= 0;) + inUse[i] = false; + + int rNToGo = 0; + int rTPos = 0; + for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { + if (rNToGo == 0) { + rNToGo = (char) BZip2Constants.rNums[rTPos]; + if (++rTPos == 512) { + rTPos = 0; + } + } + + rNToGo--; + block[j] ^= ((rNToGo == 1) ? 1 : 0); + + // handle 16 bit signed numbers + inUse[block[j] & 0xff] = true; + } + + this.blockRandomised = true; + } + + private void generateMTFValues() { + final int lastShadow = this.last; + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] sfmap = dataShadow.sfmap; + final int[] mtfFreq = dataShadow.mtfFreq; + final byte[] unseqToSeq = dataShadow.unseqToSeq; + final byte[] yy = dataShadow.generateMTFValues_yy; + + // make maps + int nInUseShadow = 0; + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (byte) nInUseShadow; + nInUseShadow++; + } + } + this.nInUse = nInUseShadow; + + final int eob = nInUseShadow + 1; + + for (int i = eob; i >= 0; i--) { + mtfFreq[i] = 0; + } + + for (int i = nInUseShadow; --i >= 0;) { + yy[i] = (byte) i; + } + + int wr = 0; + int zPend = 0; + + for (int i = 0; i <= lastShadow; i++) { + final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; + byte tmp = yy[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + } + yy[0] = tmp; + + if (j == 0) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + zPend = 0; + } + sfmap[wr] = (char) (j + 1); + wr++; + mtfFreq[j + 1]++; + } + } + + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + } + + sfmap[wr] = (char) eob; + 
mtfFreq[eob]++; + this.nMTF = wr + 1; + } + + private static final class Data extends Object { + + // with blockSize 900k + final boolean[] inUse = new boolean[256]; // 256 byte + final byte[] unseqToSeq = new byte[256]; // 256 byte + final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + final byte[] generateMTFValues_yy = new byte[256]; // 256 byte + final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 + // byte + final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte + final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte + final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte + final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte + + final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + final int[] mainSort_runningOrder = new int[256]; // 1024 byte + final int[] mainSort_copy = new int[256]; // 1024 byte + final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + + final int[] ftab = new int[65537]; // 262148 byte + // ------------ + // 333408 byte + + final byte[] block; // 900021 byte + final int[] fmap; // 3600000 byte + final char[] sfmap; // 3600000 byte + // ------------ + // 8433529 byte + // ============ + + /** + * Array instance identical to sfmap, both are used only temporarily and + * indepently, so we do not need to allocate additional memory. + */ + final char[] quadrant; + + Data(int blockSize100k) { + super(); + + final int n = blockSize100k * BZip2Constants.baseBlockSize; + this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; + this.fmap = new int[n]; + this.sfmap = new char[2 * n]; + this.quadrant = this.sfmap; + } + + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/CRC.java b/src/java/org/apache/hadoop/io/compress/bzip2/CRC.java new file mode 100644 index 00000000000..a9eaf205804 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/CRC.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. 
+ */ + +package org.apache.hadoop.io.compress.bzip2; + +/** + * A simple class the hold and calculate the CRC for sanity checking of the + * data. + * + */ +final class CRC { + static final int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, + 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, + 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, + 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, + 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, + 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, + 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, + 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, + 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, + 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, + 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, + 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, + 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, + 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, + 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, + 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, + 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, + 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, + 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, + 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, + 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, + 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, + 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, + 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, + 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, + 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, + 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, + 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, + 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, + 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, + 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, + 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, + 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, + 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, + 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, + 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, + 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667, + 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, + 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; + + CRC() { + initialiseCRC(); + } + + void initialiseCRC() { + globalCrc = 0xffffffff; + } + + int getFinalCRC() { + return ~globalCrc; + } + + int getGlobalCRC() { + 
return globalCrc; + } + + void setGlobalCRC(int newCrc) { + globalCrc = newCrc; + } + + void updateCRC(int inCh) { + int temp = (globalCrc >> 24) ^ inCh; + if (temp < 0) { + temp = 256 + temp; + } + globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; + } + + void updateCRC(int inCh, int repeat) { + int globalCrcShadow = this.globalCrc; + while (repeat-- > 0) { + int temp = (globalCrcShadow >> 24) ^ inCh; + globalCrcShadow = (globalCrcShadow << 8) + ^ crc32Table[(temp >= 0) ? temp : (temp + 256)]; + } + this.globalCrc = globalCrcShadow; + } + + int globalCrc; +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java new file mode 100644 index 00000000000..f27e831a58b --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.util.zip.Deflater; + +import org.apache.hadoop.io.compress.Compressor; + +/** + * A wrapper around java.util.zip.Deflater to make it conform + * to org.apache.hadoop.io.compress.Compressor interface. + * + */ +public class BuiltInZlibDeflater extends Deflater implements Compressor { + + public BuiltInZlibDeflater(int level, boolean nowrap) { + super(level, nowrap); + } + + public BuiltInZlibDeflater(int level) { + super(level); + } + + public BuiltInZlibDeflater() { + super(); + } + + public synchronized int compress(byte[] b, int off, int len) + throws IOException { + return super.deflate(b, off, len); + } +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java new file mode 100644 index 00000000000..0223587ad01 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
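The BuiltInZlibDeflater wrapper above only adds the Compressor-style compress(byte[], int, int) call on top of java.util.zip.Deflater. A minimal round-trip sketch follows; it is not part of this patch, the example class name and sample data are ours, and it assumes the patched Hadoop classes are on the classpath. The result is verified with the plain JDK Inflater.

import java.util.zip.Inflater;
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;

public class BuiltInZlibDeflaterExample {
  public static void main(String[] args) throws Exception {
    byte[] input = "hello, built-in zlib".getBytes("UTF-8");

    // Feed the data through the inherited Deflater API, then drain it
    // via the Compressor-style compress() method added by the wrapper.
    BuiltInZlibDeflater deflater = new BuiltInZlibDeflater();
    deflater.setInput(input, 0, input.length);
    deflater.finish();
    byte[] compressed = new byte[input.length + 64];
    int clen = 0;
    while (!deflater.finished()) {
      clen += deflater.compress(compressed, clen, compressed.length - clen);
    }
    deflater.end();

    // Verify the round trip with the plain JDK Inflater.
    Inflater inflater = new Inflater();
    inflater.setInput(compressed, 0, clen);
    byte[] restored = new byte[input.length];
    int dlen = inflater.inflate(restored);
    inflater.end();
    System.out.println(new String(restored, 0, dlen, "UTF-8")); // hello, built-in zlib
  }
}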
+ */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +import org.apache.hadoop.io.compress.Decompressor; + +/** + * A wrapper around java.util.zip.Inflater to make it conform + * to org.apache.hadoop.io.compress.Decompressor interface. + * + */ +public class BuiltInZlibInflater extends Inflater implements Decompressor { + + public BuiltInZlibInflater(boolean nowrap) { + super(nowrap); + } + + public BuiltInZlibInflater() { + super(); + } + + public synchronized int decompress(byte[] b, int off, int len) + throws IOException { + try { + return super.inflate(b, off, len); + } catch (DataFormatException dfe) { + throw new IOException(dfe.getMessage()); + } + } +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java b/src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java new file mode 100644 index 00000000000..754af216ad2 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.nio.Buffer; +import java.nio.ByteBuffer; + +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * A {@link Compressor} based on the popular + * zlib compression algorithm. + * http://www.zlib.net/ + * + */ +public class ZlibCompressor implements Compressor { + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64*1024; + + // HACK - Use this as a global lock in the JNI layer + private static Class clazz = ZlibCompressor.class; + + private long stream; + private CompressionLevel level; + private CompressionStrategy strategy; + private CompressionHeader windowBits; + private int directBufferSize; + private byte[] userBuf = null; + private int userBufOff = 0, userBufLen = 0; + private Buffer uncompressedDirectBuf = null; + private int uncompressedDirectBufOff = 0, uncompressedDirectBufLen = 0; + private Buffer compressedDirectBuf = null; + private boolean finish, finished; + + /** + * The compression level for zlib library. + */ + public static enum CompressionLevel { + /** + * Compression level for no compression. + */ + NO_COMPRESSION (0), + + /** + * Compression level for fastest compression. + */ + BEST_SPEED (1), + + /** + * Compression level for best compression. + */ + BEST_COMPRESSION (9), + + /** + * Default compression level. + */ + DEFAULT_COMPRESSION (-1); + + + private final int compressionLevel; + + CompressionLevel(int level) { + compressionLevel = level; + } + + int compressionLevel() { + return compressionLevel; + } + }; + + /** + * The compression level for zlib library. 
+ */ + public static enum CompressionStrategy { + /** + * Compression strategy best used for data consisting mostly of small + * values with a somewhat random distribution. Forces more Huffman coding + * and less string matching. + */ + FILTERED (1), + + /** + * Compression strategy for Huffman coding only. + */ + HUFFMAN_ONLY (2), + + /** + * Compression strategy to limit match distances to one + * (run-length encoding). + */ + RLE (3), + + /** + * Compression strategy to prevent the use of dynamic Huffman codes, + * allowing for a simpler decoder for special applications. + */ + FIXED (4), + + /** + * Default compression strategy. + */ + DEFAULT_STRATEGY (0); + + + private final int compressionStrategy; + + CompressionStrategy(int strategy) { + compressionStrategy = strategy; + } + + int compressionStrategy() { + return compressionStrategy; + } + }; + + /** + * The type of header for compressed data. + */ + public static enum CompressionHeader { + /** + * No headers/trailers/checksums. + */ + NO_HEADER (-15), + + /** + * Default headers/trailers/checksums. + */ + DEFAULT_HEADER (15), + + /** + * Simple gzip headers/trailers. + */ + GZIP_FORMAT (31); + + private final int windowBits; + + CompressionHeader(int windowBits) { + this.windowBits = windowBits; + } + + public int windowBits() { + return windowBits; + } + } + + private static boolean nativeZlibLoaded = false; + + static { + if (NativeCodeLoader.isNativeCodeLoaded()) { + try { + // Initialize the native library + initIDs(); + nativeZlibLoaded = true; + } catch (Throwable t) { + // Ignore failure to load/initialize native-zlib + } + } + } + + static boolean isNativeZlibLoaded() { + return nativeZlibLoaded; + } + + /** + * Creates a new compressor using the specified compression level. + * Compressed data will be generated in ZLIB format. + * + * @param level Compression level #CompressionLevel + * @param strategy Compression strategy #CompressionStrategy + * @param header Compression header #CompressionHeader + * @param directBufferSize Size of the direct buffer to be used. + */ + public ZlibCompressor(CompressionLevel level, CompressionStrategy strategy, + CompressionHeader header, int directBufferSize) { + this.level = level; + this.strategy = strategy; + this.windowBits = header; + this.directBufferSize = directBufferSize; + + uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + compressedDirectBuf.position(directBufferSize); + + stream = init(this.level.compressionLevel(), + this.strategy.compressionStrategy(), + this.windowBits.windowBits()); + } + + /** + * Creates a new compressor with the default compression level. + * Compressed data will be generated in ZLIB format. 
+ */ + public ZlibCompressor() { + this(CompressionLevel.DEFAULT_COMPRESSION, + CompressionStrategy.DEFAULT_STRATEGY, + CompressionHeader.DEFAULT_HEADER, + DEFAULT_DIRECT_BUFFER_SIZE); + } + + public synchronized void setInput(byte[] b, int off, int len) { + if (b== null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + this.userBuf = b; + this.userBufOff = off; + this.userBufLen = len; + setInputFromSavedData(); + + // Reinitialize zlib's output direct buffer + compressedDirectBuf.limit(directBufferSize); + compressedDirectBuf.position(directBufferSize); + } + + synchronized void setInputFromSavedData() { + uncompressedDirectBufOff = 0; + uncompressedDirectBufLen = userBufLen; + if (uncompressedDirectBufLen > directBufferSize) { + uncompressedDirectBufLen = directBufferSize; + } + + // Reinitialize zlib's input direct buffer + uncompressedDirectBuf.rewind(); + ((ByteBuffer)uncompressedDirectBuf).put(userBuf, userBufOff, + uncompressedDirectBufLen); + + // Note how much data is being fed to zlib + userBufOff += uncompressedDirectBufLen; + userBufLen -= uncompressedDirectBufLen; + } + + public synchronized void setDictionary(byte[] b, int off, int len) { + if (stream == 0 || b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + setDictionary(stream, b, off, len); + } + + public synchronized boolean needsInput() { + // Consume remaining compressed data? + if (compressedDirectBuf.remaining() > 0) { + return false; + } + + // Check if zlib has consumed all input + if (uncompressedDirectBufLen <= 0) { + // Check if we have consumed all user-input + if (userBufLen <= 0) { + return true; + } else { + setInputFromSavedData(); + } + } + + return false; + } + + public synchronized void finish() { + finish = true; + } + + public synchronized boolean finished() { + // Check if 'zlib' says its 'finished' and + // all compressed data has been consumed + return (finished && compressedDirectBuf.remaining() == 0); + } + + public synchronized int compress(byte[] b, int off, int len) + throws IOException { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + int n = 0; + + // Check if there is compressed data + n = compressedDirectBuf.remaining(); + if (n > 0) { + n = Math.min(n, len); + ((ByteBuffer)compressedDirectBuf).get(b, off, n); + return n; + } + + // Re-initialize the zlib's output direct buffer + compressedDirectBuf.rewind(); + compressedDirectBuf.limit(directBufferSize); + + // Compress data + n = deflateBytesDirect(); + compressedDirectBuf.limit(n); + + // Get atmost 'len' bytes + n = Math.min(n, len); + ((ByteBuffer)compressedDirectBuf).get(b, off, n); + + return n; + } + + /** + * Returns the total number of compressed bytes output so far. + * + * @return the total (non-negative) number of compressed bytes output so far + */ + public synchronized long getBytesWritten() { + checkStream(); + return getBytesWritten(stream); + } + + /** + * Returns the total number of uncompressed bytes input so far.

+ * + * @return the total (non-negative) number of uncompressed bytes input so far + */ + public synchronized long getBytesRead() { + checkStream(); + return getBytesRead(stream); + } + + public synchronized void reset() { + checkStream(); + reset(stream); + finish = false; + finished = false; + uncompressedDirectBuf.rewind(); + uncompressedDirectBufOff = uncompressedDirectBufLen = 0; + compressedDirectBuf.limit(directBufferSize); + compressedDirectBuf.position(directBufferSize); + userBufOff = userBufLen = 0; + } + + public synchronized void end() { + if (stream != 0) { + end(stream); + stream = 0; + } + } + + private void checkStream() { + if (stream == 0) + throw new NullPointerException(); + } + + private native static void initIDs(); + private native static long init(int level, int strategy, int windowBits); + private native static void setDictionary(long strm, byte[] b, int off, + int len); + private native int deflateBytesDirect(); + private native static long getBytesRead(long strm); + private native static long getBytesWritten(long strm); + private native static void reset(long strm); + private native static void end(long strm); +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java b/src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java new file mode 100644 index 00000000000..56738252284 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.nio.Buffer; +import java.nio.ByteBuffer; + +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * A {@link Decompressor} based on the popular + * zlib compression algorithm. + * http://www.zlib.net/ + * + */ +public class ZlibDecompressor implements Decompressor { + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64*1024; + + // HACK - Use this as a global lock in the JNI layer + private static Class clazz = ZlibDecompressor.class; + + private long stream; + private CompressionHeader header; + private int directBufferSize; + private Buffer compressedDirectBuf = null; + private int compressedDirectBufOff, compressedDirectBufLen; + private Buffer uncompressedDirectBuf = null; + private byte[] userBuf = null; + private int userBufOff = 0, userBufLen = 0; + private boolean finished; + private boolean needDict; + + /** + * The headers to detect from compressed data. + */ + public static enum CompressionHeader { + /** + * No headers/trailers/checksums. + */ + NO_HEADER (-15), + + /** + * Default headers/trailers/checksums. + */ + DEFAULT_HEADER (15), + + /** + * Simple gzip headers/trailers. 
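The ZlibCompressor class above implements the streaming Compressor contract: the caller hands data in with setInput(), calls finish(), and then drains compress() until finished() returns true, while the class shuttles bytes through its direct buffers into the native deflater. A rough usage sketch, ours rather than part of the patch; it assumes the native zlib library has been loaded (which isNativeZlibLoaded() would normally be used to check), and the buffer sizes and sample text are arbitrary.

import java.io.ByteArrayOutputStream;
import org.apache.hadoop.io.compress.zlib.ZlibCompressor;

public class ZlibCompressorExample {
  public static void main(String[] args) throws Exception {
    byte[] input = "some text to squeeze".getBytes("UTF-8");

    // GZIP_FORMAT asks the native deflater to emit gzip headers/trailers;
    // the remaining parameters mirror the defaults used by ZlibCompressor().
    ZlibCompressor compressor = new ZlibCompressor(
        ZlibCompressor.CompressionLevel.BEST_SPEED,
        ZlibCompressor.CompressionStrategy.DEFAULT_STRATEGY,
        ZlibCompressor.CompressionHeader.GZIP_FORMAT,
        64 * 1024);

    compressor.setInput(input, 0, input.length);
    compressor.finish();

    // Drain compressed output until the compressor reports completion.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    byte[] buf = new byte[4096];
    while (!compressor.finished()) {
      int n = compressor.compress(buf, 0, buf.length);
      out.write(buf, 0, n);
    }
    compressor.end();

    System.out.println(input.length + " -> " + out.size() + " bytes");
  }
}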
+ */ + GZIP_FORMAT (31), + + /** + * Autodetect gzip/zlib headers/trailers. + */ + AUTODETECT_GZIP_ZLIB (47); + + private final int windowBits; + + CompressionHeader(int windowBits) { + this.windowBits = windowBits; + } + + public int windowBits() { + return windowBits; + } + } + + private static boolean nativeZlibLoaded = false; + + static { + if (NativeCodeLoader.isNativeCodeLoaded()) { + try { + // Initialize the native library + initIDs(); + nativeZlibLoaded = true; + } catch (Throwable t) { + // Ignore failure to load/initialize native-zlib + } + } + } + + static boolean isNativeZlibLoaded() { + return nativeZlibLoaded; + } + + /** + * Creates a new decompressor. + */ + public ZlibDecompressor(CompressionHeader header, int directBufferSize) { + this.header = header; + this.directBufferSize = directBufferSize; + compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + uncompressedDirectBuf.position(directBufferSize); + + stream = init(this.header.windowBits()); + } + + public ZlibDecompressor() { + this(CompressionHeader.DEFAULT_HEADER, DEFAULT_DIRECT_BUFFER_SIZE); + } + + public synchronized void setInput(byte[] b, int off, int len) { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + this.userBuf = b; + this.userBufOff = off; + this.userBufLen = len; + + setInputFromSavedData(); + + // Reinitialize zlib's output direct buffer + uncompressedDirectBuf.limit(directBufferSize); + uncompressedDirectBuf.position(directBufferSize); + } + + synchronized void setInputFromSavedData() { + compressedDirectBufOff = 0; + compressedDirectBufLen = userBufLen; + if (compressedDirectBufLen > directBufferSize) { + compressedDirectBufLen = directBufferSize; + } + + // Reinitialize zlib's input direct buffer + compressedDirectBuf.rewind(); + ((ByteBuffer)compressedDirectBuf).put(userBuf, userBufOff, + compressedDirectBufLen); + + // Note how much data is being fed to zlib + userBufOff += compressedDirectBufLen; + userBufLen -= compressedDirectBufLen; + } + + public synchronized void setDictionary(byte[] b, int off, int len) { + if (stream == 0 || b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + setDictionary(stream, b, off, len); + needDict = false; + } + + public synchronized boolean needsInput() { + // Consume remanining compressed data? 
+ if (uncompressedDirectBuf.remaining() > 0) { + return false; + } + + // Check if zlib has consumed all input + if (compressedDirectBufLen <= 0) { + // Check if we have consumed all user-input + if (userBufLen <= 0) { + return true; + } else { + setInputFromSavedData(); + } + } + + return false; + } + + public synchronized boolean needsDictionary() { + return needDict; + } + + public synchronized boolean finished() { + // Check if 'zlib' says its 'finished' and + // all compressed data has been consumed + return (finished && uncompressedDirectBuf.remaining() == 0); + } + + public synchronized int decompress(byte[] b, int off, int len) + throws IOException { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + int n = 0; + + // Check if there is uncompressed data + n = uncompressedDirectBuf.remaining(); + if (n > 0) { + n = Math.min(n, len); + ((ByteBuffer)uncompressedDirectBuf).get(b, off, n); + return n; + } + + // Re-initialize the zlib's output direct buffer + uncompressedDirectBuf.rewind(); + uncompressedDirectBuf.limit(directBufferSize); + + // Decompress data + n = inflateBytesDirect(); + uncompressedDirectBuf.limit(n); + + // Get atmost 'len' bytes + n = Math.min(n, len); + ((ByteBuffer)uncompressedDirectBuf).get(b, off, n); + + return n; + } + + /** + * Returns the total number of compressed bytes output so far. + * + * @return the total (non-negative) number of compressed bytes output so far + */ + public synchronized long getBytesWritten() { + checkStream(); + return getBytesWritten(stream); + } + + /** + * Returns the total number of uncompressed bytes input so far.

+ * + * @return the total (non-negative) number of uncompressed bytes input so far + */ + public synchronized long getBytesRead() { + checkStream(); + return getBytesRead(stream); + } + + public synchronized void reset() { + checkStream(); + reset(stream); + finished = false; + needDict = false; + compressedDirectBufOff = compressedDirectBufLen = 0; + uncompressedDirectBuf.limit(directBufferSize); + uncompressedDirectBuf.position(directBufferSize); + userBufOff = userBufLen = 0; + } + + public synchronized void end() { + if (stream != 0) { + end(stream); + stream = 0; + } + } + + protected void finalize() { + end(); + } + + private void checkStream() { + if (stream == 0) + throw new NullPointerException(); + } + + private native static void initIDs(); + private native static long init(int windowBits); + private native static void setDictionary(long strm, byte[] b, int off, + int len); + private native int inflateBytesDirect(); + private native static long getBytesRead(long strm); + private native static long getBytesWritten(long strm); + private native static void reset(long strm); + private native static void end(long strm); +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java b/src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java new file mode 100644 index 00000000000..e3ce3ec1afe --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * A collection of factories to create the right + * zlib/gzip compressor/decompressor instances. + * + */ +public class ZlibFactory { + private static final Log LOG = + LogFactory.getLog(ZlibFactory.class); + + private static boolean nativeZlibLoaded = false; + + static { + if (NativeCodeLoader.isNativeCodeLoaded()) { + nativeZlibLoaded = ZlibCompressor.isNativeZlibLoaded() && + ZlibDecompressor.isNativeZlibLoaded(); + + if (nativeZlibLoaded) { + LOG.info("Successfully loaded & initialized native-zlib library"); + } else { + LOG.warn("Failed to load/initialize native-zlib library"); + } + } + } + + /** + * Check if native-zlib code is loaded & initialized correctly and + * can be loaded for this job. 
+ * + * @param conf configuration + * @return true if native-zlib is loaded & initialized + * and can be loaded for this job, else false + */ + public static boolean isNativeZlibLoaded(Configuration conf) { + return nativeZlibLoaded && conf.getBoolean("hadoop.native.lib", true); + } + + /** + * Return the appropriate type of the zlib compressor. + * + * @param conf configuration + * @return the appropriate type of the zlib compressor. + */ + public static Class + getZlibCompressorType(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + ZlibCompressor.class : BuiltInZlibDeflater.class; + } + + /** + * Return the appropriate implementation of the zlib compressor. + * + * @param conf configuration + * @return the appropriate implementation of the zlib compressor. + */ + public static Compressor getZlibCompressor(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + new ZlibCompressor() : new BuiltInZlibDeflater(); + } + + /** + * Return the appropriate type of the zlib decompressor. + * + * @param conf configuration + * @return the appropriate type of the zlib decompressor. + */ + public static Class + getZlibDecompressorType(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + ZlibDecompressor.class : BuiltInZlibInflater.class; + } + + /** + * Return the appropriate implementation of the zlib decompressor. + * + * @param conf configuration + * @return the appropriate implementation of the zlib decompressor. + */ + public static Decompressor getZlibDecompressor(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + new ZlibDecompressor() : new BuiltInZlibInflater(); + } + +} diff --git a/src/java/org/apache/hadoop/io/package.html b/src/java/org/apache/hadoop/io/package.html new file mode 100644 index 00000000000..ce4ca1f352a --- /dev/null +++ b/src/java/org/apache/hadoop/io/package.html @@ -0,0 +1,24 @@ + + + + + +Generic i/o code for use when reading and writing data to the network, +to databases, and to files. + + diff --git a/src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java new file mode 100644 index 00000000000..19b68538539 --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
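ZlibFactory above picks between the JNI-backed ZlibCompressor/ZlibDecompressor and the pure-Java BuiltInZlib* wrappers, based on whether the native library loaded and on the hadoop.native.lib setting. A short sketch of how a caller might use it; the example class is ours and simply reports which implementation was selected.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;

public class ZlibFactoryExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Either the native ZlibCompressor/ZlibDecompressor or the BuiltInZlib*
    // wrappers are returned; callers only ever see the interfaces.
    Compressor compressor = ZlibFactory.getZlibCompressor(conf);
    Decompressor decompressor = ZlibFactory.getZlibDecompressor(conf);

    System.out.println("native zlib in use: "
        + ZlibFactory.isNativeZlibLoaded(conf));
    System.out.println(compressor.getClass().getName());
    System.out.println(decompressor.getClass().getName());
  }
}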
+ */ +package org.apache.hadoop.io.retry; + +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Collections; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +class RetryInvocationHandler implements InvocationHandler { + public static final Log LOG = LogFactory.getLog(RetryInvocationHandler.class); + private Object implementation; + + private RetryPolicy defaultPolicy; + private Map methodNameToPolicyMap; + + public RetryInvocationHandler(Object implementation, RetryPolicy retryPolicy) { + this.implementation = implementation; + this.defaultPolicy = retryPolicy; + this.methodNameToPolicyMap = Collections.emptyMap(); + } + + public RetryInvocationHandler(Object implementation, Map methodNameToPolicyMap) { + this.implementation = implementation; + this.defaultPolicy = RetryPolicies.TRY_ONCE_THEN_FAIL; + this.methodNameToPolicyMap = methodNameToPolicyMap; + } + + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable { + RetryPolicy policy = methodNameToPolicyMap.get(method.getName()); + if (policy == null) { + policy = defaultPolicy; + } + + int retries = 0; + while (true) { + try { + return invokeMethod(method, args); + } catch (Exception e) { + if (!policy.shouldRetry(e, retries++)) { + LOG.info("Exception while invoking " + method.getName() + + " of " + implementation.getClass() + ". Not retrying." + + StringUtils.stringifyException(e)); + if (!method.getReturnType().equals(Void.TYPE)) { + throw e; // non-void methods can't fail without an exception + } + return null; + } + LOG.debug("Exception while invoking " + method.getName() + + " of " + implementation.getClass() + ". Retrying." + + StringUtils.stringifyException(e)); + } + } + } + + private Object invokeMethod(Method method, Object[] args) throws Throwable { + try { + if (!method.isAccessible()) { + method.setAccessible(true); + } + return method.invoke(implementation, args); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/retry/RetryPolicies.java b/src/java/org/apache/hadoop/io/retry/RetryPolicies.java new file mode 100644 index 00000000000..30a78885da8 --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.retry; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.ipc.RemoteException; + +/** + *

+ * A collection of useful implementations of {@link RetryPolicy}. + *

+ */ +public class RetryPolicies { + + /** + *

+ * Try once, and fail by re-throwing the exception. + * This corresponds to having no retry mechanism in place. + *

+ */ + public static final RetryPolicy TRY_ONCE_THEN_FAIL = new TryOnceThenFail(); + + /** + *

+ * Try once, and fail silently for void methods, or by + * re-throwing the exception for non-void methods. + *

+ */ + public static final RetryPolicy TRY_ONCE_DONT_FAIL = new TryOnceDontFail(); + + /** + *

+ * Keep trying forever. + *

+ */ + public static final RetryPolicy RETRY_FOREVER = new RetryForever(); + + /** + *

+ * Keep trying a limited number of times, waiting a fixed time between attempts, + * and then fail by re-throwing the exception. + *
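+ * For example (an illustrative reading of the parameters),
+ * retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS) retries a
+ * failing call up to four times, sleeping ten seconds between attempts,
+ * before finally re-throwing the exception.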

+ */ + public static final RetryPolicy retryUpToMaximumCountWithFixedSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + return new RetryUpToMaximumCountWithFixedSleep(maxRetries, sleepTime, timeUnit); + } + + /** + *

+ * Keep trying for a maximum time, waiting a fixed time between attempts, + * and then fail by re-throwing the exception. + *

+ */ + public static final RetryPolicy retryUpToMaximumTimeWithFixedSleep(long maxTime, long sleepTime, TimeUnit timeUnit) { + return new RetryUpToMaximumTimeWithFixedSleep(maxTime, sleepTime, timeUnit); + } + + /** + *

+ * Keep trying a limited number of times, waiting a growing amount of time between attempts, + * and then fail by re-throwing the exception. + * The time between attempts is sleepTime multiplied by the number of tries so far. + *

+ */ + public static final RetryPolicy retryUpToMaximumCountWithProportionalSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + return new RetryUpToMaximumCountWithProportionalSleep(maxRetries, sleepTime, timeUnit); + } + + /** + *

+ * Keep trying a limited number of times, waiting a growing amount of time between attempts, + * and then fail by re-throwing the exception. + * The time between attempts is sleepTime multiplied by a random + * number in the range of [0, 2 to the number of retries) + *
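+ * For example (a sketch of the arithmetic, assuming a sleepTime of one
+ * second), the sleep after the third failed attempt is a randomly chosen
+ * whole number of seconds in the range [0, 8).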

+ */ + public static final RetryPolicy exponentialBackoffRetry( + int maxRetries, long sleepTime, TimeUnit timeUnit) { + return new ExponentialBackoffRetry(maxRetries, sleepTime, timeUnit); + } + + /** + *

+ * Set a default policy with some explicit handlers for specific exceptions. + *
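+ * A minimal sketch of building such a map (the exception class and the
+ * policies chosen here are purely illustrative):
+ *
+ *   Map<Class<? extends Exception>, RetryPolicy> map =
+ *       new HashMap<Class<? extends Exception>, RetryPolicy>();
+ *   map.put(SocketTimeoutException.class, RetryPolicies.RETRY_FOREVER);
+ *   RetryPolicy policy =
+ *       RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, map);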

+ */ + public static final RetryPolicy retryByException(RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + return new ExceptionDependentRetry(defaultPolicy, exceptionToPolicyMap); + } + + /** + *

+ * A retry policy for RemoteException. + * Set a default policy with some explicit handlers for specific exceptions. + *

+ */ + public static final RetryPolicy retryByRemoteException( + RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + return new RemoteExceptionDependentRetry(defaultPolicy, exceptionToPolicyMap); + } + + static class TryOnceThenFail implements RetryPolicy { + public boolean shouldRetry(Exception e, int retries) throws Exception { + throw e; + } + } + static class TryOnceDontFail implements RetryPolicy { + public boolean shouldRetry(Exception e, int retries) throws Exception { + return false; + } + } + + static class RetryForever implements RetryPolicy { + public boolean shouldRetry(Exception e, int retries) throws Exception { + return true; + } + } + + static abstract class RetryLimited implements RetryPolicy { + int maxRetries; + long sleepTime; + TimeUnit timeUnit; + + public RetryLimited(int maxRetries, long sleepTime, TimeUnit timeUnit) { + this.maxRetries = maxRetries; + this.sleepTime = sleepTime; + this.timeUnit = timeUnit; + } + + public boolean shouldRetry(Exception e, int retries) throws Exception { + if (retries >= maxRetries) { + throw e; + } + try { + timeUnit.sleep(calculateSleepTime(retries)); + } catch (InterruptedException ie) { + // retry + } + return true; + } + + protected abstract long calculateSleepTime(int retries); + } + + static class RetryUpToMaximumCountWithFixedSleep extends RetryLimited { + public RetryUpToMaximumCountWithFixedSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + super(maxRetries, sleepTime, timeUnit); + } + + @Override + protected long calculateSleepTime(int retries) { + return sleepTime; + } + } + + static class RetryUpToMaximumTimeWithFixedSleep extends RetryUpToMaximumCountWithFixedSleep { + public RetryUpToMaximumTimeWithFixedSleep(long maxTime, long sleepTime, TimeUnit timeUnit) { + super((int) (maxTime / sleepTime), sleepTime, timeUnit); + } + } + + static class RetryUpToMaximumCountWithProportionalSleep extends RetryLimited { + public RetryUpToMaximumCountWithProportionalSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + super(maxRetries, sleepTime, timeUnit); + } + + @Override + protected long calculateSleepTime(int retries) { + return sleepTime * (retries + 1); + } + } + + static class ExceptionDependentRetry implements RetryPolicy { + + RetryPolicy defaultPolicy; + Map, RetryPolicy> exceptionToPolicyMap; + + public ExceptionDependentRetry(RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + this.defaultPolicy = defaultPolicy; + this.exceptionToPolicyMap = exceptionToPolicyMap; + } + + public boolean shouldRetry(Exception e, int retries) throws Exception { + RetryPolicy policy = exceptionToPolicyMap.get(e.getClass()); + if (policy == null) { + policy = defaultPolicy; + } + return policy.shouldRetry(e, retries); + } + + } + + static class RemoteExceptionDependentRetry implements RetryPolicy { + + RetryPolicy defaultPolicy; + Map exceptionNameToPolicyMap; + + public RemoteExceptionDependentRetry(RetryPolicy defaultPolicy, + Map, + RetryPolicy> exceptionToPolicyMap) { + this.defaultPolicy = defaultPolicy; + this.exceptionNameToPolicyMap = new HashMap(); + for (Entry, RetryPolicy> e : + exceptionToPolicyMap.entrySet()) { + exceptionNameToPolicyMap.put(e.getKey().getName(), e.getValue()); + } + } + + public boolean shouldRetry(Exception e, int retries) throws Exception { + RetryPolicy policy = null; + if (e instanceof RemoteException) { + policy = exceptionNameToPolicyMap.get( + ((RemoteException) e).getClassName()); + } + if (policy == null) { + policy = defaultPolicy; + } + return 
policy.shouldRetry(e, retries); + } + } + + static class ExponentialBackoffRetry extends RetryLimited { + private Random r = new Random(); + public ExponentialBackoffRetry( + int maxRetries, long sleepTime, TimeUnit timeUnit) { + super(maxRetries, sleepTime, timeUnit); + } + + @Override + protected long calculateSleepTime(int retries) { + return sleepTime*r.nextInt(1<<(retries+1)); + } + } +} diff --git a/src/java/org/apache/hadoop/io/retry/RetryPolicy.java b/src/java/org/apache/hadoop/io/retry/RetryPolicy.java new file mode 100644 index 00000000000..26d3267bc2a --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryPolicy.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.retry; + +/** + *

+ * Specifies a policy for retrying method failures. + * Implementations of this interface should be immutable. + *

+ */ +public interface RetryPolicy { + /** + *

+ * Determines whether the framework should retry a + * method, given the exception that caused it to fail and the + * number of retries that have been made for that operation + * so far. + *

+ * @param e The exception that caused the method to fail. + * @param retries The number of times the method has been retried. + * @return true if the method should be retried, + * false if the method should not be retried + * but shouldn't fail with an exception (only for void methods). + * @throws Exception The re-thrown exception e indicating + * that the method failed and should not be retried further. + */ + public boolean shouldRetry(Exception e, int retries) throws Exception; +} diff --git a/src/java/org/apache/hadoop/io/retry/RetryProxy.java b/src/java/org/apache/hadoop/io/retry/RetryProxy.java new file mode 100644 index 00000000000..937f832213c --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryProxy.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.retry; + +import java.lang.reflect.Proxy; +import java.util.Map; + +/** + *

+ * A factory for creating retry proxies. + *

+ */ +public class RetryProxy { + /** + *

+ * Create a proxy for an interface of an implementation class + * using the same retry policy for each method in the interface. + *

+ * @param iface the interface that the retry will implement + * @param implementation the instance whose methods should be retried + * @param retryPolicy the policy for retrying method call failures + * @return the retry proxy + */ + public static Object create(Class iface, Object implementation, + RetryPolicy retryPolicy) { + return Proxy.newProxyInstance( + implementation.getClass().getClassLoader(), + new Class[] { iface }, + new RetryInvocationHandler(implementation, retryPolicy) + ); + } + + /** + *

+ * Create a proxy for an interface of an implementation class + * using a set of retry policies specified by method name. + * If no retry policy is defined for a method then a default of + * {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. + *

+ * @param iface the interface that the retry will implement + * @param implementation the instance whose methods should be retried + * @param methodNameToPolicyMap a map of method names to retry policies + * @return the retry proxy + */ + public static Object create(Class iface, Object implementation, + Map methodNameToPolicyMap) { + return Proxy.newProxyInstance( + implementation.getClass().getClassLoader(), + new Class[] { iface }, + new RetryInvocationHandler(implementation, methodNameToPolicyMap) + ); + } +} diff --git a/src/java/org/apache/hadoop/io/retry/package.html b/src/java/org/apache/hadoop/io/retry/package.html new file mode 100644 index 00000000000..ae553fc7a62 --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/package.html @@ -0,0 +1,48 @@ + + + + + + +

+A mechanism for selectively retrying methods that throw exceptions under certain circumstances. +

+ +

+Typical usage is +

+ +
+UnreliableImplementation unreliableImpl = new UnreliableImplementation();
+UnreliableInterface unreliable = (UnreliableInterface)
+  RetryProxy.create(UnreliableInterface.class, unreliableImpl,
+    RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
+unreliable.call();
+
+ +

+This will retry any method called on unreliable four times - in this case the call() +method - sleeping 10 seconds between +each retry. There are a number of {@link org.apache.hadoop.io.retry.RetryPolicies retry policies} +available, or you can implement a custom one by implementing {@link org.apache.hadoop.io.retry.RetryPolicy}. +It is also possible to specify retry policies on a +{@link org.apache.hadoop.io.retry.RetryProxy#create(Class, Object, Map) per-method basis}. +
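+
+A per-method policy map might be set up as follows (an illustrative sketch;
+UnreliableInterface and unreliableImpl are the same as in the example above,
+and only the call() method gets a non-default policy):
+
+Map<String, RetryPolicy> methodPolicies = new HashMap<String, RetryPolicy>();
+methodPolicies.put("call",
+  RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
+UnreliableInterface unreliable = (UnreliableInterface)
+  RetryProxy.create(UnreliableInterface.class, unreliableImpl, methodPolicies);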

+ + + diff --git a/src/java/org/apache/hadoop/io/serializer/Deserializer.java b/src/java/org/apache/hadoop/io/serializer/Deserializer.java new file mode 100644 index 00000000000..1234a57b2b4 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/Deserializer.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.InputStream; + +/** + *

+ * Provides a facility for deserializing objects of type T from an + * {@link InputStream}. + *

+ * + *

+ * Deserializers are stateful, but must not buffer the input since + * other producers may read from the input between calls to + * {@link #deserialize(Object)}. + *
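+ * A typical read loop looks roughly like this (an illustrative sketch;
+ * in is an already opened InputStream and hasMoreRecords() stands in for
+ * whatever end-of-input check the caller uses):
+ *
+ *   deserializer.open(in);
+ *   T record = null;
+ *   while (hasMoreRecords()) {
+ *     record = deserializer.deserialize(record); // may reuse the instance
+ *   }
+ *   deserializer.close();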

+ * @param + */ +public interface Deserializer { + /** + *

Prepare the deserializer for reading.

+ */ + void open(InputStream in) throws IOException; + + /** + *

+ * Deserialize the next object from the underlying input stream. + * If the object t is non-null then this deserializer + * may reuse it, reading the next object from the input stream into + * it. Otherwise, if the object t is null a new + * deserialized object will be created. + *

+ * @return the deserialized object + */ + T deserialize(T t) throws IOException; + + /** + *

Close the underlying input stream and clear up any resources.

+ */ + void close() throws IOException; +} diff --git a/src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java b/src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java new file mode 100644 index 00000000000..70e8b689e9c --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.hadoop.io.InputBuffer; +import org.apache.hadoop.io.RawComparator; + +/** + *

+ * A {@link RawComparator} that uses a {@link Deserializer} to deserialize + * the objects to be compared so that the standard {@link Comparator} can + * be used to compare them. + *

+ *

+ * One may optimize compare-intensive operations by using a custom + * implementation of {@link RawComparator} that operates directly + * on byte representations. + *

+ * @param + */ +public abstract class DeserializerComparator implements RawComparator { + + private InputBuffer buffer = new InputBuffer(); + private Deserializer deserializer; + + private T key1; + private T key2; + + protected DeserializerComparator(Deserializer deserializer) + throws IOException { + + this.deserializer = deserializer; + this.deserializer.open(buffer); + } + + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + try { + + buffer.reset(b1, s1, l1); + key1 = deserializer.deserialize(key1); + + buffer.reset(b2, s2, l2); + key2 = deserializer.deserialize(key2); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return compare(key1, key2); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java b/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java new file mode 100644 index 00000000000..b44b4b1db76 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.Serializable; + +/** + *

+ * An experimental {@link Serialization} for Java {@link Serializable} classes. + *

+ * @see JavaSerializationComparator + */ +public class JavaSerialization implements Serialization { + + static class JavaSerializationDeserializer + implements Deserializer { + + private ObjectInputStream ois; + + public void open(InputStream in) throws IOException { + ois = new ObjectInputStream(in) { + @Override protected void readStreamHeader() { + // no header + } + }; + } + + @SuppressWarnings("unchecked") + public T deserialize(T object) throws IOException { + try { + // ignore passed-in object + return (T) ois.readObject(); + } catch (ClassNotFoundException e) { + throw new IOException(e.toString()); + } + } + + public void close() throws IOException { + ois.close(); + } + + } + + static class JavaSerializationSerializer + implements Serializer { + + private ObjectOutputStream oos; + + public void open(OutputStream out) throws IOException { + oos = new ObjectOutputStream(out) { + @Override protected void writeStreamHeader() { + // no header + } + }; + } + + public void serialize(Serializable object) throws IOException { + oos.reset(); // clear (class) back-references + oos.writeObject(object); + } + + public void close() throws IOException { + oos.close(); + } + + } + + public boolean accept(Class c) { + return Serializable.class.isAssignableFrom(c); + } + + public Deserializer getDeserializer(Class c) { + return new JavaSerializationDeserializer(); + } + + public Serializer getSerializer(Class c) { + return new JavaSerializationSerializer(); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java b/src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java new file mode 100644 index 00000000000..f3de2b10c32 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.Serializable; + +import org.apache.hadoop.io.RawComparator; + +/** + *

+ * A {@link RawComparator} that uses a {@link JavaSerialization} + * {@link Deserializer} to deserialize objects that are then compared via + * their {@link Comparable} interfaces. + *

+ * @param + * @see JavaSerialization + */ +public class JavaSerializationComparator> + extends DeserializerComparator { + + public JavaSerializationComparator() throws IOException { + super(new JavaSerialization.JavaSerializationDeserializer()); + } + + public int compare(T o1, T o2) { + return o1.compareTo(o2); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/Serialization.java b/src/java/org/apache/hadoop/io/serializer/Serialization.java new file mode 100644 index 00000000000..6e724bd78b1 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/Serialization.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +/** + *

+ * Encapsulates a {@link Serializer}/{@link Deserializer} pair. + *

+ * @param + */ +public interface Serialization { + + /** + * Allows clients to test whether this {@link Serialization} + * supports the given class. + */ + boolean accept(Class c); + + /** + * @return a {@link Serializer} for the given class. + */ + Serializer getSerializer(Class c); + + /** + * @return a {@link Deserializer} for the given class. + */ + Deserializer getDeserializer(Class c); +} diff --git a/src/java/org/apache/hadoop/io/serializer/SerializationFactory.java b/src/java/org/apache/hadoop/io/serializer/SerializationFactory.java new file mode 100644 index 00000000000..f4ba54b4e49 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/SerializationFactory.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; + +/** + *

+ * A factory for {@link Serialization}s. + *
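+ * A minimal usage sketch (MyRecord stands in for any class accepted by one
+ * of the configured serializations, and out is an open OutputStream):
+ *
+ *   SerializationFactory factory = new SerializationFactory(conf);
+ *   Serializer<MyRecord> serializer = factory.getSerializer(MyRecord.class);
+ *   serializer.open(out);
+ *   serializer.serialize(new MyRecord());
+ *   serializer.close();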

+ */ +public class SerializationFactory extends Configured { + + private static final Log LOG = + LogFactory.getLog(SerializationFactory.class.getName()); + + private List> serializations = new ArrayList>(); + + /** + *

+ * Serializations are found by reading the io.serializations + * property from conf, which is a comma-delimited list of + * classnames. + *

+ */ + public SerializationFactory(Configuration conf) { + super(conf); + for (String serializerName : conf.getStrings("io.serializations", + new String[]{"org.apache.hadoop.io.serializer.WritableSerialization"})) { + add(conf, serializerName); + } + } + + @SuppressWarnings("unchecked") + private void add(Configuration conf, String serializationName) { + try { + + Class serializionClass = + (Class) conf.getClassByName(serializationName); + serializations.add((Serialization) + ReflectionUtils.newInstance(serializionClass, getConf())); + } catch (ClassNotFoundException e) { + LOG.warn("Serilization class not found: " + + StringUtils.stringifyException(e)); + } + } + + public Serializer getSerializer(Class c) { + return getSerialization(c).getSerializer(c); + } + + public Deserializer getDeserializer(Class c) { + return getSerialization(c).getDeserializer(c); + } + + @SuppressWarnings("unchecked") + public Serialization getSerialization(Class c) { + for (Serialization serialization : serializations) { + if (serialization.accept(c)) { + return (Serialization) serialization; + } + } + return null; + } +} diff --git a/src/java/org/apache/hadoop/io/serializer/Serializer.java b/src/java/org/apache/hadoop/io/serializer/Serializer.java new file mode 100644 index 00000000000..b3243f5b6b8 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/Serializer.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.OutputStream; + +/** + *

+ * Provides a facility for serializing objects of type T to an + * {@link OutputStream}. + *

+ * + *

+ * Serializers are stateful, but must not buffer the output since + * other producers may write to the output between calls to + * {@link #serialize(Object)}. + *

+ * @param + */ +public interface Serializer { + /** + *

Prepare the serializer for writing.

+ */ + void open(OutputStream out) throws IOException; + + /** + *

Serialize t to the underlying output stream.

+ */ + void serialize(T t) throws IOException; + + /** + *

Close the underlying output stream and clear up any resources.

+ */ + void close() throws IOException; +} diff --git a/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java b/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java new file mode 100644 index 00000000000..47586e8c2dd --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A {@link Serialization} for {@link Writable}s that delegates to + * {@link Writable#write(java.io.DataOutput)} and + * {@link Writable#readFields(java.io.DataInput)}. + */ +public class WritableSerialization extends Configured + implements Serialization { + + static class WritableDeserializer extends Configured + implements Deserializer { + + private Class writableClass; + private DataInputStream dataIn; + + public WritableDeserializer(Configuration conf, Class c) { + setConf(conf); + this.writableClass = c; + } + + public void open(InputStream in) { + if (in instanceof DataInputStream) { + dataIn = (DataInputStream) in; + } else { + dataIn = new DataInputStream(in); + } + } + + public Writable deserialize(Writable w) throws IOException { + Writable writable; + if (w == null) { + writable + = (Writable) ReflectionUtils.newInstance(writableClass, getConf()); + } else { + writable = w; + } + writable.readFields(dataIn); + return writable; + } + + public void close() throws IOException { + dataIn.close(); + } + + } + + static class WritableSerializer implements Serializer { + + private DataOutputStream dataOut; + + public void open(OutputStream out) { + if (out instanceof DataOutputStream) { + dataOut = (DataOutputStream) out; + } else { + dataOut = new DataOutputStream(out); + } + } + + public void serialize(Writable w) throws IOException { + w.write(dataOut); + } + + public void close() throws IOException { + dataOut.close(); + } + + } + + public boolean accept(Class c) { + return Writable.class.isAssignableFrom(c); + } + + public Deserializer getDeserializer(Class c) { + return new WritableDeserializer(getConf(), c); + } + + public Serializer getSerializer(Class c) { + return new WritableSerializer(); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/package.html b/src/java/org/apache/hadoop/io/serializer/package.html new file mode 100644 index 00000000000..58c8a3a5c3b --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/package.html @@ 
-0,0 +1,37 @@ + + + + + + +

+This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

+ +

+To add a new serialization framework, write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +
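+
+For example, Java serialization could be enabled alongside the default
+Writable serialization with something like the following (an illustrative
+sketch; the property can equally be set in a configuration file):
+
+Configuration conf = new Configuration();
+conf.set("io.serializations",
+    "org.apache.hadoop.io.serializer.WritableSerialization,"
+    + "org.apache.hadoop.io.serializer.JavaSerialization");
+SerializationFactory factory = new SerializationFactory(conf);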

+ + + diff --git a/src/java/org/apache/hadoop/ipc/Client.java b/src/java/org/apache/hadoop/ipc/Client.java new file mode 100644 index 00000000000..1d01faf673f --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/Client.java @@ -0,0 +1,914 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.net.Socket; +import java.net.InetSocketAddress; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; +import java.net.ConnectException; + +import java.io.IOException; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.FilterInputStream; +import java.io.InputStream; + +import java.util.Hashtable; +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import javax.net.SocketFactory; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.ReflectionUtils; + +/** A client for an IPC service. IPC calls take a single {@link Writable} as a + * parameter, and return a {@link Writable} as their value. A service runs on + * a port and is defined by a parameter class and a value class. + * + * @see Server + */ +public class Client { + + public static final Log LOG = + LogFactory.getLog(Client.class); + private Hashtable connections = + new Hashtable(); + + private Class valueClass; // class of call values + private int counter; // counter for call ids + private AtomicBoolean running = new AtomicBoolean(true); // if client runs + final private Configuration conf; + final private int maxIdleTime; //connections will be culled if it was idle for + //maxIdleTime msecs + final private int maxRetries; //the max. no. 
of retries for socket connections + private boolean tcpNoDelay; // if T then disable Nagle's Algorithm + private int pingInterval; // how often sends ping to the server in msecs + + private SocketFactory socketFactory; // how to create sockets + private int refCount = 1; + + final private static String PING_INTERVAL_NAME = "ipc.ping.interval"; + final static int DEFAULT_PING_INTERVAL = 60000; // 1 min + final static int PING_CALL_ID = -1; + + /** + * set the ping interval value in configuration + * + * @param conf Configuration + * @param pingInterval the ping interval + */ + final public static void setPingInterval(Configuration conf, int pingInterval) { + conf.setInt(PING_INTERVAL_NAME, pingInterval); + } + + /** + * Get the ping interval from configuration; + * If not set in the configuration, return the default value. + * + * @param conf Configuration + * @return the ping interval + */ + final static int getPingInterval(Configuration conf) { + return conf.getInt(PING_INTERVAL_NAME, DEFAULT_PING_INTERVAL); + } + + /** + * Increment this client's reference count + * + */ + synchronized void incCount() { + refCount++; + } + + /** + * Decrement this client's reference count + * + */ + synchronized void decCount() { + refCount--; + } + + /** + * Return if this client has no reference + * + * @return true if this client has no reference; false otherwise + */ + synchronized boolean isZeroReference() { + return refCount==0; + } + + /** A call waiting for a value. */ + private class Call { + int id; // call id + Writable param; // parameter + Writable value; // value, null if error + IOException error; // exception, null if value + boolean done; // true when call is done + + protected Call(Writable param) { + this.param = param; + synchronized (Client.this) { + this.id = counter++; + } + } + + /** Indicate when the call is complete and the + * value or error are available. Notifies by default. */ + protected synchronized void callComplete() { + this.done = true; + notify(); // notify caller + } + + /** Set the exception when there is an error. + * Notify the caller the call is done. + * + * @param error exception thrown by the call; either local or remote + */ + public synchronized void setException(IOException error) { + this.error = error; + callComplete(); + } + + /** Set the return value when there is no error. + * Notify the caller the call is done. + * + * @param value return value of the call. + */ + public synchronized void setValue(Writable value) { + this.value = value; + callComplete(); + } + + public synchronized Writable getValue() { + return value; + } + } + + /** Thread that reads responses and notifies callers. Each connection owns a + * socket connected to a remote address. Calls are multiplexed through this + * socket: responses may be delivered out of order. 
*/ + private class Connection extends Thread { + private InetSocketAddress server; // server ip:port + private ConnectionHeader header; // connection header + private ConnectionId remoteId; // connection id + + private Socket socket = null; // connected socket + private DataInputStream in; + private DataOutputStream out; + + // currently active calls + private Hashtable calls = new Hashtable(); + private AtomicLong lastActivity = new AtomicLong();// last I/O activity time + private AtomicBoolean shouldCloseConnection = new AtomicBoolean(); // indicate if the connection is closed + private IOException closeException; // close reason + + public Connection(ConnectionId remoteId) throws IOException { + this.remoteId = remoteId; + this.server = remoteId.getAddress(); + if (server.isUnresolved()) { + throw new UnknownHostException("unknown host: " + + remoteId.getAddress().getHostName()); + } + + UserGroupInformation ticket = remoteId.getTicket(); + Class protocol = remoteId.getProtocol(); + header = + new ConnectionHeader(protocol == null ? null : protocol.getName(), ticket); + + this.setName("IPC Client (" + socketFactory.hashCode() +") connection to " + + remoteId.getAddress().toString() + + " from " + ((ticket==null)?"an unknown user":ticket.getUserName())); + this.setDaemon(true); + } + + /** Update lastActivity with the current time. */ + private void touch() { + lastActivity.set(System.currentTimeMillis()); + } + + /** + * Add a call to this connection's call queue and notify + * a listener; synchronized. + * Returns false if called during shutdown. + * @param call to add + * @return true if the call was added. + */ + private synchronized boolean addCall(Call call) { + if (shouldCloseConnection.get()) + return false; + calls.put(call.id, call); + notify(); + return true; + } + + /** This class sends a ping to the remote side when timeout on + * reading. If no failure is detected, it retries until at least + * a byte is read. + */ + private class PingInputStream extends FilterInputStream { + /* constructor */ + protected PingInputStream(InputStream in) { + super(in); + } + + /* Process timeout exception + * if the connection is not going to be closed, send a ping. + * otherwise, throw the timeout exception. + */ + private void handleTimeout(SocketTimeoutException e) throws IOException { + if (shouldCloseConnection.get() || !running.get()) { + throw e; + } else { + sendPing(); + } + } + + /** Read a byte from the stream. + * Send a ping if timeout on read. Retries if no failure is detected + * until a byte is read. + * @throws IOException for any IO problem other than socket timeout + */ + public int read() throws IOException { + do { + try { + return super.read(); + } catch (SocketTimeoutException e) { + handleTimeout(e); + } + } while (true); + } + + /** Read bytes into a buffer starting from offset off + * Send a ping if timeout on read. Retries if no failure is detected + * until a byte is read. + * + * @return the total number of bytes read; -1 if the connection is closed. + */ + public int read(byte[] buf, int off, int len) throws IOException { + do { + try { + return super.read(buf, off, len); + } catch (SocketTimeoutException e) { + handleTimeout(e); + } + } while (true); + } + } + + /** Connect to the server and set up the I/O streams. It then sends + * a header to the server and starts + * the connection thread that waits for responses. 
+ */ + private synchronized void setupIOstreams() { + if (socket != null || shouldCloseConnection.get()) { + return; + } + + short ioFailures = 0; + short timeoutFailures = 0; + try { + if (LOG.isDebugEnabled()) { + LOG.debug("Connecting to "+server); + } + while (true) { + try { + this.socket = socketFactory.createSocket(); + this.socket.setTcpNoDelay(tcpNoDelay); + // connection time out is 20s + NetUtils.connect(this.socket, remoteId.getAddress(), 20000); + this.socket.setSoTimeout(pingInterval); + break; + } catch (SocketTimeoutException toe) { + /* The max number of retries is 45, + * which amounts to 20s*45 = 15 minutes retries. + */ + handleConnectionFailure(timeoutFailures++, 45, toe); + } catch (IOException ie) { + handleConnectionFailure(ioFailures++, maxRetries, ie); + } + } + this.in = new DataInputStream(new BufferedInputStream + (new PingInputStream(NetUtils.getInputStream(socket)))); + this.out = new DataOutputStream + (new BufferedOutputStream(NetUtils.getOutputStream(socket))); + writeHeader(); + + // update last activity time + touch(); + + // start the receiver thread after the socket connection has been set up + start(); + } catch (IOException e) { + markClosed(e); + close(); + } + } + + /* Handle connection failures + * + * If the current number of retries is equal to the max number of retries, + * stop retrying and throw the exception; Otherwise backoff 1 second and + * try connecting again. + * + * This Method is only called from inside setupIOstreams(), which is + * synchronized. Hence the sleep is synchronized; the locks will be retained. + * + * @param curRetries current number of retries + * @param maxRetries max number of retries allowed + * @param ioe failure reason + * @throws IOException if max number of retries is reached + */ + private void handleConnectionFailure( + int curRetries, int maxRetries, IOException ioe) throws IOException { + // close the current connection + try { + socket.close(); + } catch (IOException e) { + LOG.warn("Not able to close a socket", e); + } + // set socket to null so that the next call to setupIOstreams + // can start the process of connect all over again. + socket = null; + + // throw the exception if the maximum number of retries is reached + if (curRetries >= maxRetries) { + throw ioe; + } + + // otherwise back off and retry + try { + Thread.sleep(1000); + } catch (InterruptedException ignored) {} + + LOG.info("Retrying connect to server: " + server + + ". Already tried " + curRetries + " time(s)."); + } + + /* Write the header for each connection + * Out is not synchronized because only the first thread does this. + */ + private void writeHeader() throws IOException { + // Write out the header and version + out.write(Server.HEADER.array()); + out.write(Server.CURRENT_VERSION); + + // Write out the ConnectionHeader + DataOutputBuffer buf = new DataOutputBuffer(); + header.write(buf); + + // Write out the payload length + int bufLen = buf.getLength(); + out.writeInt(bufLen); + out.write(buf.getData(), 0, bufLen); + } + + /* wait till someone signals us to start reading RPC response or + * it is idle too long, it is marked as to be closed, + * or the client is marked as not running. + * + * Return true if it is time to read a response; false otherwise. 
+ */ + private synchronized boolean waitForWork() { + if (calls.isEmpty() && !shouldCloseConnection.get() && running.get()) { + long timeout = maxIdleTime- + (System.currentTimeMillis()-lastActivity.get()); + if (timeout>0) { + try { + wait(timeout); + } catch (InterruptedException e) {} + } + } + + if (!calls.isEmpty() && !shouldCloseConnection.get() && running.get()) { + return true; + } else if (shouldCloseConnection.get()) { + return false; + } else if (calls.isEmpty()) { // idle connection closed or stopped + markClosed(null); + return false; + } else { // get stopped but there are still pending requests + markClosed((IOException)new IOException().initCause( + new InterruptedException())); + return false; + } + } + + public InetSocketAddress getRemoteAddress() { + return server; + } + + /* Send a ping to the server if the time elapsed + * since last I/O activity is equal to or greater than the ping interval + */ + private synchronized void sendPing() throws IOException { + long curTime = System.currentTimeMillis(); + if ( curTime - lastActivity.get() >= pingInterval) { + lastActivity.set(curTime); + synchronized (out) { + out.writeInt(PING_CALL_ID); + out.flush(); + } + } + } + + public void run() { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": starting, having connections " + + connections.size()); + + while (waitForWork()) {//wait here for work - read or close connection + receiveResponse(); + } + + close(); + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": stopped, remaining connections " + + connections.size()); + } + + /** Initiates a call by sending the parameter to the remote server. + * Note: this is not called from the Connection thread, but by other + * threads. + */ + public void sendParam(Call call) { + if (shouldCloseConnection.get()) { + return; + } + + DataOutputBuffer d=null; + try { + synchronized (this.out) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + " sending #" + call.id); + + //for serializing the + //data to be written + d = new DataOutputBuffer(); + d.writeInt(call.id); + call.param.write(d); + byte[] data = d.getData(); + int dataLength = d.getLength(); + out.writeInt(dataLength); //first put the data length + out.write(data, 0, dataLength);//write the data + out.flush(); + } + } catch(IOException e) { + markClosed(e); + } finally { + //the buffer is just an in-memory buffer, but it is still polite to + // close early + IOUtils.closeStream(d); + } + } + + /* Receive a response. + * Because only one receiver, so no synchronization on in. 
+ */ + private void receiveResponse() { + if (shouldCloseConnection.get()) { + return; + } + touch(); + + try { + int id = in.readInt(); // try to read an id + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + " got value #" + id); + + Call call = calls.remove(id); + + int state = in.readInt(); // read call status + if (state == Status.SUCCESS.state) { + Writable value = ReflectionUtils.newInstance(valueClass, conf); + value.readFields(in); // read value + call.setValue(value); + } else if (state == Status.ERROR.state) { + call.setException(new RemoteException(WritableUtils.readString(in), + WritableUtils.readString(in))); + } else if (state == Status.FATAL.state) { + // Close the connection + markClosed(new RemoteException(WritableUtils.readString(in), + WritableUtils.readString(in))); + } + } catch (IOException e) { + markClosed(e); + } + } + + private synchronized void markClosed(IOException e) { + if (shouldCloseConnection.compareAndSet(false, true)) { + closeException = e; + notifyAll(); + } + } + + /** Close the connection. */ + private synchronized void close() { + if (!shouldCloseConnection.get()) { + LOG.error("The connection is not in the closed state"); + return; + } + + // release the resources + // first thing to do;take the connection out of the connection list + synchronized (connections) { + if (connections.get(remoteId) == this) { + connections.remove(remoteId); + } + } + + // close the streams and therefore the socket + IOUtils.closeStream(out); + IOUtils.closeStream(in); + + // clean up all calls + if (closeException == null) { + if (!calls.isEmpty()) { + LOG.warn( + "A connection is closed for no cause and calls are not empty"); + + // clean up calls anyway + closeException = new IOException("Unexpected closed connection"); + cleanupCalls(); + } + } else { + // log the info + if (LOG.isDebugEnabled()) { + LOG.debug("closing ipc connection to " + server + ": " + + closeException.getMessage(),closeException); + } + + // cleanup calls + cleanupCalls(); + } + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": closed"); + } + + /* Cleanup all calls and mark them as done */ + private void cleanupCalls() { + Iterator> itor = calls.entrySet().iterator() ; + while (itor.hasNext()) { + Call c = itor.next().getValue(); + c.setException(closeException); // local exception + itor.remove(); + } + } + } + + /** Call implementation used for parallel calls. */ + private class ParallelCall extends Call { + private ParallelResults results; + private int index; + + public ParallelCall(Writable param, ParallelResults results, int index) { + super(param); + this.results = results; + this.index = index; + } + + /** Deliver result to result collector. */ + protected void callComplete() { + results.callComplete(this); + } + } + + /** Result collector for parallel calls. */ + private static class ParallelResults { + private Writable[] values; + private int size; + private int count; + + public ParallelResults(int size) { + this.values = new Writable[size]; + this.size = size; + } + + /** Collect a result. */ + public synchronized void callComplete(ParallelCall call) { + values[call.index] = call.getValue(); // store the value + count++; // count it + if (count == size) // if all values are in + notify(); // then notify waiting caller + } + } + + /** Construct an IPC client whose values are of the given {@link Writable} + * class. 
*/ + public Client(Class valueClass, Configuration conf, + SocketFactory factory) { + this.valueClass = valueClass; + this.maxIdleTime = + conf.getInt("ipc.client.connection.maxidletime", 10000); //10s + this.maxRetries = conf.getInt("ipc.client.connect.max.retries", 10); + this.tcpNoDelay = conf.getBoolean("ipc.client.tcpnodelay", false); + this.pingInterval = getPingInterval(conf); + if (LOG.isDebugEnabled()) { + LOG.debug("The ping interval is" + this.pingInterval + "ms."); + } + this.conf = conf; + this.socketFactory = factory; + } + + /** + * Construct an IPC client with the default SocketFactory + * @param valueClass + * @param conf + */ + public Client(Class valueClass, Configuration conf) { + this(valueClass, conf, NetUtils.getDefaultSocketFactory(conf)); + } + + /** Return the socket factory of this client + * + * @return this client's socket factory + */ + SocketFactory getSocketFactory() { + return socketFactory; + } + + /** Stop all threads related to this client. No further calls may be made + * using this client. */ + public void stop() { + if (LOG.isDebugEnabled()) { + LOG.debug("Stopping client"); + } + + if (!running.compareAndSet(true, false)) { + return; + } + + // wake up all connections + synchronized (connections) { + for (Connection conn : connections.values()) { + conn.interrupt(); + } + } + + // wait until all connections are closed + while (!connections.isEmpty()) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + } + } + } + + /** Make a call, passing param, to the IPC server running at + * address, returning the value. Throws exceptions if there are + * network problems or if the remote code threw an exception. + * @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead + */ + @Deprecated + public Writable call(Writable param, InetSocketAddress address) + throws InterruptedException, IOException { + return call(param, address, null); + } + + /** Make a call, passing param, to the IPC server running at + * address with the ticket credentials, returning + * the value. + * Throws exceptions if there are network problems or if the remote code + * threw an exception. + * @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead + */ + @Deprecated + public Writable call(Writable param, InetSocketAddress addr, + UserGroupInformation ticket) + throws InterruptedException, IOException { + return call(param, addr, null, ticket); + } + + /** Make a call, passing param, to the IPC server running at + * address which is servicing the protocol protocol, + * with the ticket credentials, returning the value. + * Throws exceptions if there are network problems or if the remote code + * threw an exception. 
*/ + public Writable call(Writable param, InetSocketAddress addr, + Class protocol, UserGroupInformation ticket) + throws InterruptedException, IOException { + Call call = new Call(param); + Connection connection = getConnection(addr, protocol, ticket, call); + connection.sendParam(call); // send the parameter + boolean interrupted = false; + synchronized (call) { + while (!call.done) { + try { + call.wait(); // wait for the result + } catch (InterruptedException ie) { + // save the fact that we were interrupted + interrupted = true; + } + } + + if (interrupted) { + // set the interrupt flag now that we are done waiting + Thread.currentThread().interrupt(); + } + + if (call.error != null) { + if (call.error instanceof RemoteException) { + call.error.fillInStackTrace(); + throw call.error; + } else { // local exception + throw wrapException(addr, call.error); + } + } else { + return call.value; + } + } + } + + /** + * Take an IOException and the address we were trying to connect to + * and return an IOException with the input exception as the cause. + * The new exception provides the stack trace of the place where + * the exception is thrown and some extra diagnostics information. + * If the exception is ConnectException or SocketTimeoutException, + * return a new one of the same type; Otherwise return an IOException. + * + * @param addr target address + * @param exception the relevant exception + * @return an exception to throw + */ + private IOException wrapException(InetSocketAddress addr, + IOException exception) { + if (exception instanceof ConnectException) { + //connection refused; include the host:port in the error + return (ConnectException)new ConnectException( + "Call to " + addr + " failed on connection exception: " + exception) + .initCause(exception); + } else if (exception instanceof SocketTimeoutException) { + return (SocketTimeoutException)new SocketTimeoutException( + "Call to " + addr + " failed on socket timeout exception: " + + exception).initCause(exception); + } else { + return (IOException)new IOException( + "Call to " + addr + " failed on local exception: " + exception) + .initCause(exception); + + } + } + + /** + * Makes a set of calls in parallel. Each parameter is sent to the + * corresponding address. When all values are available, or have timed out + * or errored, the collected results are returned in an array. The array + * contains nulls for calls that timed out or errored. + * @deprecated Use {@link #call(Writable[], InetSocketAddress[], Class, UserGroupInformation)} instead + */ + @Deprecated + public Writable[] call(Writable[] params, InetSocketAddress[] addresses) + throws IOException { + return call(params, addresses, null, null); + } + + /** Makes a set of calls in parallel. Each parameter is sent to the + * corresponding address. When all values are available, or have timed out + * or errored, the collected results are returned in an array. The array + * contains nulls for calls that timed out or errored. 
*/ + public Writable[] call(Writable[] params, InetSocketAddress[] addresses, + Class protocol, UserGroupInformation ticket) + throws IOException { + if (addresses.length == 0) return new Writable[0]; + + ParallelResults results = new ParallelResults(params.length); + synchronized (results) { + for (int i = 0; i < params.length; i++) { + ParallelCall call = new ParallelCall(params[i], results, i); + try { + Connection connection = + getConnection(addresses[i], protocol, ticket, call); + connection.sendParam(call); // send each parameter + } catch (IOException e) { + // log errors + LOG.info("Calling "+addresses[i]+" caught: " + + e.getMessage(),e); + results.size--; // wait for one fewer result + } + } + while (results.count != results.size) { + try { + results.wait(); // wait for all results + } catch (InterruptedException e) {} + } + + return results.values; + } + } + + /** Get a connection from the pool, or create a new one and add it to the + * pool. Connections to a given host/port are reused. */ + private Connection getConnection(InetSocketAddress addr, + Class protocol, + UserGroupInformation ticket, + Call call) + throws IOException { + if (!running.get()) { + // the client is stopped + throw new IOException("The client is stopped"); + } + Connection connection; + /* we could avoid this allocation for each RPC by having a + * connectionsId object and with set() method. We need to manage the + * refs for keys in HashMap properly. For now its ok. + */ + ConnectionId remoteId = new ConnectionId(addr, protocol, ticket); + do { + synchronized (connections) { + connection = connections.get(remoteId); + if (connection == null) { + connection = new Connection(remoteId); + connections.put(remoteId, connection); + } + } + } while (!connection.addCall(call)); + + //we don't invoke the method below inside "synchronized (connections)" + //block above. The reason for that is if the server happens to be slow, + //it will take longer to establish a connection and that will slow the + //entire system down. + connection.setupIOstreams(); + return connection; + } + + /** + * This class holds the address and the user ticket. The client connections + * to servers are uniquely identified by + */ + private static class ConnectionId { + InetSocketAddress address; + UserGroupInformation ticket; + Class protocol; + private static final int PRIME = 16777619; + + ConnectionId(InetSocketAddress address, Class protocol, + UserGroupInformation ticket) { + this.protocol = protocol; + this.address = address; + this.ticket = ticket; + } + + InetSocketAddress getAddress() { + return address; + } + + Class getProtocol() { + return protocol; + } + + UserGroupInformation getTicket() { + return ticket; + } + + + @Override + public boolean equals(Object obj) { + if (obj instanceof ConnectionId) { + ConnectionId id = (ConnectionId) obj; + return address.equals(id.address) && protocol == id.protocol && + ticket == id.ticket; + //Note : ticket is a ref comparision. + } + return false; + } + + @Override + public int hashCode() { + return (address.hashCode() + PRIME * System.identityHashCode(protocol)) ^ + System.identityHashCode(ticket); + } + } +} diff --git a/src/java/org/apache/hadoop/ipc/ConnectionHeader.java b/src/java/org/apache/hadoop/ipc/ConnectionHeader.java new file mode 100644 index 00000000000..44b113b7edc --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/ConnectionHeader.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.security.UnixUserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * The IPC connection header sent by the client to the server + * on connection establishment. + */ +class ConnectionHeader implements Writable { + public static final Log LOG = LogFactory.getLog(ConnectionHeader.class); + + private String protocol; + private UserGroupInformation ugi = new UnixUserGroupInformation(); + + public ConnectionHeader() {} + + /** + * Create a new {@link ConnectionHeader} with the given protocol + * and {@link UserGroupInformation}. + * @param protocol protocol used for communication between the IPC client + * and the server + * @param ugi {@link UserGroupInformation} of the client communicating with + * the server + */ + public ConnectionHeader(String protocol, UserGroupInformation ugi) { + this.protocol = protocol; + this.ugi = ugi; + } + + @Override + public void readFields(DataInput in) throws IOException { + protocol = Text.readString(in); + if (protocol.isEmpty()) { + protocol = null; + } + + boolean ugiPresent = in.readBoolean(); + if (ugiPresent) { + ugi.readFields(in); + } else { + ugi = null; + } + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, (protocol == null) ? "" : protocol); + if (ugi != null) { + out.writeBoolean(true); + ugi.write(out); + } else { + out.writeBoolean(false); + } + } + + public String getProtocol() { + return protocol; + } + + public UserGroupInformation getUgi() { + return ugi; + } + + public String toString() { + return protocol + "-" + ugi; + } +} diff --git a/src/java/org/apache/hadoop/ipc/RPC.java b/src/java/org/apache/hadoop/ipc/RPC.java new file mode 100644 index 00000000000..94b0ec82e2e --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/RPC.java @@ -0,0 +1,575 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.lang.reflect.Proxy; +import java.lang.reflect.Method; +import java.lang.reflect.Array; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; + +import java.net.ConnectException; +import java.net.InetSocketAddress; +import java.net.SocketTimeoutException; +import java.io.*; +import java.util.Map; +import java.util.HashMap; + +import javax.net.SocketFactory; +import javax.security.auth.Subject; +import javax.security.auth.login.LoginException; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.io.*; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.authorize.ServiceAuthorizationManager; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate; + +/** A simple RPC mechanism. + * + * A protocol is a Java interface. All parameters and return types must + * be one of: + * + *
+ * <ul> <li>a primitive type, <code>boolean</code>, <code>byte</code>,
+ * <code>char</code>, <code>short</code>, <code>int</code>, <code>long</code>,
+ * <code>float</code>, <code>double</code>, or <code>void</code>; or</li>
+ *
+ * <li>a {@link String}; or</li>
+ *
+ * <li>a {@link Writable}; or</li>
+ *
+ * <li>an array of the above types</li> </ul>
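+ *
+ * <p>As an illustrative sketch only (the protocol, implementation and endpoint
+ * names below are hypothetical, not part of this patch), a service is
+ * typically published with <code>getServer</code> and consumed through
+ * <code>getProxy</code>:
+ * <pre>
+ *   interface EchoProtocol extends VersionedProtocol {
+ *     long versionID = 1L;
+ *     Text echo(Text message) throws IOException;
+ *   }
+ *
+ *   // server side
+ *   Configuration conf = new Configuration();
+ *   EchoProtocol impl = new EchoProtocolImpl();   // hypothetical implementation
+ *   Server server = RPC.getServer(impl, "0.0.0.0", 9000, conf);
+ *   server.start();
+ *
+ *   // client side
+ *   InetSocketAddress addr = new InetSocketAddress("localhost", 9000);
+ *   EchoProtocol proxy = (EchoProtocol)
+ *       RPC.getProxy(EchoProtocol.class, EchoProtocol.versionID, addr, conf);
+ *   Text reply = proxy.echo(new Text("hello"));
+ *   RPC.stopProxy(proxy);
+ * </pre>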
+ * + * All methods in the protocol should throw only IOException. No field data of + * the protocol instance is transmitted. + */ +public class RPC { + private static final Log LOG = + LogFactory.getLog(RPC.class); + + private RPC() {} // no public ctor + + + /** A method invocation, including the method name and its parameters.*/ + private static class Invocation implements Writable, Configurable { + private String methodName; + private Class[] parameterClasses; + private Object[] parameters; + private Configuration conf; + + public Invocation() {} + + public Invocation(Method method, Object[] parameters) { + this.methodName = method.getName(); + this.parameterClasses = method.getParameterTypes(); + this.parameters = parameters; + } + + /** The name of the method invoked. */ + public String getMethodName() { return methodName; } + + /** The parameter classes. */ + public Class[] getParameterClasses() { return parameterClasses; } + + /** The parameter instances. */ + public Object[] getParameters() { return parameters; } + + public void readFields(DataInput in) throws IOException { + methodName = UTF8.readString(in); + parameters = new Object[in.readInt()]; + parameterClasses = new Class[parameters.length]; + ObjectWritable objectWritable = new ObjectWritable(); + for (int i = 0; i < parameters.length; i++) { + parameters[i] = ObjectWritable.readObject(in, objectWritable, this.conf); + parameterClasses[i] = objectWritable.getDeclaredClass(); + } + } + + public void write(DataOutput out) throws IOException { + UTF8.writeString(out, methodName); + out.writeInt(parameterClasses.length); + for (int i = 0; i < parameterClasses.length; i++) { + ObjectWritable.writeObject(out, parameters[i], parameterClasses[i], + conf); + } + } + + public String toString() { + StringBuffer buffer = new StringBuffer(); + buffer.append(methodName); + buffer.append("("); + for (int i = 0; i < parameters.length; i++) { + if (i != 0) + buffer.append(", "); + buffer.append(parameters[i]); + } + buffer.append(")"); + return buffer.toString(); + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + public Configuration getConf() { + return this.conf; + } + + } + + /* Cache a client using its socket factory as the hash key */ + static private class ClientCache { + private Map clients = + new HashMap(); + + /** + * Construct & cache an IPC client with the user-provided SocketFactory + * if no cached client exists. + * + * @param conf Configuration + * @return an IPC client + */ + private synchronized Client getClient(Configuration conf, + SocketFactory factory) { + // Construct & cache client. The configuration is only used for timeout, + // and Clients have connection pools. So we can either (a) lose some + // connection pooling and leak sockets, or (b) use the same timeout for all + // configurations. Since the IPC is usually intended globally, not + // per-job, we choose (a). + Client client = clients.get(factory); + if (client == null) { + client = new Client(ObjectWritable.class, conf, factory); + clients.put(factory, client); + } else { + client.incCount(); + } + return client; + } + + /** + * Construct & cache an IPC client with the default SocketFactory + * if no cached client exists. + * + * @param conf Configuration + * @return an IPC client + */ + private synchronized Client getClient(Configuration conf) { + return getClient(conf, SocketFactory.getDefault()); + } + + /** + * Stop a RPC client connection + * A RPC client is closed only when its reference count becomes zero. 
+ */ + private void stopClient(Client client) { + synchronized (this) { + client.decCount(); + if (client.isZeroReference()) { + clients.remove(client.getSocketFactory()); + } + } + if (client.isZeroReference()) { + client.stop(); + } + } + } + + private static ClientCache CLIENTS=new ClientCache(); + + private static class Invoker implements InvocationHandler { + private InetSocketAddress address; + private UserGroupInformation ticket; + private Client client; + private boolean isClosed = false; + + public Invoker(InetSocketAddress address, UserGroupInformation ticket, + Configuration conf, SocketFactory factory) { + this.address = address; + this.ticket = ticket; + this.client = CLIENTS.getClient(conf, factory); + } + + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable { + final boolean logDebug = LOG.isDebugEnabled(); + long startTime = 0; + if (logDebug) { + startTime = System.currentTimeMillis(); + } + + ObjectWritable value = (ObjectWritable) + client.call(new Invocation(method, args), address, + method.getDeclaringClass(), ticket); + if (logDebug) { + long callTime = System.currentTimeMillis() - startTime; + LOG.debug("Call: " + method.getName() + " " + callTime); + } + return value.get(); + } + + /* close the IPC client that's responsible for this invoker's RPCs */ + synchronized private void close() { + if (!isClosed) { + isClosed = true; + CLIENTS.stopClient(client); + } + } + } + + /** + * A version mismatch for the RPC protocol. + */ + public static class VersionMismatch extends IOException { + private String interfaceName; + private long clientVersion; + private long serverVersion; + + /** + * Create a version mismatch exception + * @param interfaceName the name of the protocol mismatch + * @param clientVersion the client's version of the protocol + * @param serverVersion the server's version of the protocol + */ + public VersionMismatch(String interfaceName, long clientVersion, + long serverVersion) { + super("Protocol " + interfaceName + " version mismatch. (client = " + + clientVersion + ", server = " + serverVersion + ")"); + this.interfaceName = interfaceName; + this.clientVersion = clientVersion; + this.serverVersion = serverVersion; + } + + /** + * Get the interface name + * @return the java class name + * (eg. org.apache.hadoop.mapred.InterTrackerProtocol) + */ + public String getInterfaceName() { + return interfaceName; + } + + /** + * Get the client's preferred version + */ + public long getClientVersion() { + return clientVersion; + } + + /** + * Get the server's agreed to version. 
+ */ + public long getServerVersion() { + return serverVersion; + } + } + + public static VersionedProtocol waitForProxy(Class protocol, + long clientVersion, + InetSocketAddress addr, + Configuration conf + ) throws IOException { + return waitForProxy(protocol, clientVersion, addr, conf, Long.MAX_VALUE); + } + + /** + * Get a proxy connection to a remote server + * @param protocol protocol class + * @param clientVersion client version + * @param addr remote address + * @param conf configuration to use + * @param timeout time in milliseconds before giving up + * @return the proxy + * @throws IOException if the far end through a RemoteException + */ + static VersionedProtocol waitForProxy(Class protocol, + long clientVersion, + InetSocketAddress addr, + Configuration conf, + long timeout + ) throws IOException { + long startTime = System.currentTimeMillis(); + IOException ioe; + while (true) { + try { + return getProxy(protocol, clientVersion, addr, conf); + } catch(ConnectException se) { // namenode has not been started + LOG.info("Server at " + addr + " not available yet, Zzzzz..."); + ioe = se; + } catch(SocketTimeoutException te) { // namenode is busy + LOG.info("Problem connecting to server: " + addr); + ioe = te; + } + // check if timed out + if (System.currentTimeMillis()-timeout >= startTime) { + throw ioe; + } + + // wait for retry + try { + Thread.sleep(1000); + } catch (InterruptedException ie) { + // IGNORE + } + } + } + /** Construct a client-side proxy object that implements the named protocol, + * talking to a server at the named address. */ + public static VersionedProtocol getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, Configuration conf, + SocketFactory factory) throws IOException { + UserGroupInformation ugi = null; + try { + ugi = UserGroupInformation.login(conf); + } catch (LoginException le) { + throw new RuntimeException("Couldn't login!"); + } + return getProxy(protocol, clientVersion, addr, ugi, conf, factory); + } + + /** Construct a client-side proxy object that implements the named protocol, + * talking to a server at the named address. */ + public static VersionedProtocol getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, + Configuration conf, SocketFactory factory) throws IOException { + + VersionedProtocol proxy = + (VersionedProtocol) Proxy.newProxyInstance( + protocol.getClassLoader(), new Class[] { protocol }, + new Invoker(addr, ticket, conf, factory)); + long serverVersion = proxy.getProtocolVersion(protocol.getName(), + clientVersion); + if (serverVersion == clientVersion) { + return proxy; + } else { + throw new VersionMismatch(protocol.getName(), clientVersion, + serverVersion); + } + } + + /** + * Construct a client-side proxy object with the default SocketFactory + * + * @param protocol + * @param clientVersion + * @param addr + * @param conf + * @return a proxy instance + * @throws IOException + */ + public static VersionedProtocol getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, Configuration conf) + throws IOException { + + return getProxy(protocol, clientVersion, addr, conf, NetUtils + .getDefaultSocketFactory(conf)); + } + + /** + * Stop this proxy and release its invoker's resource + * @param proxy the proxy to be stopped + */ + public static void stopProxy(VersionedProtocol proxy) { + if (proxy!=null) { + ((Invoker)Proxy.getInvocationHandler(proxy)).close(); + } + } + + /** + * Expert: Make multiple, parallel calls to a set of servers. 
+ * @deprecated Use {@link #call(Method, Object[][], InetSocketAddress[], UserGroupInformation, Configuration)} instead + */ + public static Object[] call(Method method, Object[][] params, + InetSocketAddress[] addrs, Configuration conf) + throws IOException { + return call(method, params, addrs, null, conf); + } + + /** Expert: Make multiple, parallel calls to a set of servers. */ + public static Object[] call(Method method, Object[][] params, + InetSocketAddress[] addrs, + UserGroupInformation ticket, Configuration conf) + throws IOException { + + Invocation[] invocations = new Invocation[params.length]; + for (int i = 0; i < params.length; i++) + invocations[i] = new Invocation(method, params[i]); + Client client = CLIENTS.getClient(conf); + try { + Writable[] wrappedValues = + client.call(invocations, addrs, method.getDeclaringClass(), ticket); + + if (method.getReturnType() == Void.TYPE) { + return null; + } + + Object[] values = + (Object[])Array.newInstance(method.getReturnType(), wrappedValues.length); + for (int i = 0; i < values.length; i++) + if (wrappedValues[i] != null) + values[i] = ((ObjectWritable)wrappedValues[i]).get(); + + return values; + } finally { + CLIENTS.stopClient(client); + } + } + + /** Construct a server for a protocol implementation instance listening on a + * port and address. */ + public static Server getServer(final Object instance, final String bindAddress, final int port, Configuration conf) + throws IOException { + return getServer(instance, bindAddress, port, 1, false, conf); + } + + /** Construct a server for a protocol implementation instance listening on a + * port and address. */ + public static Server getServer(final Object instance, final String bindAddress, final int port, + final int numHandlers, + final boolean verbose, Configuration conf) + throws IOException { + return new Server(instance, conf, bindAddress, port, numHandlers, verbose); + } + + /** An RPC Server. */ + public static class Server extends org.apache.hadoop.ipc.Server { + private Object instance; + private boolean verbose; + private boolean authorize = false; + + /** Construct an RPC server. + * @param instance the instance whose methods will be called + * @param conf the configuration to use + * @param bindAddress the address to bind on to listen for connection + * @param port the port to listen for connections on + */ + public Server(Object instance, Configuration conf, String bindAddress, int port) + throws IOException { + this(instance, conf, bindAddress, port, 1, false); + } + + private static String classNameBase(String className) { + String[] names = className.split("\\.", -1); + if (names == null || names.length == 0) { + return className; + } + return names[names.length-1]; + } + + /** Construct an RPC server. 
+ * @param instance the instance whose methods will be called + * @param conf the configuration to use + * @param bindAddress the address to bind on to listen for connection + * @param port the port to listen for connections on + * @param numHandlers the number of method handler threads to run + * @param verbose whether each call should be logged + */ + public Server(Object instance, Configuration conf, String bindAddress, int port, + int numHandlers, boolean verbose) throws IOException { + super(bindAddress, port, Invocation.class, numHandlers, conf, classNameBase(instance.getClass().getName())); + this.instance = instance; + this.verbose = verbose; + this.authorize = + conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, + false); + } + + public Writable call(Class protocol, Writable param, long receivedTime) + throws IOException { + try { + Invocation call = (Invocation)param; + if (verbose) log("Call: " + call); + + Method method = + protocol.getMethod(call.getMethodName(), + call.getParameterClasses()); + method.setAccessible(true); + + long startTime = System.currentTimeMillis(); + Object value = method.invoke(instance, call.getParameters()); + int processingTime = (int) (System.currentTimeMillis() - startTime); + int qTime = (int) (startTime-receivedTime); + if (LOG.isDebugEnabled()) { + LOG.debug("Served: " + call.getMethodName() + + " queueTime= " + qTime + + " procesingTime= " + processingTime); + } + rpcMetrics.rpcQueueTime.inc(qTime); + rpcMetrics.rpcProcessingTime.inc(processingTime); + + MetricsTimeVaryingRate m = + (MetricsTimeVaryingRate) rpcMetrics.registry.get(call.getMethodName()); + if (m == null) { + try { + m = new MetricsTimeVaryingRate(call.getMethodName(), + rpcMetrics.registry); + } catch (IllegalArgumentException iae) { + // the metrics has been registered; re-fetch the handle + LOG.info("Error register " + call.getMethodName(), iae); + m = (MetricsTimeVaryingRate) rpcMetrics.registry.get( + call.getMethodName()); + } + } + m.inc(processingTime); + + if (verbose) log("Return: "+value); + + return new ObjectWritable(method.getReturnType(), value); + + } catch (InvocationTargetException e) { + Throwable target = e.getTargetException(); + if (target instanceof IOException) { + throw (IOException)target; + } else { + IOException ioe = new IOException(target.toString()); + ioe.setStackTrace(target.getStackTrace()); + throw ioe; + } + } catch (Throwable e) { + IOException ioe = new IOException(e.toString()); + ioe.setStackTrace(e.getStackTrace()); + throw ioe; + } + } + + @Override + public void authorize(Subject user, ConnectionHeader connection) + throws AuthorizationException { + if (authorize) { + Class protocol = null; + try { + protocol = getProtocolClass(connection.getProtocol(), getConf()); + } catch (ClassNotFoundException cfne) { + throw new AuthorizationException("Unknown protocol: " + + connection.getProtocol()); + } + ServiceAuthorizationManager.authorize(user, protocol); + } + } + } + + private static void log(String value) { + if (value!= null && value.length() > 55) + value = value.substring(0, 55)+"..."; + LOG.info(value); + } +} diff --git a/src/java/org/apache/hadoop/ipc/RemoteException.java b/src/java/org/apache/hadoop/ipc/RemoteException.java new file mode 100644 index 00000000000..214b2f66b61 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/RemoteException.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.io.IOException; +import java.lang.reflect.Constructor; + +import org.xml.sax.Attributes; +import org.znerd.xmlenc.XMLOutputter; + +public class RemoteException extends IOException { + /** For java.io.Serializable */ + private static final long serialVersionUID = 1L; + + private String className; + + public RemoteException(String className, String msg) { + super(msg); + this.className = className; + } + + public String getClassName() { + return className; + } + + /** + * If this remote exception wraps up one of the lookupTypes + * then return this exception. + *
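+ * <p>An illustrative sketch only (the proxy call and exception class below
+ * are hypothetical):
+ * <pre>
+ *   try {
+ *     proxy.rename(src, dst);
+ *   } catch (RemoteException re) {
+ *     throw re.unwrapRemoteException(FileNotFoundException.class);
+ *   }
+ * </pre>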

+ * Unwraps any IOException. + * + * @param lookupTypes the desired exception class. + * @return IOException, which is either the lookupClass exception or this. + */ + public IOException unwrapRemoteException(Class... lookupTypes) { + if(lookupTypes == null) + return this; + for(Class lookupClass : lookupTypes) { + if(!lookupClass.getName().equals(getClassName())) + continue; + try { + return instantiateException(lookupClass.asSubclass(IOException.class)); + } catch(Exception e) { + // cannot instantiate lookupClass, just return this + return this; + } + } + // wrapped up exception is not in lookupTypes, just return this + return this; + } + + /** + * Instantiate and return the exception wrapped up by this remote exception. + * + *

This unwraps any Throwable that has a constructor taking + * a String as a parameter. + * Otherwise it returns this. + * + * @return Throwable + */ + public IOException unwrapRemoteException() { + try { + Class realClass = Class.forName(getClassName()); + return instantiateException(realClass.asSubclass(IOException.class)); + } catch(Exception e) { + // cannot instantiate the original exception, just return this + } + return this; + } + + private IOException instantiateException(Class cls) + throws Exception { + Constructor cn = cls.getConstructor(String.class); + cn.setAccessible(true); + String firstLine = this.getMessage(); + int eol = firstLine.indexOf('\n'); + if (eol>=0) { + firstLine = firstLine.substring(0, eol); + } + IOException ex = cn.newInstance(firstLine); + ex.initCause(this); + return ex; + } + + /** Write the object to XML format */ + public void writeXml(String path, XMLOutputter doc) throws IOException { + doc.startTag(RemoteException.class.getSimpleName()); + doc.attribute("path", path); + doc.attribute("class", getClassName()); + String msg = getLocalizedMessage(); + int i = msg.indexOf("\n"); + if (i >= 0) { + msg = msg.substring(0, i); + } + doc.attribute("message", msg.substring(msg.indexOf(":") + 1).trim()); + doc.endTag(); + } + + /** Create RemoteException from attributes */ + public static RemoteException valueOf(Attributes attrs) { + return new RemoteException(attrs.getValue("class"), + attrs.getValue("message")); + } +} diff --git a/src/java/org/apache/hadoop/ipc/Server.java b/src/java/org/apache/hadoop/ipc/Server.java new file mode 100644 index 00000000000..890569897b4 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/Server.java @@ -0,0 +1,1255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ipc; + +import java.io.IOException; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import java.nio.ByteBuffer; +import java.nio.channels.CancelledKeyException; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SelectionKey; +import java.nio.channels.Selector; +import java.nio.channels.ServerSocketChannel; +import java.nio.channels.SocketChannel; +import java.nio.channels.WritableByteChannel; + +import java.net.BindException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.net.UnknownHostException; + +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; + +import javax.security.auth.Subject; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.ipc.metrics.RpcMetrics; +import org.apache.hadoop.security.authorize.AuthorizationException; + +/** An abstract IPC service. IPC calls take a single {@link Writable} as a + * parameter, and return a {@link Writable} as their value. A service runs on + * a port and is defined by a parameter class and a value class. + * + * @see Client + */ +public abstract class Server { + + /** + * The first four bytes of Hadoop RPC connections + */ + public static final ByteBuffer HEADER = ByteBuffer.wrap("hrpc".getBytes()); + + // 1 : Introduce ping and server does not throw away RPCs + // 3 : Introduce the protocol into the RPC connection header + public static final byte CURRENT_VERSION = 3; + + /** + * How many calls/handler are allowed in the queue. + */ + private static final int MAX_QUEUE_SIZE_PER_HANDLER = 100; + + public static final Log LOG = LogFactory.getLog(Server.class); + + private static final ThreadLocal SERVER = new ThreadLocal(); + + private static final Map> PROTOCOL_CACHE = + new ConcurrentHashMap>(); + + static Class getProtocolClass(String protocolName, Configuration conf) + throws ClassNotFoundException { + Class protocol = PROTOCOL_CACHE.get(protocolName); + if (protocol == null) { + protocol = conf.getClassByName(protocolName); + PROTOCOL_CACHE.put(protocolName, protocol); + } + return protocol; + } + + /** Returns the server instance called under or null. May be called under + * {@link #call(Writable, long)} implementations, and under {@link Writable} + * methods of paramters and return values. Permits applications to access + * the server context.*/ + public static Server get() { + return SERVER.get(); + } + + /** This is set to Call object before Handler invokes an RPC and reset + * after the call returns. 
+ */ + private static final ThreadLocal CurCall = new ThreadLocal(); + + /** Returns the remote side ip address when invoked inside an RPC + * Returns null incase of an error. + */ + public static InetAddress getRemoteIp() { + Call call = CurCall.get(); + if (call != null) { + return call.connection.socket.getInetAddress(); + } + return null; + } + /** Returns remote address as a string when invoked inside an RPC. + * Returns null in case of an error. + */ + public static String getRemoteAddress() { + InetAddress addr = getRemoteIp(); + return (addr == null) ? null : addr.getHostAddress(); + } + + private String bindAddress; + private int port; // port we listen on + private int handlerCount; // number of handler threads + private Class paramClass; // class of call parameters + private int maxIdleTime; // the maximum idle time after + // which a client may be disconnected + private int thresholdIdleConnections; // the number of idle connections + // after which we will start + // cleaning up idle + // connections + int maxConnectionsToNuke; // the max number of + // connections to nuke + //during a cleanup + + protected RpcMetrics rpcMetrics; + + private Configuration conf; + + private int maxQueueSize; + private int socketSendBufferSize; + private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm + + volatile private boolean running = true; // true while server runs + private BlockingQueue callQueue; // queued calls + + private List connectionList = + Collections.synchronizedList(new LinkedList()); + //maintain a list + //of client connections + private Listener listener = null; + private Responder responder = null; + private int numConnections = 0; + private Handler[] handlers = null; + + /** + * A convenience method to bind to a given address and report + * better exceptions if the address is not a valid host. + * @param socket the socket to bind + * @param address the address to bind to + * @param backlog the number of connections allowed in the queue + * @throws BindException if the address can't be bound + * @throws UnknownHostException if the address isn't a valid host name + * @throws IOException other random errors from bind + */ + public static void bind(ServerSocket socket, InetSocketAddress address, + int backlog) throws IOException { + try { + socket.bind(address, backlog); + } catch (BindException e) { + BindException bindException = new BindException("Problem binding to " + address + + " : " + e.getMessage()); + bindException.initCause(e); + throw bindException; + } catch (SocketException e) { + // If they try to bind to a different host's address, give a better + // error message. + if ("Unresolved address".equals(e.getMessage())) { + throw new UnknownHostException("Invalid hostname for server: " + + address.getHostName()); + } else { + throw e; + } + } + } + + /** A call queued for handling. 
*/ + private static class Call { + private int id; // the client's call id + private Writable param; // the parameter passed + private Connection connection; // connection to client + private long timestamp; // the time received when response is null + // the time served when response is not null + private ByteBuffer response; // the response for this call + + public Call(int id, Writable param, Connection connection) { + this.id = id; + this.param = param; + this.connection = connection; + this.timestamp = System.currentTimeMillis(); + this.response = null; + } + + @Override + public String toString() { + return param.toString() + " from " + connection.toString(); + } + + public void setResponse(ByteBuffer response) { + this.response = response; + } + } + + /** Listens on the socket. Creates jobs for the handler threads*/ + private class Listener extends Thread { + + private ServerSocketChannel acceptChannel = null; //the accept channel + private Selector selector = null; //the selector that we use for the server + private InetSocketAddress address; //the address we bind at + private Random rand = new Random(); + private long lastCleanupRunTime = 0; //the last time when a cleanup connec- + //-tion (for idle connections) ran + private long cleanupInterval = 10000; //the minimum interval between + //two cleanup runs + private int backlogLength = conf.getInt("ipc.server.listen.queue.size", 128); + + public Listener() throws IOException { + address = new InetSocketAddress(bindAddress, port); + // Create a new server socket and set to non blocking mode + acceptChannel = ServerSocketChannel.open(); + acceptChannel.configureBlocking(false); + + // Bind the server socket to the local host and port + bind(acceptChannel.socket(), address, backlogLength); + port = acceptChannel.socket().getLocalPort(); //Could be an ephemeral port + // create a selector; + selector= Selector.open(); + + // Register accepts on the server socket with the selector. + acceptChannel.register(selector, SelectionKey.OP_ACCEPT); + this.setName("IPC Server listener on " + port); + this.setDaemon(true); + } + /** cleanup connections from connectionList. Choose a random range + * to scan and also have a limit on the number of the connections + * that will be cleanedup per run. The criteria for cleanup is the time + * for which the connection was idle. If 'force' is true then all + * connections will be looked at for the cleanup. 
+ */ + private void cleanupConnections(boolean force) { + if (force || numConnections > thresholdIdleConnections) { + long currentTime = System.currentTimeMillis(); + if (!force && (currentTime - lastCleanupRunTime) < cleanupInterval) { + return; + } + int start = 0; + int end = numConnections - 1; + if (!force) { + start = rand.nextInt() % numConnections; + end = rand.nextInt() % numConnections; + int temp; + if (end < start) { + temp = start; + start = end; + end = temp; + } + } + int i = start; + int numNuked = 0; + while (i <= end) { + Connection c; + synchronized (connectionList) { + try { + c = connectionList.get(i); + } catch (Exception e) {return;} + } + if (c.timedOut(currentTime)) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": disconnecting client " + c.getHostAddress()); + closeConnection(c); + numNuked++; + end--; + c = null; + if (!force && numNuked == maxConnectionsToNuke) break; + } + else i++; + } + lastCleanupRunTime = System.currentTimeMillis(); + } + } + + @Override + public void run() { + LOG.info(getName() + ": starting"); + SERVER.set(Server.this); + while (running) { + SelectionKey key = null; + try { + getSelector().select(); + Iterator iter = getSelector().selectedKeys().iterator(); + while (iter.hasNext()) { + key = iter.next(); + iter.remove(); + try { + if (key.isValid()) { + if (key.isAcceptable()) + doAccept(key); + else if (key.isReadable()) + doRead(key); + } + } catch (IOException e) { + } + key = null; + } + } catch (OutOfMemoryError e) { + // we can run out of memory if we have too many threads + // log the event and sleep for a minute and give + // some thread(s) a chance to finish + LOG.warn("Out of Memory in server select", e); + closeCurrentConnection(key, e); + cleanupConnections(true); + try { Thread.sleep(60000); } catch (Exception ie) {} + } catch (InterruptedException e) { + if (running) { // unexpected -- log it + LOG.info(getName() + " caught: " + + StringUtils.stringifyException(e)); + } + } catch (Exception e) { + closeCurrentConnection(key, e); + } + cleanupConnections(false); + } + LOG.info("Stopping " + this.getName()); + + synchronized (this) { + try { + acceptChannel.close(); + selector.close(); + } catch (IOException e) { } + + selector= null; + acceptChannel= null; + + // clean up all connections + while (!connectionList.isEmpty()) { + closeConnection(connectionList.remove(0)); + } + } + } + + private void closeCurrentConnection(SelectionKey key, Throwable e) { + if (key != null) { + Connection c = (Connection)key.attachment(); + if (c != null) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": disconnecting client " + c.getHostAddress()); + closeConnection(c); + c = null; + } + } + } + + InetSocketAddress getAddress() { + return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress(); + } + + void doAccept(SelectionKey key) throws IOException, OutOfMemoryError { + Connection c = null; + ServerSocketChannel server = (ServerSocketChannel) key.channel(); + // accept up to 10 connections + for (int i=0; i<10; i++) { + SocketChannel channel = server.accept(); + if (channel==null) return; + + channel.configureBlocking(false); + channel.socket().setTcpNoDelay(tcpNoDelay); + SelectionKey readKey = channel.register(getSelector(), + SelectionKey.OP_READ); + c = new Connection(readKey, channel, System.currentTimeMillis()); + readKey.attach(c); + synchronized (connectionList) { + connectionList.add(numConnections, c); + numConnections++; + } + if (LOG.isDebugEnabled()) + LOG.debug("Server connection from " + c.toString() 
+ + "; # active connections: " + numConnections + + "; # queued calls: " + callQueue.size()); + } + } + + void doRead(SelectionKey key) throws InterruptedException { + int count = 0; + Connection c = (Connection)key.attachment(); + if (c == null) { + return; + } + c.setLastContact(System.currentTimeMillis()); + + try { + count = c.readAndProcess(); + } catch (InterruptedException ieo) { + LOG.info(getName() + ": readAndProcess caught InterruptedException", ieo); + throw ieo; + } catch (Exception e) { + LOG.info(getName() + ": readAndProcess threw exception " + e + ". Count of bytes read: " + count, e); + count = -1; //so that the (count < 0) block is executed + } + if (count < 0) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": disconnecting client " + + c.getHostAddress() + ". Number of active connections: "+ + numConnections); + closeConnection(c); + c = null; + } + else { + c.setLastContact(System.currentTimeMillis()); + } + } + + synchronized void doStop() { + if (selector != null) { + selector.wakeup(); + Thread.yield(); + } + if (acceptChannel != null) { + try { + acceptChannel.socket().close(); + } catch (IOException e) { + LOG.info(getName() + ":Exception in closing listener socket. " + e); + } + } + } + + synchronized Selector getSelector() { return selector; } + } + + // Sends responses of RPC back to clients. + private class Responder extends Thread { + private Selector writeSelector; + private int pending; // connections waiting to register + + final static int PURGE_INTERVAL = 900000; // 15mins + + Responder() throws IOException { + this.setName("IPC Server Responder"); + this.setDaemon(true); + writeSelector = Selector.open(); // create a selector + pending = 0; + } + + @Override + public void run() { + LOG.info(getName() + ": starting"); + SERVER.set(Server.this); + long lastPurgeTime = 0; // last check for old calls. + + while (running) { + try { + waitPending(); // If a channel is being registered, wait. + writeSelector.select(PURGE_INTERVAL); + Iterator iter = writeSelector.selectedKeys().iterator(); + while (iter.hasNext()) { + SelectionKey key = iter.next(); + iter.remove(); + try { + if (key.isValid() && key.isWritable()) { + doAsyncWrite(key); + } + } catch (IOException e) { + LOG.info(getName() + ": doAsyncWrite threw exception " + e); + } + } + long now = System.currentTimeMillis(); + if (now < lastPurgeTime + PURGE_INTERVAL) { + continue; + } + lastPurgeTime = now; + // + // If there were some calls that have not been sent out for a + // long time, discard them. + // + LOG.debug("Checking for old call responses."); + ArrayList calls; + + // get the list of channels from list of keys. 
+ synchronized (writeSelector.keys()) { + calls = new ArrayList(writeSelector.keys().size()); + iter = writeSelector.keys().iterator(); + while (iter.hasNext()) { + SelectionKey key = iter.next(); + Call call = (Call)key.attachment(); + if (call != null && key.channel() == call.connection.channel) { + calls.add(call); + } + } + } + + for(Call call : calls) { + try { + doPurge(call, now); + } catch (IOException e) { + LOG.warn("Error in purging old calls " + e); + } + } + } catch (OutOfMemoryError e) { + // + // we can run out of memory if we have too many threads + // log the event and sleep for a minute and give + // some thread(s) a chance to finish + // + LOG.warn("Out of Memory in server select", e); + try { Thread.sleep(60000); } catch (Exception ie) {} + } catch (Exception e) { + LOG.warn("Exception in Responder " + + StringUtils.stringifyException(e)); + } + } + LOG.info("Stopping " + this.getName()); + } + + private void doAsyncWrite(SelectionKey key) throws IOException { + Call call = (Call)key.attachment(); + if (call == null) { + return; + } + if (key.channel() != call.connection.channel) { + throw new IOException("doAsyncWrite: bad channel"); + } + + synchronized(call.connection.responseQueue) { + if (processResponse(call.connection.responseQueue, false)) { + try { + key.interestOps(0); + } catch (CancelledKeyException e) { + /* The Listener/reader might have closed the socket. + * We don't explicitly cancel the key, so not sure if this will + * ever fire. + * This warning could be removed. + */ + LOG.warn("Exception while changing ops : " + e); + } + } + } + } + + // + // Remove calls that have been pending in the responseQueue + // for a long time. + // + private void doPurge(Call call, long now) throws IOException { + LinkedList responseQueue = call.connection.responseQueue; + synchronized (responseQueue) { + Iterator iter = responseQueue.listIterator(0); + while (iter.hasNext()) { + call = iter.next(); + if (now > call.timestamp + PURGE_INTERVAL) { + closeConnection(call.connection); + break; + } + } + } + } + + // Processes one response. Returns true if there are no more pending + // data for this channel. + // + private boolean processResponse(LinkedList responseQueue, + boolean inHandler) throws IOException { + boolean error = true; + boolean done = false; // there is more data for this channel. + int numElements = 0; + Call call = null; + try { + synchronized (responseQueue) { + // + // If there are no items for this channel, then we are done + // + numElements = responseQueue.size(); + if (numElements == 0) { + error = false; + return true; // no more data for this channel. + } + // + // Extract the first call + // + call = responseQueue.removeFirst(); + SocketChannel channel = call.connection.channel; + if (LOG.isDebugEnabled()) { + LOG.debug(getName() + ": responding to #" + call.id + " from " + + call.connection); + } + // + // Send as much data as we can in the non-blocking fashion + // + int numBytes = channelWrite(channel, call.response); + if (numBytes < 0) { + return true; + } + if (!call.response.hasRemaining()) { + call.connection.decRpcCount(); + if (numElements == 1) { // last call fully processes. + done = true; // no more data for this channel. + } else { + done = false; // more calls pending to be sent. 
+ } + if (LOG.isDebugEnabled()) { + LOG.debug(getName() + ": responding to #" + call.id + " from " + + call.connection + " Wrote " + numBytes + " bytes."); + } + } else { + // + // If we were unable to write the entire response out, then + // insert in Selector queue. + // + call.connection.responseQueue.addFirst(call); + + if (inHandler) { + // set the serve time when the response has to be sent later + call.timestamp = System.currentTimeMillis(); + + incPending(); + try { + // Wakeup the thread blocked on select, only then can the call + // to channel.register() complete. + writeSelector.wakeup(); + channel.register(writeSelector, SelectionKey.OP_WRITE, call); + } catch (ClosedChannelException e) { + //Its ok. channel might be closed else where. + done = true; + } finally { + decPending(); + } + } + if (LOG.isDebugEnabled()) { + LOG.debug(getName() + ": responding to #" + call.id + " from " + + call.connection + " Wrote partial " + numBytes + + " bytes."); + } + } + error = false; // everything went off well + } + } finally { + if (error && call != null) { + LOG.warn(getName()+", call " + call + ": output error"); + done = true; // error. no more data for this channel. + closeConnection(call.connection); + } + } + return done; + } + + // + // Enqueue a response from the application. + // + void doRespond(Call call) throws IOException { + synchronized (call.connection.responseQueue) { + call.connection.responseQueue.addLast(call); + if (call.connection.responseQueue.size() == 1) { + processResponse(call.connection.responseQueue, true); + } + } + } + + private synchronized void incPending() { // call waiting to be enqueued. + pending++; + } + + private synchronized void decPending() { // call done enqueueing. + pending--; + notify(); + } + + private synchronized void waitPending() throws InterruptedException { + while (pending > 0) { + wait(); + } + } + } + + /** Reads calls from a connection and queues them for handling. */ + private class Connection { + private boolean versionRead = false; //if initial signature and + //version are read + private boolean headerRead = false; //if the connection header that + //follows version is read. + + private SocketChannel channel; + private ByteBuffer data; + private ByteBuffer dataLengthBuffer; + private LinkedList responseQueue; + private volatile int rpcCount = 0; // number of outstanding rpcs + private long lastContact; + private int dataLength; + private Socket socket; + // Cache the remote host & port info so that even if the socket is + // disconnected, we can say where it used to connect to. 
+ private String hostAddress; + private int remotePort; + + ConnectionHeader header = new ConnectionHeader(); + Class protocol; + + Subject user = null; + + // Fake 'call' for failed authorization response + private static final int AUTHROIZATION_FAILED_CALLID = -1; + private final Call authFailedCall = + new Call(AUTHROIZATION_FAILED_CALLID, null, null); + private ByteArrayOutputStream authFailedResponse = new ByteArrayOutputStream(); + + public Connection(SelectionKey key, SocketChannel channel, + long lastContact) { + this.channel = channel; + this.lastContact = lastContact; + this.data = null; + this.dataLengthBuffer = ByteBuffer.allocate(4); + this.socket = channel.socket(); + InetAddress addr = socket.getInetAddress(); + if (addr == null) { + this.hostAddress = "*Unknown*"; + } else { + this.hostAddress = addr.getHostAddress(); + } + this.remotePort = socket.getPort(); + this.responseQueue = new LinkedList(); + if (socketSendBufferSize != 0) { + try { + socket.setSendBufferSize(socketSendBufferSize); + } catch (IOException e) { + LOG.warn("Connection: unable to set socket send buffer size to " + + socketSendBufferSize); + } + } + } + + @Override + public String toString() { + return getHostAddress() + ":" + remotePort; + } + + public String getHostAddress() { + return hostAddress; + } + + public void setLastContact(long lastContact) { + this.lastContact = lastContact; + } + + public long getLastContact() { + return lastContact; + } + + /* Return true if the connection has no outstanding rpc */ + private boolean isIdle() { + return rpcCount == 0; + } + + /* Decrement the outstanding RPC count */ + private void decRpcCount() { + rpcCount--; + } + + /* Increment the outstanding RPC count */ + private void incRpcCount() { + rpcCount++; + } + + private boolean timedOut(long currentTime) { + if (isIdle() && currentTime - lastContact > maxIdleTime) + return true; + return false; + } + + public int readAndProcess() throws IOException, InterruptedException { + while (true) { + /* Read at most one RPC. If the header is not read completely yet + * then iterate until we read first RPC or until there is no data left. + */ + int count = -1; + if (dataLengthBuffer.remaining() > 0) { + count = channelRead(channel, dataLengthBuffer); + if (count < 0 || dataLengthBuffer.remaining() > 0) + return count; + } + + if (!versionRead) { + //Every connection is expected to send the header. + ByteBuffer versionBuffer = ByteBuffer.allocate(1); + count = channelRead(channel, versionBuffer); + if (count <= 0) { + return count; + } + int version = versionBuffer.get(0); + + dataLengthBuffer.flip(); + if (!HEADER.equals(dataLengthBuffer) || version != CURRENT_VERSION) { + //Warning is ok since this is not supposed to happen. 
+ LOG.warn("Incorrect header or version mismatch from " + + hostAddress + ":" + remotePort + + " got version " + version + + " expected version " + CURRENT_VERSION); + return -1; + } + dataLengthBuffer.clear(); + versionRead = true; + continue; + } + + if (data == null) { + dataLengthBuffer.flip(); + dataLength = dataLengthBuffer.getInt(); + + if (dataLength == Client.PING_CALL_ID) { + dataLengthBuffer.clear(); + return 0; //ping message + } + data = ByteBuffer.allocate(dataLength); + incRpcCount(); // Increment the rpc count + } + + count = channelRead(channel, data); + + if (data.remaining() == 0) { + dataLengthBuffer.clear(); + data.flip(); + if (headerRead) { + processData(); + data = null; + return count; + } else { + processHeader(); + headerRead = true; + data = null; + + // Authorize the connection + try { + authorize(user, header); + + if (LOG.isDebugEnabled()) { + LOG.debug("Successfully authorized " + header); + } + } catch (AuthorizationException ae) { + authFailedCall.connection = this; + setupResponse(authFailedResponse, authFailedCall, + Status.FATAL, null, + ae.getClass().getName(), ae.getMessage()); + responder.doRespond(authFailedCall); + + // Close this connection + return -1; + } + + continue; + } + } + return count; + } + } + + /// Reads the connection header following version + private void processHeader() throws IOException { + DataInputStream in = + new DataInputStream(new ByteArrayInputStream(data.array())); + header.readFields(in); + try { + String protocolClassName = header.getProtocol(); + if (protocolClassName != null) { + protocol = getProtocolClass(header.getProtocol(), conf); + } + } catch (ClassNotFoundException cnfe) { + throw new IOException("Unknown protocol: " + header.getProtocol()); + } + + // TODO: Get the user name from the GSS API for Kerberbos-based security + // Create the user subject + user = SecurityUtil.getSubject(header.getUgi()); + } + + private void processData() throws IOException, InterruptedException { + DataInputStream dis = + new DataInputStream(new ByteArrayInputStream(data.array())); + int id = dis.readInt(); // try to read an id + + if (LOG.isDebugEnabled()) + LOG.debug(" got #" + id); + + Writable param = ReflectionUtils.newInstance(paramClass, conf); // read param + param.readFields(dis); + + Call call = new Call(id, param, this); + callQueue.put(call); // queue the call; maybe blocked here + } + + private synchronized void close() throws IOException { + data = null; + dataLengthBuffer = null; + if (!channel.isOpen()) + return; + try {socket.shutdownOutput();} catch(Exception e) { + LOG.warn("Ignoring socket shutdown exception"); + } + if (channel.isOpen()) { + try {channel.close();} catch(Exception e) {} + } + try {socket.close();} catch(Exception e) {} + } + } + + /** Handles queued calls . 
*/ + private class Handler extends Thread { + public Handler(int instanceNumber) { + this.setDaemon(true); + this.setName("IPC Server handler "+ instanceNumber + " on " + port); + } + + @Override + public void run() { + LOG.info(getName() + ": starting"); + SERVER.set(Server.this); + ByteArrayOutputStream buf = new ByteArrayOutputStream(10240); + while (running) { + try { + final Call call = callQueue.take(); // pop the queue; maybe blocked here + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": has #" + call.id + " from " + + call.connection); + + String errorClass = null; + String error = null; + Writable value = null; + + CurCall.set(call); + try { + // Make the call as the user via Subject.doAs, thus associating + // the call with the Subject + value = + Subject.doAs(call.connection.user, + new PrivilegedExceptionAction() { + @Override + public Writable run() throws Exception { + // make the call + return call(call.connection.protocol, + call.param, call.timestamp); + + } + } + ); + + } catch (PrivilegedActionException pae) { + Exception e = pae.getException(); + LOG.info(getName()+", call "+call+": error: " + e, e); + errorClass = e.getClass().getName(); + error = StringUtils.stringifyException(e); + } catch (Throwable e) { + LOG.info(getName()+", call "+call+": error: " + e, e); + errorClass = e.getClass().getName(); + error = StringUtils.stringifyException(e); + } + CurCall.set(null); + + setupResponse(buf, call, + (error == null) ? Status.SUCCESS : Status.ERROR, + value, errorClass, error); + responder.doRespond(call); + } catch (InterruptedException e) { + if (running) { // unexpected -- log it + LOG.info(getName() + " caught: " + + StringUtils.stringifyException(e)); + } + } catch (Exception e) { + LOG.info(getName() + " caught: " + + StringUtils.stringifyException(e)); + } + } + LOG.info(getName() + ": exiting"); + } + + } + + protected Server(String bindAddress, int port, + Class paramClass, int handlerCount, + Configuration conf) + throws IOException + { + this(bindAddress, port, paramClass, handlerCount, conf, Integer.toString(port)); + } + /** Constructs a server listening on the named port and address. Parameters passed must + * be of the named class. The handlerCount determines + * the number of handler threads that will be used to process calls. 
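+ *
+ * <p>As an illustrative sketch only (the subclass below is hypothetical, not
+ * part of this patch), a concrete service extends this class and implements
+ * {@link #call(Class, Writable, long)}:
+ * <pre>
+ *   public class EchoServer extends Server {
+ *     public EchoServer(String bindAddress, int port, Configuration conf)
+ *         throws IOException {
+ *       super(bindAddress, port, Text.class, 2, conf);
+ *     }
+ *     public Writable call(Class protocol, Writable param, long receiveTime)
+ *         throws IOException {
+ *       return param; // echo the request back unchanged
+ *     }
+ *   }
+ * </pre>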
+ * + */ + protected Server(String bindAddress, int port, + Class paramClass, int handlerCount, + Configuration conf, String serverName) + throws IOException { + this.bindAddress = bindAddress; + this.conf = conf; + this.port = port; + this.paramClass = paramClass; + this.handlerCount = handlerCount; + this.socketSendBufferSize = 0; + this.maxQueueSize = handlerCount * MAX_QUEUE_SIZE_PER_HANDLER; + this.callQueue = new LinkedBlockingQueue(maxQueueSize); + this.maxIdleTime = 2*conf.getInt("ipc.client.connection.maxidletime", 1000); + this.maxConnectionsToNuke = conf.getInt("ipc.client.kill.max", 10); + this.thresholdIdleConnections = conf.getInt("ipc.client.idlethreshold", 4000); + + // Start the listener here and let it bind to the port + listener = new Listener(); + this.port = listener.getAddress().getPort(); + this.rpcMetrics = new RpcMetrics(serverName, + Integer.toString(this.port), this); + this.tcpNoDelay = conf.getBoolean("ipc.server.tcpnodelay", false); + + + // Create the responder here + responder = new Responder(); + } + + private void closeConnection(Connection connection) { + synchronized (connectionList) { + if (connectionList.remove(connection)) + numConnections--; + } + try { + connection.close(); + } catch (IOException e) { + } + } + + /** + * Setup response for the IPC Call. + * + * @param response buffer to serialize the response into + * @param call {@link Call} to which we are setting up the response + * @param status {@link Status} of the IPC call + * @param rv return value for the IPC Call, if the call was successful + * @param errorClass error class, if the the call failed + * @param error error message, if the call failed + * @throws IOException + */ + private void setupResponse(ByteArrayOutputStream response, + Call call, Status status, + Writable rv, String errorClass, String error) + throws IOException { + response.reset(); + DataOutputStream out = new DataOutputStream(response); + out.writeInt(call.id); // write call id + out.writeInt(status.state); // write status + + if (status == Status.SUCCESS) { + rv.write(out); + } else { + WritableUtils.writeString(out, errorClass); + WritableUtils.writeString(out, error); + } + call.setResponse(ByteBuffer.wrap(response.toByteArray())); + } + + Configuration getConf() { + return conf; + } + + /** Sets the socket buffer size used for responding to RPCs */ + public void setSocketSendBufSize(int size) { this.socketSendBufferSize = size; } + + /** Starts the service. Must be called before any calls will be handled. */ + public synchronized void start() throws IOException { + responder.start(); + listener.start(); + handlers = new Handler[handlerCount]; + + for (int i = 0; i < handlerCount; i++) { + handlers[i] = new Handler(i); + handlers[i].start(); + } + } + + /** Stops the service. No new calls will be handled after this is called. */ + public synchronized void stop() { + LOG.info("Stopping server on " + port); + running = false; + if (handlers != null) { + for (int i = 0; i < handlerCount; i++) { + if (handlers[i] != null) { + handlers[i].interrupt(); + } + } + } + listener.interrupt(); + listener.doStop(); + responder.interrupt(); + notifyAll(); + if (this.rpcMetrics != null) { + this.rpcMetrics.shutdown(); + } + } + + /** Wait for the server to be stopped. + * Does not wait for all subthreads to finish. + * See {@link #stop()}. + */ + public synchronized void join() throws InterruptedException { + while (running) { + wait(); + } + } + + /** + * Return the socket (ip+port) on which the RPC server is listening to. 
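+ * <p>Illustrative note: if the server was constructed with port 0, the address
+ * returned here carries the ephemeral port that was actually bound, e.g.
+ * <pre>
+ *   server.start();
+ *   int boundPort = server.getListenerAddress().getPort();
+ * </pre>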
+ * @return the socket (ip+port) on which the RPC server is listening to. + */ + public synchronized InetSocketAddress getListenerAddress() { + return listener.getAddress(); + } + + /** + * Called for each call. + * @deprecated Use {@link #call(Class, Writable, long)} instead + */ + @Deprecated + public Writable call(Writable param, long receiveTime) throws IOException { + return call(null, param, receiveTime); + } + + /** Called for each call. */ + public abstract Writable call(Class protocol, + Writable param, long receiveTime) + throws IOException; + + /** + * Authorize the incoming client connection. + * + * @param user client user + * @param connection incoming connection + * @throws AuthorizationException when the client isn't authorized to talk the protocol + */ + public void authorize(Subject user, ConnectionHeader connection) + throws AuthorizationException {} + + /** + * The number of open RPC conections + * @return the number of open rpc connections + */ + public int getNumOpenConnections() { + return numConnections; + } + + /** + * The number of rpc calls in the queue. + * @return The number of rpc calls in the queue. + */ + public int getCallQueueLen() { + return callQueue.size(); + } + + + /** + * When the read or write buffer size is larger than this limit, i/o will be + * done in chunks of this size. Most RPC requests and responses would be + * be smaller. + */ + private static int NIO_BUFFER_LIMIT = 8*1024; //should not be more than 64KB. + + /** + * This is a wrapper around {@link WritableByteChannel#write(ByteBuffer)}. + * If the amount of data is large, it writes to channel in smaller chunks. + * This is to avoid jdk from creating many direct buffers as the size of + * buffer increases. This also minimizes extra copies in NIO layer + * as a result of multiple write operations required to write a large + * buffer. + * + * @see WritableByteChannel#write(ByteBuffer) + */ + private static int channelWrite(WritableByteChannel channel, + ByteBuffer buffer) throws IOException { + + return (buffer.remaining() <= NIO_BUFFER_LIMIT) ? + channel.write(buffer) : channelIO(null, channel, buffer); + } + + + /** + * This is a wrapper around {@link ReadableByteChannel#read(ByteBuffer)}. + * If the amount of data is large, it writes to channel in smaller chunks. + * This is to avoid jdk from creating many direct buffers as the size of + * ByteBuffer increases. There should not be any performance degredation. + * + * @see ReadableByteChannel#read(ByteBuffer) + */ + private static int channelRead(ReadableByteChannel channel, + ByteBuffer buffer) throws IOException { + + return (buffer.remaining() <= NIO_BUFFER_LIMIT) ? + channel.read(buffer) : channelIO(channel, null, buffer); + } + + /** + * Helper for {@link #channelRead(ReadableByteChannel, ByteBuffer)} + * and {@link #channelWrite(WritableByteChannel, ByteBuffer)}. Only + * one of readCh or writeCh should be non-null. + * + * @see #channelRead(ReadableByteChannel, ByteBuffer) + * @see #channelWrite(WritableByteChannel, ByteBuffer) + */ + private static int channelIO(ReadableByteChannel readCh, + WritableByteChannel writeCh, + ByteBuffer buf) throws IOException { + + int originalLimit = buf.limit(); + int initialRemaining = buf.remaining(); + int ret = 0; + + while (buf.remaining() > 0) { + try { + int ioSize = Math.min(buf.remaining(), NIO_BUFFER_LIMIT); + buf.limit(buf.position() + ioSize); + + ret = (readCh == null) ? 
writeCh.write(buf) : readCh.read(buf); + + if (ret < ioSize) { + break; + } + + } finally { + buf.limit(originalLimit); + } + } + + int nBytes = initialRemaining - buf.remaining(); + return (nBytes > 0) ? nBytes : ret; + } +} diff --git a/src/java/org/apache/hadoop/ipc/Status.java b/src/java/org/apache/hadoop/ipc/Status.java new file mode 100644 index 00000000000..16fd871ffa6 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/Status.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc; + +/** + * Status of a Hadoop IPC call. + */ +enum Status { + SUCCESS (0), + ERROR (1), + FATAL (-1); + + int state; + private Status(int state) { + this.state = state; + } +} diff --git a/src/java/org/apache/hadoop/ipc/VersionedProtocol.java b/src/java/org/apache/hadoop/ipc/VersionedProtocol.java new file mode 100644 index 00000000000..ef5187522f7 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/VersionedProtocol.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.io.IOException; + +/** + * Superclass of all protocols that use Hadoop RPC. + * Subclasses of this interface are also supposed to have + * a static final long versionID field. + */ +public interface VersionedProtocol { + + /** + * Return protocol version corresponding to protocol interface. 
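The Status values above are exactly what setupResponse() earlier in this hunk puts on the wire: the call id, the status code, then either the serialized return value or an error class and message. A minimal sketch, not part of this patch, of how such a frame could be decoded; the DataInput source and the expected value type (Text here) are assumptions of the example, and the real client class is not shown in this hunk.

import java.io.DataInput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;

public class ResponseDecoder {
  /** Reads one response frame in the layout produced by Server.setupResponse(). */
  static Writable readResponse(DataInput in) throws IOException {
    int callId = in.readInt();      // written by out.writeInt(call.id)
    int state  = in.readInt();      // written by out.writeInt(status.state)
    // a real client would use callId to match this frame to a pending call
    if (state == 0) {               // Status.SUCCESS
      Writable value = new Text();  // the type the caller expects back
      value.readFields(in);
      return value;
    } else {                        // Status.ERROR or Status.FATAL
      String errorClass = WritableUtils.readString(in);
      String error = WritableUtils.readString(in);
      throw new IOException(errorClass + ": " + error);
    }
  }
}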
+ * @param protocol The classname of the protocol interface + * @param clientVersion The version of the protocol that the client speaks + * @return the version that the server will speak + */ + public long getProtocolVersion(String protocol, + long clientVersion) throws IOException; +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java b/src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java new file mode 100644 index 00000000000..e2b33b78743 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc.metrics; + +import javax.management.ObjectName; + +import org.apache.hadoop.metrics.util.MBeanUtil; +import org.apache.hadoop.metrics.util.MetricsDynamicMBeanBase; +import org.apache.hadoop.metrics.util.MetricsRegistry; + + + +/** + * + * This is the JMX MBean for reporting the RPC layer Activity. + * The MBean is register using the name + * "hadoop:service=,name=RpcActivityForPort" + * + * Many of the activity metrics are sampled and averaged on an interval + * which can be specified in the metrics config file. + *
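With VersionedProtocol defined, the abstract Server class earlier in this hunk can be exercised directly by subclassing it and implementing call(). A minimal sketch, not part of this patch: EchoProtocol, EchoServer, the bind address and the port number are invented names used only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.VersionedProtocol;

/** An invented protocol; by convention it carries a static final versionID. */
interface EchoProtocol extends VersionedProtocol {
  long versionID = 1L;
}

public class EchoServer extends Server {

  protected EchoServer(Configuration conf) throws IOException {
    // bind address, port, parameter class, number of handler threads, conf
    super("0.0.0.0", 12345, Text.class, 5, conf);
  }

  /** Invoked by a handler thread for every Call taken off the call queue. */
  @Override
  public Writable call(Class protocol, Writable param, long receiveTime)
      throws IOException {
    return param;                 // echo the request back as the response
  }

  public static void main(String[] args) throws Exception {
    Server server = new EchoServer(new Configuration());
    server.start();               // starts the listener, responder and handlers
    server.join();                // blocks until stop() is called elsewhere
  }
}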

+ * For the metrics that are sampled and averaged, one must specify + * a metrics context that does periodic update calls. Most metrics contexts do. + * The default Null metrics context, however, does NOT. So if you aren't + * using any other metrics context, then you can turn on the viewing and averaging + * of sampled metrics by specifying the following two lines + * in the hadoop-metrics.properties file: + *

+ *        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+ *        rpc.period=10
+ *  
+ *

+ * Note that the metrics are collected regardless of the context used. + * The context with the update thread is used to average the data periodically + * + * + * + * Impl details: We use a dynamic mbean that gets the list of the metrics + * from the metrics registry passed as an argument to the constructor + */ + +public class RpcActivityMBean extends MetricsDynamicMBeanBase { + final private ObjectName mbeanName; + + /** + * + * @param mr - the metrics registry that has all the metrics + * @param serviceName - the service name for the rpc service + * @param port - the rpc port. + */ + public RpcActivityMBean(final MetricsRegistry mr, final String serviceName, final String port) { + + + super(mr, "Rpc layer statistics"); + mbeanName = MBeanUtil.registerMBean(serviceName, + "RpcActivityForPort" + port, this); + } + + + public void shutdown() { + if (mbeanName != null) + MBeanUtil.unregisterMBean(mbeanName); + } + +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java new file mode 100644 index 00000000000..a1fbccd06d4 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc.metrics; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.MetricsUtil; +import org.apache.hadoop.metrics.Updater; +import org.apache.hadoop.metrics.util.MetricsBase; +import org.apache.hadoop.metrics.util.MetricsIntValue; +import org.apache.hadoop.metrics.util.MetricsRegistry; +import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate; + +/** + * + * This class is for maintaining the various RPC statistics + * and publishing them through the metrics interfaces. + * This also registers the JMX MBean for RPC. + *

+ * This class has a number of metrics variables that are publicly accessible; + * these variables (objects) have methods to update their values, + * for example: + *

{@link #rpcQueueTime}.inc(time) + * + */ +public class RpcMetrics implements Updater { + public MetricsRegistry registry = new MetricsRegistry(); + private MetricsRecord metricsRecord; + private Server myServer; + private static Log LOG = LogFactory.getLog(RpcMetrics.class); + RpcActivityMBean rpcMBean; + + public RpcMetrics(String hostName, String port, Server server) { + myServer = server; + MetricsContext context = MetricsUtil.getContext("rpc"); + metricsRecord = MetricsUtil.createRecord(context, "metrics"); + + metricsRecord.setTag("port", port); + + LOG.info("Initializing RPC Metrics with hostName=" + + hostName + ", port=" + port); + + context.registerUpdater(this); + + // Need to clean up the interface to RpcMgt - don't need both metrics and server params + rpcMBean = new RpcActivityMBean(registry, hostName, port); + } + + + /** + * The metrics variables are public: + * - they can be set directly by calling their set/inc methods + * -they can also be read directly - e.g. JMX does this. + */ + + public MetricsTimeVaryingRate rpcQueueTime = + new MetricsTimeVaryingRate("RpcQueueTime", registry); + public MetricsTimeVaryingRate rpcProcessingTime = + new MetricsTimeVaryingRate("RpcProcessingTime", registry); + public MetricsIntValue numOpenConnections = + new MetricsIntValue("NumOpenConnections", registry); + public MetricsIntValue callQueueLen = + new MetricsIntValue("callQueueLen", registry); + + /** + * Push the metrics to the monitoring subsystem on doUpdate() call. + */ + public void doUpdates(MetricsContext context) { + + synchronized (this) { + // ToFix - fix server to use the following two metrics directly so + // the metrics do not have be copied here. + numOpenConnections.set(myServer.getNumOpenConnections()); + callQueueLen.set(myServer.getCallQueueLen()); + for (MetricsBase m : registry.getMetricsList()) { + m.pushMetric(metricsRecord); + } + } + metricsRecord.update(); + } + + public void shutdown() { + if (rpcMBean != null) + rpcMBean.shutdown(); + } +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java b/src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java new file mode 100644 index 00000000000..443c1947fe2 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ipc.metrics; + + +import javax.management.ObjectName; + +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.metrics.util.MBeanUtil; + + +/** + * This class implements the RpcMgt MBean + * + */ +class RpcMgt implements RpcMgtMBean { + private RpcMetrics myMetrics; + private Server myServer; + private ObjectName mbeanName; + + RpcMgt(final String serviceName, final String port, + final RpcMetrics metrics, Server server) { + myMetrics = metrics; + myServer = server; + mbeanName = MBeanUtil.registerMBean(serviceName, + "RpcStatisticsForPort" + port, this); + } + + public void shutdown() { + if (mbeanName != null) + MBeanUtil.unregisterMBean(mbeanName); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgProcessingTime() { + return myMetrics.rpcProcessingTime.getPreviousIntervalAverageTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgProcessingTimeMax() { + return myMetrics.rpcProcessingTime.getMaxTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgProcessingTimeMin() { + return myMetrics.rpcProcessingTime.getMinTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgQueueTime() { + return myMetrics.rpcQueueTime.getPreviousIntervalAverageTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgQueueTimeMax() { + return myMetrics.rpcQueueTime.getMaxTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgQueueTimeMin() { + return myMetrics.rpcQueueTime.getMinTime(); + } + + /** + * @inheritDoc + */ + public int getRpcOpsNumber() { + return myMetrics.rpcProcessingTime.getPreviousIntervalNumOps() ; + } + + /** + * @inheritDoc + */ + public int getNumOpenConnections() { + return myServer.getNumOpenConnections(); + } + + /** + * @inheritDoc + */ + public int getCallQueueLen() { + return myServer.getCallQueueLen(); + } + + /** + * @inheritDoc + */ + public void resetAllMinMax() { + myMetrics.rpcProcessingTime.resetMinMax(); + myMetrics.rpcQueueTime.resetMinMax(); + } +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java b/src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java new file mode 100644 index 00000000000..c92bbac574a --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc.metrics; + + +/** + * + * This is the JMX management interface for the RPC layer. + * Many of the statistics are sampled and averaged on an interval + * which can be specified in the metrics config file. + *
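Because RpcMgt is a standard MBean implementing the RpcMgtMBean interface that follows, its statistics can be read with the plain JMX API. A minimal sketch, not part of this patch: the service name "NameNode" and port 8020 are illustrative, the ObjectName pattern mirrors MBeanUtil.registerMBean(serviceName, "RpcStatisticsForPort" + port, this) above, and it assumes such an MBean has actually been registered in the current JVM.

import java.lang.management.ManagementFactory;

import javax.management.MBeanServer;
import javax.management.ObjectName;

public class RpcStatsProbe {
  public static void main(String[] args) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName name =
        new ObjectName("hadoop:service=NameNode,name=RpcStatisticsForPort8020");

    // JMX derives these attribute names from the RpcMgtMBean getters.
    System.out.println("RpcOpsNumber       = " + mbs.getAttribute(name, "RpcOpsNumber"));
    System.out.println("AvgProcessingTime  = "
        + mbs.getAttribute(name, "RpcOpsAvgProcessingTime"));
    System.out.println("NumOpenConnections = "
        + mbs.getAttribute(name, "NumOpenConnections"));
    System.out.println("CallQueueLen       = " + mbs.getAttribute(name, "CallQueueLen"));

    // Clear the min/max counters through the exposed operation.
    mbs.invoke(name, "resetAllMinMax", new Object[0], new String[0]);
  }
}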

+ * For the statistics that are sampled and averaged, one must specify + * a metrics context that does periodic update calls. Most do. + * The default Null metrics context, however, does NOT. So if you aren't + * using any other metrics context, then you can turn on the viewing and averaging + * of sampled metrics by specifying the following two lines + * in the hadoop-metrics.properties file: + *

+ *        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+ *        rpc.period=10
+ *  
+ *

+ * Note that the metrics are collected regardless of the context used. + * The context with the update thread is used to average the data periodically + * + */ +public interface RpcMgtMBean { + + /** + * Number of RPC Operations in the last interval + * @return number of operations + */ + int getRpcOpsNumber(); + + /** + * Average time for RPC Operations in last interval + * @return time in msec + */ + long getRpcOpsAvgProcessingTime(); + + /** + * The Minimum RPC Operation Processing Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgProcessingTimeMin(); + + + /** + * The Maximum RPC Operation Processing Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgProcessingTimeMax(); + + + /** + * The Average RPC Operation Queued Time in the last interval + * @return time in msec + */ + long getRpcOpsAvgQueueTime(); + + + /** + * The Minimum RPC Operation Queued Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgQueueTimeMin(); + + /** + * The Maximum RPC Operation Queued Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgQueueTimeMax(); + + /** + * Reset all min max times + */ + void resetAllMinMax(); + + /** + * The number of open RPC conections + * @return the number of open rpc connections + */ + public int getNumOpenConnections(); + + /** + * The number of rpc calls in the queue. + * @return The number of rpc calls in the queue. + */ + public int getCallQueueLen(); +} diff --git a/src/java/org/apache/hadoop/ipc/package.html b/src/java/org/apache/hadoop/ipc/package.html new file mode 100644 index 00000000000..3efd81a2978 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/package.html @@ -0,0 +1,23 @@ + + + + + +Tools to help define network clients and servers. + + diff --git a/src/java/org/apache/hadoop/log/LogLevel.java b/src/java/org/apache/hadoop/log/LogLevel.java new file mode 100644 index 00000000000..99fd3d0e7e7 --- /dev/null +++ b/src/java/org/apache/hadoop/log/LogLevel.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.log; + +import java.io.*; +import java.net.*; +import java.util.regex.Pattern; + +import javax.servlet.*; +import javax.servlet.http.*; + +import org.apache.commons.logging.*; +import org.apache.commons.logging.impl.*; +import org.apache.hadoop.util.ServletUtil; + +/** + * Change log level in runtime. 
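The command line entry point that follows takes a -getlevel or -setlevel flag, the host:port of a daemon's HTTP server, a logger name, and (for -setlevel) a level. A minimal sketch of driving it from Java; the host, port, and logger name are illustrative only, and the target daemon must be serving the /logLevel servlet.

public class LogLevelDemo {
  public static void main(String[] args) {
    // Print the current level of one logger on a remote daemon.
    org.apache.hadoop.log.LogLevel.main(new String[] {
        "-getlevel", "namenode.example.com:50070", "org.apache.hadoop.ipc.Server" });

    // Switch the same logger to DEBUG.
    org.apache.hadoop.log.LogLevel.main(new String[] {
        "-setlevel", "namenode.example.com:50070", "org.apache.hadoop.ipc.Server", "DEBUG" });
  }
}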
+ */ +public class LogLevel { + public static final String USAGES = "\nUSAGES:\n" + + "java " + LogLevel.class.getName() + + " -getlevel \n" + + "java " + LogLevel.class.getName() + + " -setlevel \n"; + + /** + * A command line implementation + */ + public static void main(String[] args) { + if (args.length == 3 && "-getlevel".equals(args[0])) { + process("http://" + args[1] + "/logLevel?log=" + args[2]); + return; + } + else if (args.length == 4 && "-setlevel".equals(args[0])) { + process("http://" + args[1] + "/logLevel?log=" + args[2] + + "&level=" + args[3]); + return; + } + + System.err.println(USAGES); + System.exit(-1); + } + + private static void process(String urlstring) { + try { + URL url = new URL(urlstring); + System.out.println("Connecting to " + url); + URLConnection connection = url.openConnection(); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader( + connection.getInputStream())); + for(String line; (line = in.readLine()) != null; ) + if (line.startsWith(MARKER)) { + System.out.println(TAG.matcher(line).replaceAll("")); + } + in.close(); + } catch (IOException ioe) { + System.err.println("" + ioe); + } + } + + static final String MARKER = ""; + static final Pattern TAG = Pattern.compile("<[^>]*>"); + + /** + * A servlet implementation + */ + public static class Servlet extends HttpServlet { + private static final long serialVersionUID = 1L; + + public void doGet(HttpServletRequest request, HttpServletResponse response + ) throws ServletException, IOException { + PrintWriter out = ServletUtil.initHTML(response, "Log Level"); + String logName = ServletUtil.getParameter(request, "log"); + String level = ServletUtil.getParameter(request, "level"); + + if (logName != null) { + out.println("


<br /><hr /><h3>Results</h3>"); + out.println(MARKER + + "Submitted Log Name: <b>" + logName + "</b><br />"); + + Log log = LogFactory.getLog(logName); + out.println(MARKER + + "Log Class: <b>" + log.getClass().getName() +"</b><br />"); + if (level != null) { + out.println(MARKER + "Submitted Level: <b>" + level + "</b><br />"); + } + + if (log instanceof Log4JLogger) { + process(((Log4JLogger)log).getLogger(), level, out); + } + else if (log instanceof Jdk14Logger) { + process(((Jdk14Logger)log).getLogger(), level, out); + } + else { + out.println("Sorry, " + log.getClass() + " not supported.<br />"); + } + } + + out.println(FORMS); + out.println(ServletUtil.HTML_TAIL); + }
+ + static final String FORMS = "\n<br /><hr /><h3>Get / Set</h3>" + + "\n<form>Log: <input type='text' size='50' name='log' /> " + + "<input type='submit' value='Get Log Level' />" + + "</form>" + + "\n<form>Log: <input type='text' size='50' name='log' /> " + + "Level: <input type='text' name='level' /> " + + "<input type='submit' value='Set Log Level' />" + + "</form>";
+ + private static void process(org.apache.log4j.Logger log, String level, + PrintWriter out) throws IOException { + if (level != null) { + log.setLevel(org.apache.log4j.Level.toLevel(level)); + out.println(MARKER + "Setting Level to " + level + " ...<br />"); + } + out.println(MARKER + + "Effective level: <b>" + log.getEffectiveLevel() + "</b><br />"); + }
+ + private static void process(java.util.logging.Logger log, String level, + PrintWriter out) throws IOException { + if (level != null) { + log.setLevel(java.util.logging.Level.parse(level)); + out.println(MARKER + "Setting Level to " + level + " ...<br />"); + } + + java.util.logging.Level lev; + for(; (lev = log.getLevel()) == null; log = log.getParent()); + out.println(MARKER + "Effective level: <b>" + lev + "</b><br />
"); + } + } +} diff --git a/src/java/org/apache/hadoop/metrics/ContextFactory.java b/src/java/org/apache/hadoop/metrics/ContextFactory.java new file mode 100644 index 00000000000..67bd9f95006 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/ContextFactory.java @@ -0,0 +1,204 @@ +/* + * ContextFactory.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext; +import org.apache.hadoop.metrics.spi.NullContext; + +/** + * Factory class for creating MetricsContext objects. To obtain an instance + * of this class, use the static getFactory() method. + */ +public class ContextFactory { + + private static final String PROPERTIES_FILE = + "/hadoop-metrics.properties"; + private static final String CONTEXT_CLASS_SUFFIX = + ".class"; + private static final String DEFAULT_CONTEXT_CLASSNAME = + "org.apache.hadoop.metrics.spi.NullContext"; + + private static ContextFactory theFactory = null; + + private Map attributeMap = new HashMap(); + private Map contextMap = + new HashMap(); + + // Used only when contexts, or the ContextFactory itself, cannot be + // created. + private static Map nullContextMap = + new HashMap(); + + /** Creates a new instance of ContextFactory */ + protected ContextFactory() { + } + + /** + * Returns the value of the named attribute, or null if there is no + * attribute of that name. + * + * @param attributeName the attribute name + * @return the attribute value + */ + public Object getAttribute(String attributeName) { + return attributeMap.get(attributeName); + } + + /** + * Returns the names of all the factory's attributes. + * + * @return the attribute names + */ + public String[] getAttributeNames() { + String[] result = new String[attributeMap.size()]; + int i = 0; + // for (String attributeName : attributeMap.keySet()) { + Iterator it = attributeMap.keySet().iterator(); + while (it.hasNext()) { + result[i++] = (String) it.next(); + } + return result; + } + + /** + * Sets the named factory attribute to the specified value, creating it + * if it did not already exist. If the value is null, this is the same as + * calling removeAttribute. + * + * @param attributeName the attribute name + * @param value the new attribute value + */ + public void setAttribute(String attributeName, Object value) { + attributeMap.put(attributeName, value); + } + + /** + * Removes the named attribute if it exists. 
+ * + * @param attributeName the attribute name + */ + public void removeAttribute(String attributeName) { + attributeMap.remove(attributeName); + } + + /** + * Returns the named MetricsContext instance, constructing it if necessary + * using the factory's current configuration attributes.

+ * + * When constructing the instance, if the factory property + * contextName.class exists, + * its value is taken to be the name of the class to instantiate. Otherwise, + * the default is to create an instance of + * org.apache.hadoop.metrics.spi.NullContext, which is a + * dummy "no-op" context which will cause all metric data to be discarded. + * + * @param contextName the name of the context + * @return the named MetricsContext + */ + public synchronized MetricsContext getContext(String refName, String contextName) + throws IOException, ClassNotFoundException, + InstantiationException, IllegalAccessException { + MetricsContext metricsContext = contextMap.get(refName); + if (metricsContext == null) { + String classNameAttribute = refName + CONTEXT_CLASS_SUFFIX; + String className = (String) getAttribute(classNameAttribute); + if (className == null) { + className = DEFAULT_CONTEXT_CLASSNAME; + } + Class contextClass = Class.forName(className); + metricsContext = (MetricsContext) contextClass.newInstance(); + metricsContext.init(contextName, this); + contextMap.put(contextName, metricsContext); + } + return metricsContext; + } + + public synchronized MetricsContext getContext(String contextName) + throws IOException, ClassNotFoundException, InstantiationException, + IllegalAccessException { + return getContext(contextName, contextName); + } + + /** + * Returns all MetricsContexts built by this factory. + */ + public synchronized Collection getAllContexts() { + // Make a copy to avoid race conditions with creating new contexts. + return new ArrayList(contextMap.values()); + } + + /** + * Returns a "null" context - one which does nothing. + */ + public static synchronized MetricsContext getNullContext(String contextName) { + MetricsContext nullContext = nullContextMap.get(contextName); + if (nullContext == null) { + nullContext = new NullContext(); + nullContextMap.put(contextName, nullContext); + } + return nullContext; + } + + /** + * Returns the singleton ContextFactory instance, constructing it if + * necessary.

+ * + * When the instance is constructed, this method checks if the file + * hadoop-metrics.properties exists on the class path. If it + * exists, it must be in the format defined by java.util.Properties, and all + * the properties in the file are set as attributes on the newly created + * ContextFactory instance. + * + * @return the singleton ContextFactory instance + */ + public static synchronized ContextFactory getFactory() throws IOException { + if (theFactory == null) { + theFactory = new ContextFactory(); + theFactory.setAttributes(); + } + return theFactory; + } + + private void setAttributes() throws IOException { + InputStream is = getClass().getResourceAsStream(PROPERTIES_FILE); + if (is != null) { + Properties properties = new Properties(); + properties.load(is); + //for (Object propertyNameObj : properties.keySet()) { + Iterator it = properties.keySet().iterator(); + while (it.hasNext()) { + String propertyName = (String) it.next(); + String propertyValue = properties.getProperty(propertyName); + setAttribute(propertyName, propertyValue); + } + is.close(); + } + } + +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsContext.java b/src/java/org/apache/hadoop/metrics/MetricsContext.java new file mode 100644 index 00000000000..588a5720153 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsContext.java @@ -0,0 +1,118 @@ +/* + * MetricsContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import org.apache.hadoop.metrics.spi.OutputRecord; + +/** + * The main interface to the metrics package. + */ +public interface MetricsContext { + + /** + * Default period in seconds at which data is sent to the metrics system. + */ + public static final int DEFAULT_PERIOD = 5; + + /** + * Initialize this context. + * @param contextName The given name for this context + * @param factory The creator of this context + */ + public void init(String contextName, ContextFactory factory); + + /** + * Returns the context name. + * + * @return the context name + */ + public abstract String getContextName(); + + /** + * Starts or restarts monitoring, the emitting of metrics records as they are + * updated. + */ + public abstract void startMonitoring() + throws IOException; + + /** + * Stops monitoring. This does not free any data that the implementation + * may have buffered for sending at the next timer event. It + * is OK to call startMonitoring() again after calling + * this. + * @see #close() + */ + public abstract void stopMonitoring(); + + /** + * Returns true if monitoring is currently in progress. 
+ */ + public abstract boolean isMonitoring(); + + /** + * Stops monitoring and also frees any buffered data, returning this + * object to its initial state. + */ + public abstract void close(); + + /** + * Creates a new MetricsRecord instance with the given recordName. + * Throws an exception if the metrics implementation is configured with a fixed + * set of record names and recordName is not in that set. + * + * @param recordName the name of the record + * @throws MetricsException if recordName conflicts with configuration data + */ + public abstract MetricsRecord createRecord(String recordName); + + /** + * Registers a callback to be called at regular time intervals, as + * determined by the implementation-class specific configuration. + * + * @param updater object to be run periodically; it should updated + * some metrics records and then return + */ + public abstract void registerUpdater(Updater updater); + + /** + * Removes a callback, if it exists. + * + * @param updater object to be removed from the callback list + */ + public abstract void unregisterUpdater(Updater updater); + + /** + * Returns the timer period. + */ + public abstract int getPeriod(); + + /** + * Retrieves all the records managed by this MetricsContext. + * Useful for monitoring systems that are polling-based. + * + * @return A non-null map from all record names to the records managed. + */ + Map> getAllRecords(); +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsException.java b/src/java/org/apache/hadoop/metrics/MetricsException.java new file mode 100644 index 00000000000..8e4f7a0497d --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsException.java @@ -0,0 +1,42 @@ +/* + * MetricsException.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +/** + * General-purpose, unchecked metrics exception. + */ +public class MetricsException extends RuntimeException { + + private static final long serialVersionUID = -1643257498540498497L; + + /** Creates a new instance of MetricsException */ + public MetricsException() { + } + + /** Creates a new instance of MetricsException + * + * @param message an error message + */ + public MetricsException(String message) { + super(message); + } + +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsRecord.java b/src/java/org/apache/hadoop/metrics/MetricsRecord.java new file mode 100644 index 00000000000..cec80f225a2 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsRecord.java @@ -0,0 +1,246 @@ +/* + * MetricsRecord.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +/** + * A named and optionally tagged set of records to be sent to the metrics + * system.

+ * + * A record name identifies the kind of data to be reported. For example, a + * program reporting statistics relating to the disks on a computer might use + * a record name "diskStats".

+ * + * A record has zero or more tags. A tag has a name and a value. To + * continue the example, the "diskStats" record might use a tag named + * "diskName" to identify a particular disk. Sometimes it is useful to have + * more than one tag, so there might also be a "diskType" with value "ide" or + * "scsi" or whatever.

+ * + * A record also has zero or more metrics. These are the named + * values that are to be reported to the metrics system. In the "diskStats" + * example, possible metric names would be "diskPercentFull", "diskPercentBusy", + * "kbReadPerSecond", etc.

+ * + * The general procedure for using a MetricsRecord is to fill in its tag and + * metric values, and then call update() to pass the record to the + * client library. + * Metric data is not immediately sent to the metrics system + * each time that update() is called. + * An internal table is maintained, identified by the record name. This + * table has columns + * corresponding to the tag and the metric names, and rows + * corresponding to each unique set of tag values. An update + * either modifies an existing row in the table, or adds a new row with a set of + * tag values that are different from all the other rows. Note that if there + * are no tags, then there can be at most one row in the table.

+ * + * Once a row is added to the table, its data will be sent to the metrics system + * on every timer period, whether or not it has been updated since the previous + * timer period. If this is inappropriate, for example if metrics were being + * reported by some transient object in an application, the remove() + * method can be used to remove the row and thus stop the data from being + * sent.
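A minimal sketch of the procedure just described, reusing the javadoc's "diskStats" example; the context name "myContext", the tag value, and the metric numbers are invented, and which context implementation actually emits the data depends on the hadoop-metrics.properties configuration.

import org.apache.hadoop.metrics.ContextFactory;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;

public class DiskStatsReporter {
  public static void main(String[] args) throws Exception {
    MetricsContext context = ContextFactory.getFactory().getContext("myContext");
    if (!context.isMonitoring()) {
      context.startMonitoring();            // begin the periodic timer
    }

    MetricsRecord record = context.createRecord("diskStats");

    // One buffered row per unique combination of tag values.
    record.setTag("diskName", "sda");
    record.setMetric("diskPercentFull", 72);
    record.incrMetric("kbReadPerSecond", 1500);

    // Copy the values into the buffered table; they are emitted on every
    // timer period until the row is removed.
    record.update();

    // If the disk goes away, drop its row so it stops being reported.
    record.remove();
  }
}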

+ * + * Note that the update() method is atomic. This means that it is + * safe for different threads to be updating the same metric. More precisely, + * it is OK for different threads to call update() on MetricsRecord instances + * with the same set of tag names and tag values. Different threads should + * not use the same MetricsRecord instance at the same time. + */ +public interface MetricsRecord { + + /** + * Returns the record name. + * + * @return the record name + */ + public abstract String getRecordName(); + + /** + * Sets the named tag to the specified value. The tagValue may be null, + * which is treated the same as an empty String. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, String tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, int tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, long tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, short tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, byte tagValue); + + /** + * Removes any tag of the specified name. + * + * @param tagName name of a tag + */ + public abstract void removeTag(String tagName); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, int metricValue); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, long metricValue); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, short metricValue); + + /** + * Sets the named metric to the specified value. 
+ * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, byte metricValue); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, float metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, int metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, long metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, short metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, byte metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, float metricValue); + + /** + * Updates the table of buffered data which is to be sent periodically. + * If the tag values match an existing row, that row is updated; + * otherwise, a new row is added. + */ + public abstract void update(); + + /** + * Removes, from the buffered data table, all rows having tags + * that equal the tags that have been set on this record. For example, + * if there are no tags on this record, all rows for this record name + * would be removed. Or, if there is a single tag on this record, then + * just rows containing a tag with the same name and value would be removed. + */ + public abstract void remove(); + +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsServlet.java b/src/java/org/apache/hadoop/metrics/MetricsServlet.java new file mode 100644 index 00000000000..44c0bd39654 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsServlet.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.hadoop.metrics.spi.OutputRecord; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.MetricMap; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.TagMap; +import org.mortbay.util.ajax.JSON; +import org.mortbay.util.ajax.JSON.Output; + +/** + * A servlet to print out metrics data. By default, the servlet returns a + * textual representation (no promises are made for parseability), and + * users can use "?format=json" for parseable output. + */ +public class MetricsServlet extends HttpServlet { + + /** + * A helper class to hold a TagMap and MetricMap. + */ + static class TagsMetricsPair implements JSON.Convertible { + final TagMap tagMap; + final MetricMap metricMap; + + public TagsMetricsPair(TagMap tagMap, MetricMap metricMap) { + this.tagMap = tagMap; + this.metricMap = metricMap; + } + + @SuppressWarnings("unchecked") + public void fromJSON(Map map) { + throw new UnsupportedOperationException(); + } + + /** Converts to JSON by providing an array. */ + public void toJSON(Output out) { + out.add(new Object[] { tagMap, metricMap }); + } + } + + /** + * Collects all metric data, and returns a map: + * contextName -> recordName -> [ (tag->tagValue), (metric->metricValue) ]. + * The values are either String or Number. The final value is implemented + * as a list of TagsMetricsPair. + */ + Map>> makeMap( + Collection contexts) throws IOException { + Map>> map = + new TreeMap>>(); + + for (MetricsContext context : contexts) { + Map> records = + new TreeMap>(); + map.put(context.getContextName(), records); + + for (Map.Entry> r : + context.getAllRecords().entrySet()) { + List metricsAndTags = + new ArrayList(); + records.put(r.getKey(), metricsAndTags); + for (OutputRecord outputRecord : r.getValue()) { + TagMap tagMap = outputRecord.getTagsCopy(); + MetricMap metricMap = outputRecord.getMetricsCopy(); + metricsAndTags.add(new TagsMetricsPair(tagMap, metricMap)); + } + } + } + return map; + } + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + PrintWriter out = new PrintWriter(response.getOutputStream()); + String format = request.getParameter("format"); + Collection allContexts = + ContextFactory.getFactory().getAllContexts(); + if ("json".equals(format)) { + // Uses Jetty's built-in JSON support to convert the map into JSON. 
+ out.print(new JSON().toJSON(makeMap(allContexts))); + } else { + printMap(out, makeMap(allContexts)); + } + out.close(); + } + + /** + * Prints metrics data in a multi-line text form. + */ + void printMap(PrintWriter out, Map>> map) { + for (Map.Entry>> context : map.entrySet()) { + out.println(context.getKey()); + for (Map.Entry> record : context.getValue().entrySet()) { + indent(out, 1); + out.println(record.getKey()); + for (TagsMetricsPair pair : record.getValue()) { + indent(out, 2); + // Prints tag values in the form "{key=value,key=value}:" + out.print("{"); + boolean first = true; + for (Map.Entry tagValue : pair.tagMap.entrySet()) { + if (first) { + first = false; + } else { + out.print(","); + } + out.print(tagValue.getKey()); + out.print("="); + out.print(tagValue.getValue().toString()); + } + out.println("}:"); + + // Now print metric values, one per line + for (Map.Entry metricValue : + pair.metricMap.entrySet()) { + indent(out, 3); + out.print(metricValue.getKey()); + out.print("="); + out.println(metricValue.getValue().toString()); + } + } + } + } + } + + private void indent(PrintWriter out, int indent) { + for (int i = 0; i < indent; ++i) { + out.append(" "); + } + } +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsUtil.java b/src/java/org/apache/hadoop/metrics/MetricsUtil.java new file mode 100644 index 00000000000..09b9de62ea5 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsUtil.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Utility class to simplify creation and reporting of hadoop metrics. + * + * For examples of usage, see NameNodeMetrics. + * @see org.apache.hadoop.metrics.MetricsRecord + * @see org.apache.hadoop.metrics.MetricsContext + * @see org.apache.hadoop.metrics.ContextFactory + */ +public class MetricsUtil { + + public static final Log LOG = + LogFactory.getLog(MetricsUtil.class); + + /** + * Don't allow creation of a new instance of Metrics + */ + private MetricsUtil() {} + + public static MetricsContext getContext(String contextName) { + return getContext(contextName, contextName); + } + + /** + * Utility method to return the named context. + * If the desired context cannot be created for any reason, the exception + * is logged, and a null context is returned. 
+ */ + public static MetricsContext getContext(String refName, String contextName) { + MetricsContext metricsContext; + try { + metricsContext = + ContextFactory.getFactory().getContext(refName, contextName); + if (!metricsContext.isMonitoring()) { + metricsContext.startMonitoring(); + } + } catch (Exception ex) { + LOG.error("Unable to create metrics context " + contextName, ex); + metricsContext = ContextFactory.getNullContext(contextName); + } + return metricsContext; + } + + /** + * Utility method to create and return new metrics record instance within the + * given context. This record is tagged with the host name. + * + * @param context the context + * @param recordName name of the record + * @return newly created metrics record + */ + public static MetricsRecord createRecord(MetricsContext context, + String recordName) + { + MetricsRecord metricsRecord = context.createRecord(recordName); + metricsRecord.setTag("hostName", getHostName()); + return metricsRecord; + } + + /** + * Returns the host name. If the host name is unobtainable, logs the + * exception and returns "unknown". + */ + private static String getHostName() { + String hostName = null; + try { + hostName = InetAddress.getLocalHost().getHostName(); + } + catch (UnknownHostException ex) { + LOG.info("Unable to obtain hostName", ex); + hostName = "unknown"; + } + return hostName; + } + +} diff --git a/src/java/org/apache/hadoop/metrics/Updater.java b/src/java/org/apache/hadoop/metrics/Updater.java new file mode 100644 index 00000000000..e418ec09c29 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/Updater.java @@ -0,0 +1,33 @@ +/* + * Updater.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +/** + * Call-back interface. See MetricsContext.registerUpdater(). + */ +public interface Updater { + + /** + * Timer-based call-back from the metric library. + */ + public abstract void doUpdates(MetricsContext context); + +} diff --git a/src/java/org/apache/hadoop/metrics/file/FileContext.java b/src/java/org/apache/hadoop/metrics/file/FileContext.java new file mode 100644 index 00000000000..16193276974 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/file/FileContext.java @@ -0,0 +1,139 @@ +/* + * FileContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.file; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext; +import org.apache.hadoop.metrics.spi.OutputRecord; + +/** + * Metrics context for writing metrics to a file.

+ * + * This class is configured by setting ContextFactory attributes which in turn + * are usually configured through a properties file. All the attributes are + * prefixed by the contextName. For example, the properties file might contain: + *

+ * myContextName.fileName=/tmp/metrics.log
+ * myContextName.period=5
+ * 
+ */ +public class FileContext extends AbstractMetricsContext { + + /* Configuration attribute names */ + protected static final String FILE_NAME_PROPERTY = "fileName"; + protected static final String PERIOD_PROPERTY = "period"; + + private File file = null; // file for metrics to be written to + private PrintWriter writer = null; + + /** Creates a new instance of FileContext */ + public FileContext() {} + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + + String fileName = getAttribute(FILE_NAME_PROPERTY); + if (fileName != null) { + file = new File(fileName); + } + + parseAndSetPeriod(PERIOD_PROPERTY); + } + + /** + * Returns the configured file name, or null. + */ + public String getFileName() { + if (file == null) { + return null; + } else { + return file.getName(); + } + } + + /** + * Starts or restarts monitoring, by opening in append-mode, the + * file specified by the fileName attribute, + * if specified. Otherwise the data will be written to standard + * output. + */ + public void startMonitoring() + throws IOException + { + if (file == null) { + writer = new PrintWriter(new BufferedOutputStream(System.out)); + } else { + writer = new PrintWriter(new FileWriter(file, true)); + } + super.startMonitoring(); + } + + /** + * Stops monitoring, closing the file. + * @see #close() + */ + public void stopMonitoring() { + super.stopMonitoring(); + + if (writer != null) { + writer.close(); + writer = null; + } + } + + /** + * Emits a metrics record to a file. + */ + public void emitRecord(String contextName, String recordName, OutputRecord outRec) { + writer.print(contextName); + writer.print("."); + writer.print(recordName); + String separator = ": "; + for (String tagName : outRec.getTagNames()) { + writer.print(separator); + separator = ", "; + writer.print(tagName); + writer.print("="); + writer.print(outRec.getTag(tagName)); + } + for (String metricName : outRec.getMetricNames()) { + writer.print(separator); + separator = ", "; + writer.print(metricName); + writer.print("="); + writer.print(outRec.getMetric(metricName)); + } + writer.println(); + } + + /** + * Flushes the output writer, forcing updates to disk. + */ + public void flush() { + writer.flush(); + } +} diff --git a/src/java/org/apache/hadoop/metrics/file/package.html b/src/java/org/apache/hadoop/metrics/file/package.html new file mode 100644 index 00000000000..73584864e27 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/file/package.html @@ -0,0 +1,43 @@ + + + + + +Implementation of the metrics package that writes the metrics to a file. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

+These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

+
contextName.fileName
+
The path of the file to which metrics in context contextName + are to be appended. If this attribute is not specified, the metrics + are written to standard output by default.
+ +
contextName.period
+
The period in seconds on which the metric data is written to the + file.
+ +
+ + + + diff --git a/src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java b/src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java new file mode 100644 index 00000000000..1affb02f727 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java @@ -0,0 +1,231 @@ +/* + * GangliaContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.ganglia; + +import java.io.IOException; +import java.net.DatagramPacket; +import java.net.DatagramSocket; +import java.net.SocketAddress; +import java.net.SocketException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsException; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext; +import org.apache.hadoop.metrics.spi.OutputRecord; +import org.apache.hadoop.metrics.spi.Util; + +/** + * Context for sending metrics to Ganglia. 
+ * + */ +public class GangliaContext extends AbstractMetricsContext { + + private static final String PERIOD_PROPERTY = "period"; + private static final String SERVERS_PROPERTY = "servers"; + private static final String UNITS_PROPERTY = "units"; + private static final String SLOPE_PROPERTY = "slope"; + private static final String TMAX_PROPERTY = "tmax"; + private static final String DMAX_PROPERTY = "dmax"; + + private static final String DEFAULT_UNITS = ""; + private static final String DEFAULT_SLOPE = "both"; + private static final int DEFAULT_TMAX = 60; + private static final int DEFAULT_DMAX = 0; + private static final int DEFAULT_PORT = 8649; + private static final int BUFFER_SIZE = 1500; // as per libgmond.c + + private final Log LOG = LogFactory.getLog(this.getClass()); + + private static final Map typeTable = new HashMap(5); + + static { + typeTable.put(String.class, "string"); + typeTable.put(Byte.class, "int8"); + typeTable.put(Short.class, "int16"); + typeTable.put(Integer.class, "int32"); + typeTable.put(Long.class, "float"); + typeTable.put(Float.class, "float"); + } + + private byte[] buffer = new byte[BUFFER_SIZE]; + private int offset; + + private List metricsServers; + private Map unitsTable; + private Map slopeTable; + private Map tmaxTable; + private Map dmaxTable; + + private DatagramSocket datagramSocket; + + /** Creates a new instance of GangliaContext */ + public GangliaContext() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + parseAndSetPeriod(PERIOD_PROPERTY); + + metricsServers = + Util.parse(getAttribute(SERVERS_PROPERTY), DEFAULT_PORT); + + unitsTable = getAttributeTable(UNITS_PROPERTY); + slopeTable = getAttributeTable(SLOPE_PROPERTY); + tmaxTable = getAttributeTable(TMAX_PROPERTY); + dmaxTable = getAttributeTable(DMAX_PROPERTY); + + try { + datagramSocket = new DatagramSocket(); + } + catch (SocketException se) { + se.printStackTrace(); + } + } + + public void emitRecord(String contextName, String recordName, + OutputRecord outRec) + throws IOException { + // Setup so that the records have the proper leader names so they are + // unambiguous at the ganglia level, and this prevents a lot of rework + StringBuilder sb = new StringBuilder(); + sb.append(contextName); + sb.append('.'); + sb.append(recordName); + sb.append('.'); + int sbBaseLen = sb.length(); + + // emit each metric in turn + for (String metricName : outRec.getMetricNames()) { + Object metric = outRec.getMetric(metricName); + String type = typeTable.get(metric.getClass()); + if (type != null) { + sb.append(metricName); + emitMetric(sb.toString(), type, metric.toString()); + sb.setLength(sbBaseLen); + } else { + LOG.warn("Unknown metrics type: " + metric.getClass()); + } + } + } + + private void emitMetric(String name, String type, String value) + throws IOException { + String units = getUnits(name); + int slope = getSlope(name); + int tmax = getTmax(name); + int dmax = getDmax(name); + + offset = 0; + xdr_int(0); // metric_user_defined + xdr_string(type); + xdr_string(name); + xdr_string(value); + xdr_string(units); + xdr_int(slope); + xdr_int(tmax); + xdr_int(dmax); + + for (SocketAddress socketAddress : metricsServers) { + DatagramPacket packet = + new DatagramPacket(buffer, offset, socketAddress); + datagramSocket.send(packet); + } + } + + private String getUnits(String metricName) { + String result = unitsTable.get(metricName); + if (result == null) { + result = DEFAULT_UNITS; + } + return result; + } + + private int getSlope(String 
metricName) { + String slopeString = slopeTable.get(metricName); + if (slopeString == null) { + slopeString = DEFAULT_SLOPE; + } + return ("zero".equals(slopeString) ? 0 : 3); // see gmetric.c + } + + private int getTmax(String metricName) { + if (tmaxTable == null) { + return DEFAULT_TMAX; + } + String tmaxString = tmaxTable.get(metricName); + if (tmaxString == null) { + return DEFAULT_TMAX; + } + else { + return Integer.parseInt(tmaxString); + } + } + + private int getDmax(String metricName) { + String dmaxString = dmaxTable.get(metricName); + if (dmaxString == null) { + return DEFAULT_DMAX; + } + else { + return Integer.parseInt(dmaxString); + } + } + + /** + * Puts a string into the buffer by first writing the size of the string + * as an int, followed by the bytes of the string, padded if necessary to + * a multiple of 4. + */ + private void xdr_string(String s) { + byte[] bytes = s.getBytes(); + int len = bytes.length; + xdr_int(len); + System.arraycopy(bytes, 0, buffer, offset, len); + offset += len; + pad(); + } + + /** + * Pads the buffer with zero bytes up to the nearest multiple of 4. + */ + private void pad() { + int newOffset = ((offset + 3) / 4) * 4; + while (offset < newOffset) { + buffer[offset++] = 0; + } + } + + /** + * Puts an integer into the buffer as 4 bytes, big-endian. + */ + private void xdr_int(int i) { + buffer[offset++] = (byte)((i >> 24) & 0xff); + buffer[offset++] = (byte)((i >> 16) & 0xff); + buffer[offset++] = (byte)((i >> 8) & 0xff); + buffer[offset++] = (byte)(i & 0xff); + } +} diff --git a/src/java/org/apache/hadoop/metrics/ganglia/package.html b/src/java/org/apache/hadoop/metrics/ganglia/package.html new file mode 100644 index 00000000000..87598e50332 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/ganglia/package.html @@ -0,0 +1,74 @@ + + + + + + + + +Implementation of the metrics package that sends metric data to +Ganglia. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

+These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

+
contextName.servers
+
Space and/or comma separated sequence of servers to which UDP + messages should be sent.
+ +
contextName.period
+
The period in seconds on which the metric data is sent to the + server(s).
+ +
contextName.units.recordName.metricName
+
The units for the specified metric in the specified record.
+ +
contextName.slope.recordName.metricName
+
The slope for the specified metric in the specified record.
+ +
contextName.tmax.recordName.metricName
+
The tmax for the specified metric in the specified record.
+ +
contextName.dmax.recordName.metricName
+
The dmax for the specified metric in the specified record.
+ +
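As a concrete, purely illustrative sketch, a hadoop-metrics.properties fragment wiring a context to this Ganglia implementation might look like the following. The context name dfs, record name dfsRecord, metric name bytesWritten and the gmond host names are hypothetical; the contextName.class attribute is the one described in the org.apache.hadoop.metrics package documentation, and 8649 is the default port this implementation assumes when none is given.

    # hypothetical context bound to the Ganglia implementation
    dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
    dfs.servers=gmond-host1:8649, gmond-host2:8649
    dfs.period=10
    # per-metric tuning, keyed as contextName.attr.recordName.metricName
    dfs.units.dfsRecord.bytesWritten=bytes
    dfs.slope.dfsRecord.bytesWritten=both
    dfs.tmax.dfsRecord.bytesWritten=60
    dfs.dmax.dfsRecord.bytesWritten=0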
+ + + + diff --git a/src/java/org/apache/hadoop/metrics/jvm/EventCounter.java b/src/java/org/apache/hadoop/metrics/jvm/EventCounter.java new file mode 100644 index 00000000000..deb476eed69 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/jvm/EventCounter.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.jvm; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.spi.LoggingEvent; + +/** + * A log4J Appender that simply counts logging events in three levels: + * fatal, error and warn. + */ +public class EventCounter extends AppenderSkeleton { + + private static final int FATAL = 0; + private static final int ERROR = 1; + private static final int WARN = 2; + private static final int INFO = 3; + + private static class EventCounts { + private final long[] counts = { 0, 0, 0, 0 }; + + private synchronized void incr(int i) { + ++counts[i]; + } + + private synchronized long get(int i) { + return counts[i]; + } + } + private static EventCounts counts = new EventCounts(); + + public static long getFatal() { + return counts.get(FATAL); + } + + public static long getError() { + return counts.get(ERROR); + } + + public static long getWarn() { + return counts.get(WARN); + } + + public static long getInfo() { + return counts.get(INFO); + } + + public void append(LoggingEvent event) { + Level level = event.getLevel(); + if (level == Level.INFO) { + counts.incr(INFO); + } + else if (level == Level.WARN) { + counts.incr(WARN); + } + else if (level == Level.ERROR) { + counts.incr(ERROR); + } + else if (level == Level.FATAL) { + counts.incr(FATAL); + } + + } + + // Strange: these two methods are abstract in AppenderSkeleton, but not + // included in the javadoc (log4j 1.2.13). + + public void close() { + } + public boolean requiresLayout() { + return false; + } + + + +} diff --git a/src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java b/src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java new file mode 100644 index 00000000000..c51916875bc --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.jvm; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryUsage; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.MetricsUtil; +import org.apache.hadoop.metrics.Updater; + +import static java.lang.Thread.State.*; +import java.lang.management.GarbageCollectorMXBean; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Singleton class which reports Java Virtual Machine metrics to the metrics API. + * Any application can create an instance of this class in order to emit + * Java VM metrics. + */ +public class JvmMetrics implements Updater { + + private static final float M = 1024*1024; + private static JvmMetrics theInstance = null; + private static Log log = LogFactory.getLog(JvmMetrics.class); + + private MetricsRecord metrics; + + // garbage collection counters + private long gcCount = 0; + private long gcTimeMillis = 0; + + // logging event counters + private long fatalCount = 0; + private long errorCount = 0; + private long warnCount = 0; + private long infoCount = 0; + + public synchronized static JvmMetrics init(String processName, String sessionId) { + return init(processName, sessionId, "metrics"); + } + + public synchronized static JvmMetrics init(String processName, String sessionId, + String recordName) { + if (theInstance != null) { + log.info("Cannot initialize JVM Metrics with processName=" + + processName + ", sessionId=" + sessionId + + " - already initialized"); + } + else { + log.info("Initializing JVM Metrics with processName=" + + processName + ", sessionId=" + sessionId); + theInstance = new JvmMetrics(processName, sessionId, recordName); + } + return theInstance; + } + + /** Creates a new instance of JvmMetrics */ + private JvmMetrics(String processName, String sessionId, + String recordName) { + MetricsContext context = MetricsUtil.getContext("jvm"); + metrics = MetricsUtil.createRecord(context, recordName); + metrics.setTag("processName", processName); + metrics.setTag("sessionId", sessionId); + context.registerUpdater(this); + } + + /** + * This will be called periodically (with the period being configuration + * dependent). 
+ */ + public void doUpdates(MetricsContext context) { + doMemoryUpdates(); + doGarbageCollectionUpdates(); + doThreadUpdates(); + doEventCountUpdates(); + metrics.update(); + } + + private void doMemoryUpdates() { + MemoryMXBean memoryMXBean = + ManagementFactory.getMemoryMXBean(); + MemoryUsage memNonHeap = + memoryMXBean.getNonHeapMemoryUsage(); + MemoryUsage memHeap = + memoryMXBean.getHeapMemoryUsage(); + metrics.setMetric("memNonHeapUsedM", memNonHeap.getUsed()/M); + metrics.setMetric("memNonHeapCommittedM", memNonHeap.getCommitted()/M); + metrics.setMetric("memHeapUsedM", memHeap.getUsed()/M); + metrics.setMetric("memHeapCommittedM", memHeap.getCommitted()/M); + } + + private void doGarbageCollectionUpdates() { + List gcBeans = + ManagementFactory.getGarbageCollectorMXBeans(); + long count = 0; + long timeMillis = 0; + for (GarbageCollectorMXBean gcBean : gcBeans) { + count += gcBean.getCollectionCount(); + timeMillis += gcBean.getCollectionTime(); + } + metrics.incrMetric("gcCount", (int)(count - gcCount)); + metrics.incrMetric("gcTimeMillis", (int)(timeMillis - gcTimeMillis)); + + gcCount = count; + gcTimeMillis = timeMillis; + } + + private void doThreadUpdates() { + ThreadMXBean threadMXBean = + ManagementFactory.getThreadMXBean(); + long threadIds[] = + threadMXBean.getAllThreadIds(); + ThreadInfo[] threadInfos = + threadMXBean.getThreadInfo(threadIds, 0); + + int threadsNew = 0; + int threadsRunnable = 0; + int threadsBlocked = 0; + int threadsWaiting = 0; + int threadsTimedWaiting = 0; + int threadsTerminated = 0; + + for (ThreadInfo threadInfo : threadInfos) { + // threadInfo is null if the thread is not alive or doesn't exist + if (threadInfo == null) continue; + Thread.State state = threadInfo.getThreadState(); + if (state == NEW) { + threadsNew++; + } + else if (state == RUNNABLE) { + threadsRunnable++; + } + else if (state == BLOCKED) { + threadsBlocked++; + } + else if (state == WAITING) { + threadsWaiting++; + } + else if (state == TIMED_WAITING) { + threadsTimedWaiting++; + } + else if (state == TERMINATED) { + threadsTerminated++; + } + } + metrics.setMetric("threadsNew", threadsNew); + metrics.setMetric("threadsRunnable", threadsRunnable); + metrics.setMetric("threadsBlocked", threadsBlocked); + metrics.setMetric("threadsWaiting", threadsWaiting); + metrics.setMetric("threadsTimedWaiting", threadsTimedWaiting); + metrics.setMetric("threadsTerminated", threadsTerminated); + } + + private void doEventCountUpdates() { + long newFatal = EventCounter.getFatal(); + long newError = EventCounter.getError(); + long newWarn = EventCounter.getWarn(); + long newInfo = EventCounter.getInfo(); + + metrics.incrMetric("logFatal", (int)(newFatal - fatalCount)); + metrics.incrMetric("logError", (int)(newError - errorCount)); + metrics.incrMetric("logWarn", (int)(newWarn - warnCount)); + metrics.incrMetric("logInfo", (int)(newInfo - infoCount)); + + fatalCount = newFatal; + errorCount = newError; + warnCount = newWarn; + infoCount = newInfo; + } +} diff --git a/src/java/org/apache/hadoop/metrics/package.html b/src/java/org/apache/hadoop/metrics/package.html new file mode 100644 index 00000000000..dd16e382dac --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/package.html @@ -0,0 +1,159 @@ + + + + + + org.apache.hadoop.metrics + + +This package defines an API for reporting performance metric information. +

+The API is abstract so that it can be implemented on top of +a variety of metrics client libraries. The choice of +client library is a configuration option, and different +modules within the same application can use +different metrics implementation libraries. +

+Sub-packages: +

+
org.apache.hadoop.metrics.spi
+
The abstract Service Provider Interface package. Those wishing to + integrate the metrics API with a particular metrics client library should + extend this package.
+ +
org.apache.hadoop.metrics.file
+
An implementation package which writes the metric data to + a file, or sends it to the standard output stream.
+ +
org.apache.hadoop.metrics.ganglia
+
An implementation package which sends metric data to + Ganglia.
+
+ +

Introduction to the Metrics API

+ +Here is a simple example of how to use this package to report a single +metric value: +
+    private ContextFactory contextFactory = ContextFactory.getFactory();
+    
+    void reportMyMetric(float myMetric) {
+        MetricsContext myContext = contextFactory.getContext("myContext");
+        MetricsRecord myRecord = myContext.getRecord("myRecord");
+        myRecord.setMetric("myMetric", myMetric);
+        myRecord.update();
+    }
+
+ +In this example there are three names: +
+
myContext
+
The context name will typically identify either the application, or else a + module within an application or library.
+ +
myRecord
+
The record name generally identifies some entity for which a set of + metrics is to be reported. For example, you could have a record named + "cacheStats" for reporting a number of statistics relating to the usage of + some cache in your application.
+ +
myMetric
+
This identifies a particular metric. For example, you might have metrics + named "cache_hits" and "cache_misses". +
+
+ +

Tags

+ +In some cases it is useful to have multiple records with the same name. For +example, suppose that you want to report statistics about each disk on a computer. +In this case, the record name would be something like "diskStats", but you also +need to identify the disk which is done by adding a tag to the record. +The code could look something like this: +
+    private MetricsRecord diskStats =
+            contextFactory.getContext("myContext").getRecord("diskStats");
+            
+    void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
+        diskStats.setTag("diskName", diskName);
+        diskStats.setMetric("diskBusy", diskBusy);
+        diskStats.setMetric("diskUsed", diskUsed);
+        diskStats.update();
+    }
+
+ +

Buffering and Callbacks

+ +Data is not sent immediately to the metrics system when +MetricsRecord.update() is called. Instead it is stored in an +internal table, and the contents of the table are sent periodically. +This can be important for two reasons: +
    +
  1. It means that a programmer is free to put calls to this API in an + inner loop, since updates can be very frequent without slowing down + the application significantly.
  2. Some implementations can gain efficiency by combining many metrics + into a single UDP message.
+ +The API provides a timer-based callback via the +registerUpdater() method. The benefit of this +versus using java.util.Timer is that the callbacks will be done +immediately before sending the data, making the data as current as possible. + +
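As an illustration of the callback mechanism, here is a minimal sketch of a class that registers itself as an updater. The class name MyAppMetrics, the context name myContext and the metric name requests are hypothetical; the MetricsUtil, MetricsContext, MetricsRecord and Updater calls are the ones declared elsewhere in this patch.

    import org.apache.hadoop.metrics.MetricsContext;
    import org.apache.hadoop.metrics.MetricsRecord;
    import org.apache.hadoop.metrics.MetricsUtil;
    import org.apache.hadoop.metrics.Updater;

    // Hypothetical instrumentation class: counts requests cheaply in memory
    // and only touches the metrics record when the timer fires.
    public class MyAppMetrics implements Updater {

        private final MetricsRecord metricsRecord;
        private int requestsSinceLastUpdate = 0;

        public MyAppMetrics(String sessionId) {
            // "myContext" and "myRecord" follow the naming used in the
            // examples above; the context itself is configured through
            // hadoop-metrics.properties.
            MetricsContext context = MetricsUtil.getContext("myContext");
            metricsRecord = MetricsUtil.createRecord(context, "myRecord");
            metricsRecord.setTag("sessionId", sessionId);
            context.registerUpdater(this); // doUpdates() runs once per period
        }

        public synchronized void incrRequests() {
            ++requestsSinceLastUpdate;     // no I/O here, safe in inner loops
        }

        // Timer-based callback, invoked immediately before the context
        // emits its buffered records.
        public synchronized void doUpdates(MetricsContext unused) {
            metricsRecord.incrMetric("requests", requestsSinceLastUpdate);
            requestsSinceLastUpdate = 0;
            metricsRecord.update();        // copy into the context's table
        }
    }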

Configuration

+ +It is possible to programmatically examine and modify configuration data +before creating a context, like this: +
+    ContextFactory factory = ContextFactory.getFactory();
+    ... examine and/or modify factory attributes ...
+    MetricsContext context = factory.getContext("myContext");
+
+The factory attributes can be examined and modified using the following +ContextFactory methods: +
    +
  • Object getAttribute(String attributeName)
  • String[] getAttributeNames()
  • void setAttribute(String name, Object value)
  • void removeAttribute(String attributeName)
+ +

+ContextFactory.getFactory() initializes the factory attributes by +reading the properties file hadoop-metrics.properties if it exists +on the class path. + +

+A factory attribute named: +

+contextName.class
+
+should have as its value the fully qualified name of the class to be +instantiated by a call of the ContextFactory method +getContext(contextName). If this factory attribute is not +specified, the default is to instantiate +org.apache.hadoop.metrics.file.FileContext. + +
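For instance, a hypothetical hadoop-metrics.properties could bind two contexts to different implementations; myContext is an illustrative name, while jvm is the context name used by org.apache.hadoop.metrics.jvm.JvmMetrics:

    # two illustrative contexts, each bound to a different implementation
    myContext.class=org.apache.hadoop.metrics.file.FileContext
    myContext.fileName=/tmp/myContextMetrics.log
    myContext.period=10
    jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
    jvm.servers=gmond-host:8649
    jvm.period=10

A context whose class attribute is omitted falls back to org.apache.hadoop.metrics.file.FileContext, as noted above.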

+Other factory attributes are specific to a particular implementation of this +API and are documented elsewhere. For example, configuration attributes for +the file and Ganglia implementations can be found in the javadoc for +their respective packages. + + diff --git a/src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java b/src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java new file mode 100644 index 00000000000..e6f85ae3781 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java @@ -0,0 +1,475 @@ +/* + * AbstractMetricsContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsException; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.Updater; + +/** + * The main class of the Service Provider Interface. This class should be + * extended in order to integrate the Metrics API with a specific metrics + * client library.

+ * + * This class implements the internal table of metric data, and the timer + * on which data is to be sent to the metrics system. Subclasses must + * override the abstract emitRecord method in order to transmit + * the data.

+ */ +public abstract class AbstractMetricsContext implements MetricsContext { + + private int period = MetricsContext.DEFAULT_PERIOD; + private Timer timer = null; + + private Set updaters = new HashSet(1); + private volatile boolean isMonitoring = false; + + private ContextFactory factory = null; + private String contextName = null; + + public static class TagMap extends TreeMap { + private static final long serialVersionUID = 3546309335061952993L; + TagMap() { + super(); + } + TagMap(TagMap orig) { + super(orig); + } + /** + * Returns true if this tagmap contains every tag in other. + */ + public boolean containsAll(TagMap other) { + for (Map.Entry entry : other.entrySet()) { + Object value = get(entry.getKey()); + if (value == null || !value.equals(entry.getValue())) { + // either key does not exist here, or the value is different + return false; + } + } + return true; + } + } + + public static class MetricMap extends TreeMap { + private static final long serialVersionUID = -7495051861141631609L; + MetricMap() { + super(); + } + MetricMap(MetricMap orig) { + super(orig); + } + } + + static class RecordMap extends HashMap { + private static final long serialVersionUID = 259835619700264611L; + } + + private Map bufferedData = new HashMap(); + + + /** + * Creates a new instance of AbstractMetricsContext + */ + protected AbstractMetricsContext() { + } + + /** + * Initializes the context. + */ + public void init(String contextName, ContextFactory factory) + { + this.contextName = contextName; + this.factory = factory; + } + + /** + * Convenience method for subclasses to access factory attributes. + */ + protected String getAttribute(String attributeName) { + String factoryAttribute = contextName + "." + attributeName; + return (String) factory.getAttribute(factoryAttribute); + } + + /** + * Returns an attribute-value map derived from the factory attributes + * by finding all factory attributes that begin with + * contextName.tableName. The returned map consists of + * those attributes with the contextName and tableName stripped off. + */ + protected Map getAttributeTable(String tableName) { + String prefix = contextName + "." + tableName + "."; + Map result = new HashMap(); + for (String attributeName : factory.getAttributeNames()) { + if (attributeName.startsWith(prefix)) { + String name = attributeName.substring(prefix.length()); + String value = (String) factory.getAttribute(attributeName); + result.put(name, value); + } + } + return result; + } + + /** + * Returns the context name. + */ + public String getContextName() { + return contextName; + } + + /** + * Returns the factory by which this context was created. + */ + public ContextFactory getContextFactory() { + return factory; + } + + /** + * Starts or restarts monitoring, the emitting of metrics records. + */ + public synchronized void startMonitoring() + throws IOException { + if (!isMonitoring) { + startTimer(); + isMonitoring = true; + } + } + + /** + * Stops monitoring. This does not free buffered data. + * @see #close() + */ + public synchronized void stopMonitoring() { + if (isMonitoring) { + stopTimer(); + isMonitoring = false; + } + } + + /** + * Returns true if monitoring is currently in progress. + */ + public boolean isMonitoring() { + return isMonitoring; + } + + /** + * Stops monitoring and frees buffered data, returning this + * object to its initial state. 
+ */ + public synchronized void close() { + stopMonitoring(); + clearUpdaters(); + } + + /** + * Creates a new AbstractMetricsRecord instance with the given recordName. + * Throws an exception if the metrics implementation is configured with a fixed + * set of record names and recordName is not in that set. + * + * @param recordName the name of the record + * @throws MetricsException if recordName conflicts with configuration data + */ + public final synchronized MetricsRecord createRecord(String recordName) { + if (bufferedData.get(recordName) == null) { + bufferedData.put(recordName, new RecordMap()); + } + return newRecord(recordName); + } + + /** + * Subclasses should override this if they subclass MetricsRecordImpl. + * @param recordName the name of the record + * @return newly created instance of MetricsRecordImpl or subclass + */ + protected MetricsRecord newRecord(String recordName) { + return new MetricsRecordImpl(recordName, this); + } + + /** + * Registers a callback to be called at time intervals determined by + * the configuration. + * + * @param updater object to be run periodically; it should update + * some metrics records + */ + public synchronized void registerUpdater(final Updater updater) { + if (!updaters.contains(updater)) { + updaters.add(updater); + } + } + + /** + * Removes a callback, if it exists. + * + * @param updater object to be removed from the callback list + */ + public synchronized void unregisterUpdater(Updater updater) { + updaters.remove(updater); + } + + private synchronized void clearUpdaters() { + updaters.clear(); + } + + /** + * Starts timer if it is not already started + */ + private synchronized void startTimer() { + if (timer == null) { + timer = new Timer("Timer thread for monitoring " + getContextName(), + true); + TimerTask task = new TimerTask() { + public void run() { + try { + timerEvent(); + } + catch (IOException ioe) { + ioe.printStackTrace(); + } + } + }; + long millis = period * 1000; + timer.scheduleAtFixedRate(task, millis, millis); + } + } + + /** + * Stops timer if it is running + */ + private synchronized void stopTimer() { + if (timer != null) { + timer.cancel(); + timer = null; + } + } + + /** + * Timer callback. + */ + private void timerEvent() throws IOException { + if (isMonitoring) { + Collection myUpdaters; + synchronized (this) { + myUpdaters = new ArrayList(updaters); + } + // Run all the registered updates without holding a lock + // on this context + for (Updater updater : myUpdaters) { + try { + updater.doUpdates(this); + } + catch (Throwable throwable) { + throwable.printStackTrace(); + } + } + emitRecords(); + } + } + + /** + * Emits the records. + */ + private synchronized void emitRecords() throws IOException { + for (String recordName : bufferedData.keySet()) { + RecordMap recordMap = bufferedData.get(recordName); + synchronized (recordMap) { + Set> entrySet = recordMap.entrySet (); + for (Entry entry : entrySet) { + OutputRecord outRec = new OutputRecord(entry.getKey(), entry.getValue()); + emitRecord(contextName, recordName, outRec); + } + } + } + flush(); + } + + /** + * Retrieves all the records managed by this MetricsContext. + * Useful for monitoring systems that are polling-based. + * @return A non-null collection of all monitoring records. 
+ */ + public synchronized Map> getAllRecords() { + Map> out = new TreeMap>(); + for (String recordName : bufferedData.keySet()) { + RecordMap recordMap = bufferedData.get(recordName); + synchronized (recordMap) { + List records = new ArrayList(); + Set> entrySet = recordMap.entrySet(); + for (Entry entry : entrySet) { + OutputRecord outRec = new OutputRecord(entry.getKey(), entry.getValue()); + records.add(outRec); + } + out.put(recordName, records); + } + } + return out; + } + + /** + * Sends a record to the metrics system. + */ + protected abstract void emitRecord(String contextName, String recordName, + OutputRecord outRec) throws IOException; + + /** + * Called each period after all records have been emitted, this method does nothing. + * Subclasses may override it in order to perform some kind of flush. + */ + protected void flush() throws IOException { + } + + /** + * Called by MetricsRecordImpl.update(). Creates or updates a row in + * the internal table of metric data. + */ + protected void update(MetricsRecordImpl record) { + String recordName = record.getRecordName(); + TagMap tagTable = record.getTagTable(); + Map metricUpdates = record.getMetricTable(); + + RecordMap recordMap = getRecordMap(recordName); + synchronized (recordMap) { + MetricMap metricMap = recordMap.get(tagTable); + if (metricMap == null) { + metricMap = new MetricMap(); + TagMap tagMap = new TagMap(tagTable); // clone tags + recordMap.put(tagMap, metricMap); + } + + Set> entrySet = metricUpdates.entrySet(); + for (Entry entry : entrySet) { + String metricName = entry.getKey (); + MetricValue updateValue = entry.getValue (); + Number updateNumber = updateValue.getNumber(); + Number currentNumber = metricMap.get(metricName); + if (currentNumber == null || updateValue.isAbsolute()) { + metricMap.put(metricName, updateNumber); + } + else { + Number newNumber = sum(updateNumber, currentNumber); + metricMap.put(metricName, newNumber); + } + } + } + } + + private synchronized RecordMap getRecordMap(String recordName) { + return bufferedData.get(recordName); + } + + /** + * Adds two numbers, coercing the second to the type of the first. + * + */ + private Number sum(Number a, Number b) { + if (a instanceof Integer) { + return Integer.valueOf(a.intValue() + b.intValue()); + } + else if (a instanceof Float) { + return new Float(a.floatValue() + b.floatValue()); + } + else if (a instanceof Short) { + return Short.valueOf((short)(a.shortValue() + b.shortValue())); + } + else if (a instanceof Byte) { + return Byte.valueOf((byte)(a.byteValue() + b.byteValue())); + } + else if (a instanceof Long) { + return Long.valueOf((a.longValue() + b.longValue())); + } + else { + // should never happen + throw new MetricsException("Invalid number type"); + } + + } + + /** + * Called by MetricsRecordImpl.remove(). Removes all matching rows in + * the internal table of metric data. A row matches if it has the same + * tag names and values as record, but it may also have additional + * tags. + */ + protected void remove(MetricsRecordImpl record) { + String recordName = record.getRecordName(); + TagMap tagTable = record.getTagTable(); + + RecordMap recordMap = getRecordMap(recordName); + synchronized (recordMap) { + Iterator it = recordMap.keySet().iterator(); + while (it.hasNext()) { + TagMap rowTags = it.next(); + if (rowTags.containsAll(tagTable)) { + it.remove(); + } + } + } + } + + /** + * Returns the timer period. 
+ */ + public int getPeriod() { + return period; + } + + /** + * Sets the timer period + */ + protected void setPeriod(int period) { + this.period = period; + } + + /** + * If a period is set in the attribute passed in, override + * the default with it. + */ + protected void parseAndSetPeriod(String attributeName) { + String periodStr = getAttribute(attributeName); + if (periodStr != null) { + int period = 0; + try { + period = Integer.parseInt(periodStr); + } catch (NumberFormatException nfe) { + } + if (period <= 0) { + throw new MetricsException("Invalid period: " + periodStr); + } + setPeriod(period); + } + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/CompositeContext.java b/src/java/org/apache/hadoop/metrics/spi/CompositeContext.java new file mode 100644 index 00000000000..782fb30485c --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/CompositeContext.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.spi; + +import java.io.IOException; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.util.ArrayList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsException; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.MetricsUtil; +import org.apache.hadoop.metrics.Updater; + +public class CompositeContext extends AbstractMetricsContext { + + private static final Log LOG = LogFactory.getLog(CompositeContext.class); + private static final String ARITY_LABEL = "arity"; + private static final String SUB_FMT = "%s.sub%d"; + private final ArrayList subctxt = + new ArrayList(); + + public CompositeContext() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + int nKids; + try { + String sKids = getAttribute(ARITY_LABEL); + nKids = Integer.valueOf(sKids); + } catch (Exception e) { + LOG.error("Unable to initialize composite metric " + contextName + + ": could not init arity", e); + return; + } + for (int i = 0; i < nKids; ++i) { + MetricsContext ctxt = MetricsUtil.getContext( + String.format(SUB_FMT, contextName, i), contextName); + if (null != ctxt) { + subctxt.add(ctxt); + } + } + } + + @Override + public MetricsRecord newRecord(String recordName) { + return (MetricsRecord) Proxy.newProxyInstance( + MetricsRecord.class.getClassLoader(), + new Class[] { MetricsRecord.class }, + new MetricsRecordDelegator(recordName, subctxt)); + } + + @Override + protected void emitRecord(String contextName, String recordName, + OutputRecord 
outRec) throws IOException { + for (MetricsContext ctxt : subctxt) { + try { + ((AbstractMetricsContext)ctxt).emitRecord( + contextName, recordName, outRec); + if (contextName == null || recordName == null || outRec == null) { + throw new IOException(contextName + ":" + recordName + ":" + outRec); + } + } catch (IOException e) { + LOG.warn("emitRecord failed: " + ctxt.getContextName(), e); + } + } + } + + @Override + protected void flush() throws IOException { + for (MetricsContext ctxt : subctxt) { + try { + ((AbstractMetricsContext)ctxt).flush(); + } catch (IOException e) { + LOG.warn("flush failed: " + ctxt.getContextName(), e); + } + } + } + + @Override + public void startMonitoring() throws IOException { + for (MetricsContext ctxt : subctxt) { + try { + ctxt.startMonitoring(); + } catch (IOException e) { + LOG.warn("startMonitoring failed: " + ctxt.getContextName(), e); + } + } + } + + @Override + public void stopMonitoring() { + for (MetricsContext ctxt : subctxt) { + ctxt.stopMonitoring(); + } + } + + /** + * Return true if all subcontexts are monitoring. + */ + @Override + public boolean isMonitoring() { + boolean ret = true; + for (MetricsContext ctxt : subctxt) { + ret &= ctxt.isMonitoring(); + } + return ret; + } + + @Override + public void close() { + for (MetricsContext ctxt : subctxt) { + ctxt.close(); + } + } + + @Override + public void registerUpdater(Updater updater) { + for (MetricsContext ctxt : subctxt) { + ctxt.registerUpdater(updater); + } + } + + @Override + public void unregisterUpdater(Updater updater) { + for (MetricsContext ctxt : subctxt) { + ctxt.unregisterUpdater(updater); + } + } + + private static class MetricsRecordDelegator implements InvocationHandler { + private static final Method m_getRecordName = initMethod(); + private static Method initMethod() { + try { + return MetricsRecord.class.getMethod("getRecordName", new Class[0]); + } catch (Exception e) { + throw new RuntimeException("Internal error", e); + } + } + + private final String recordName; + private final ArrayList subrecs; + + MetricsRecordDelegator(String recordName, ArrayList ctxts) { + this.recordName = recordName; + this.subrecs = new ArrayList(ctxts.size()); + for (MetricsContext ctxt : ctxts) { + subrecs.add(ctxt.createRecord(recordName)); + } + } + + public Object invoke(Object p, Method m, Object[] args) throws Throwable { + if (m_getRecordName.equals(m)) { + return recordName; + } + assert Void.TYPE.equals(m.getReturnType()); + for (MetricsRecord rec : subrecs) { + m.invoke(rec, args); + } + return null; + } + } + +} diff --git a/src/java/org/apache/hadoop/metrics/spi/MetricValue.java b/src/java/org/apache/hadoop/metrics/spi/MetricValue.java new file mode 100644 index 00000000000..4a6929b8507 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/MetricValue.java @@ -0,0 +1,52 @@ +/* + * MetricValue.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +/** + * A Number that is either an absolute or an incremental amount. + */ +public class MetricValue { + + public static final boolean ABSOLUTE = false; + public static final boolean INCREMENT = true; + + private boolean isIncrement; + private Number number; + + /** Creates a new instance of MetricValue */ + public MetricValue(Number number, boolean isIncrement) { + this.number = number; + this.isIncrement = isIncrement; + } + + public boolean isIncrement() { + return isIncrement; + } + + public boolean isAbsolute() { + return !isIncrement; + } + + public Number getNumber() { + return number; + } + +} diff --git a/src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java b/src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java new file mode 100644 index 00000000000..e3bac564117 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java @@ -0,0 +1,275 @@ +/* + * MetricsRecordImpl.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.TagMap; + +/** + * An implementation of MetricsRecord. Keeps a back-pointer to the context + * from which it was created, and delegates back to it on update + * and remove(). + */ +public class MetricsRecordImpl implements MetricsRecord { + + private TagMap tagTable = new TagMap(); + private Map metricTable = new LinkedHashMap(); + + private String recordName; + private AbstractMetricsContext context; + + + /** Creates a new instance of FileRecord */ + protected MetricsRecordImpl(String recordName, AbstractMetricsContext context) + { + this.recordName = recordName; + this.context = context; + } + + /** + * Returns the record name. + * + * @return the record name + */ + public String getRecordName() { + return recordName; + } + + /** + * Sets the named tag to the specified value. 
+ * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, String tagValue) { + if (tagValue == null) { + tagValue = ""; + } + tagTable.put(tagName, tagValue); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, int tagValue) { + tagTable.put(tagName, Integer.valueOf(tagValue)); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, long tagValue) { + tagTable.put(tagName, Long.valueOf(tagValue)); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, short tagValue) { + tagTable.put(tagName, Short.valueOf(tagValue)); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, byte tagValue) { + tagTable.put(tagName, Byte.valueOf(tagValue)); + } + + /** + * Removes any tag of the specified name. + */ + public void removeTag(String tagName) { + tagTable.remove(tagName); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, int metricValue) { + setAbsolute(metricName, Integer.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, long metricValue) { + setAbsolute(metricName, Long.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, short metricValue) { + setAbsolute(metricName, Short.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, byte metricValue) { + setAbsolute(metricName, Byte.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. 
+ * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, float metricValue) { + setAbsolute(metricName, new Float(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, int metricValue) { + setIncrement(metricName, Integer.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, long metricValue) { + setIncrement(metricName, Long.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, short metricValue) { + setIncrement(metricName, Short.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, byte metricValue) { + setIncrement(metricName, Byte.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, float metricValue) { + setIncrement(metricName, new Float(metricValue)); + } + + private void setAbsolute(String metricName, Number metricValue) { + metricTable.put(metricName, new MetricValue(metricValue, MetricValue.ABSOLUTE)); + } + + private void setIncrement(String metricName, Number metricValue) { + metricTable.put(metricName, new MetricValue(metricValue, MetricValue.INCREMENT)); + } + + /** + * Updates the table of buffered data which is to be sent periodically. + * If the tag values match an existing row, that row is updated; + * otherwise, a new row is added. + */ + public void update() { + context.update(this); + } + + /** + * Removes the row, if it exists, in the buffered data table having tags + * that equal the tags that have been set on this record. 
+ */ + public void remove() { + context.remove(this); + } + + TagMap getTagTable() { + return tagTable; + } + + Map getMetricTable() { + return metricTable; + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java b/src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java new file mode 100644 index 00000000000..9e9893426a5 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.spi; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsServlet; + +/** + * A MetricsContext that does not emit data, but, unlike NullContextWithUpdate, + * does save it for retrieval with getAllRecords(). + * + * This is useful if you want to support {@link MetricsServlet}, but + * not emit metrics in any other way. + */ +public class NoEmitMetricsContext extends AbstractMetricsContext { + + private static final String PERIOD_PROPERTY = "period"; + + /** Creates a new instance of NullContextWithUpdateThread */ + public NoEmitMetricsContext() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + parseAndSetPeriod(PERIOD_PROPERTY); + } + + /** + * Do-nothing version of emitRecord + */ + protected void emitRecord(String contextName, String recordName, + OutputRecord outRec) { + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/NullContext.java b/src/java/org/apache/hadoop/metrics/spi/NullContext.java new file mode 100644 index 00000000000..11cccb5b0af --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/NullContext.java @@ -0,0 +1,58 @@ +/* + * NullContext.java + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +/** + * Null metrics context: a metrics context which does nothing. Used as the + * default context, so that no performance data is emitted if no configuration + * data is found. 
+ * + */ +public class NullContext extends AbstractMetricsContext { + + /** Creates a new instance of NullContext */ + public NullContext() { + } + + /** + * Do-nothing version of startMonitoring + */ + public void startMonitoring() { + } + + /** + * Do-nothing version of emitRecord + */ + protected void emitRecord(String contextName, String recordName, + OutputRecord outRec) + {} + + /** + * Do-nothing version of update + */ + protected void update(MetricsRecordImpl record) { + } + + /** + * Do-nothing version of remove + */ + protected void remove(MetricsRecordImpl record) { + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java b/src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java new file mode 100644 index 00000000000..5efe5f0fb77 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsException; + +/** + * A null context which has a thread calling + * periodically when monitoring is started. This keeps the data sampled + * correctly. + * In all other respects, this is like the NULL context: No data is emitted. + * This is suitable for Monitoring systems like JMX which reads the metrics + * when someone reads the data from JMX. + * + * The default impl of start and stop monitoring: + * is the AbstractMetricsContext is good enough. + * + */ + +public class NullContextWithUpdateThread extends AbstractMetricsContext { + + private static final String PERIOD_PROPERTY = "period"; + + /** Creates a new instance of NullContextWithUpdateThread */ + public NullContextWithUpdateThread() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + parseAndSetPeriod(PERIOD_PROPERTY); + } + + + /** + * Do-nothing version of emitRecord + */ + protected void emitRecord(String contextName, String recordName, + OutputRecord outRec) + {} + + /** + * Do-nothing version of update + */ + protected void update(MetricsRecordImpl record) { + } + + /** + * Do-nothing version of remove + */ + protected void remove(MetricsRecordImpl record) { + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/OutputRecord.java b/src/java/org/apache/hadoop/metrics/spi/OutputRecord.java new file mode 100644 index 00000000000..4fa54158956 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/OutputRecord.java @@ -0,0 +1,90 @@ +/* + * OutputRecord.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.MetricMap; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.TagMap; + +/** + * Represents a record of metric data to be sent to a metrics system. + */ +public class OutputRecord { + + private TagMap tagMap; + private MetricMap metricMap; + + /** Creates a new instance of OutputRecord */ + OutputRecord(TagMap tagMap, MetricMap metricMap) { + this.tagMap = tagMap; + this.metricMap = metricMap; + } + + /** + * Returns the set of tag names + */ + public Set getTagNames() { + return Collections.unmodifiableSet(tagMap.keySet()); + } + + /** + * Returns a tag object which is can be a String, Integer, Short or Byte. + * + * @return the tag value, or null if there is no such tag + */ + public Object getTag(String name) { + return tagMap.get(name); + } + + /** + * Returns the set of metric names. + */ + public Set getMetricNames() { + return Collections.unmodifiableSet(metricMap.keySet()); + } + + /** + * Returns the metric object which can be a Float, Integer, Short or Byte. + */ + public Number getMetric(String name) { + return metricMap.get(name); + } + + + /** + * Returns a copy of this record's tags. + */ + public TagMap getTagsCopy() { + return new TagMap(tagMap); + } + + /** + * Returns a copy of this record's metrics. + */ + public MetricMap getMetricsCopy() { + return new MetricMap(metricMap); + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/Util.java b/src/java/org/apache/hadoop/metrics/spi/Util.java new file mode 100644 index 00000000000..d7c1912976f --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/Util.java @@ -0,0 +1,67 @@ +/* + * Util.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.hadoop.metrics.spi; + +import java.net.InetSocketAddress; +import java.net.SocketAddress; +import java.util.ArrayList; +import java.util.List; + +/** + * Static utility methods + */ +public class Util { + + /** + * This class is not intended to be instantiated + */ + private Util() {} + + /** + * Parses a space and/or comma separated sequence of server specifications + * of the form hostname or hostname:port. If + * the specs string is null, defaults to localhost:defaultPort. + * + * @return a list of InetSocketAddress objects. + */ + public static List parse(String specs, int defaultPort) { + List result = new ArrayList(1); + if (specs == null) { + result.add(new InetSocketAddress("localhost", defaultPort)); + } + else { + String[] specStrings = specs.split("[ ,]+"); + for (String specString : specStrings) { + int colon = specString.indexOf(':'); + if (colon < 0 || colon == specString.length() - 1) { + result.add(new InetSocketAddress(specString, defaultPort)); + } else { + String hostname = specString.substring(0, colon); + int port = Integer.parseInt(specString.substring(colon+1)); + result.add(new InetSocketAddress(hostname, port)); + } + } + } + return result; + } + +} diff --git a/src/java/org/apache/hadoop/metrics/spi/package.html b/src/java/org/apache/hadoop/metrics/spi/package.html new file mode 100644 index 00000000000..b72552f761f --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/package.html @@ -0,0 +1,36 @@ + + + + + + + org.apache.hadoop.metrics.spi + + +The Service Provider Interface for the Metrics API. This package provides +an interface allowing a variety of metrics reporting implementations to be +plugged in to the Metrics API. Examples of such implementations can be found +in the packages org.apache.hadoop.metrics.file and +org.apache.hadoop.metrics.ganglia.
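
The parse helper above is how reporting implementations such as the file and ganglia contexts just mentioned typically turn a "servers" attribute into a list of socket addresses. A small sketch with made-up host names:

import java.net.InetSocketAddress;
import java.util.List;

import org.apache.hadoop.metrics.spi.Util;

public class ParseServersSketch {
  public static void main(String[] args) {
    // "gmond1:8649, gmond2" -> [gmond1:8649, gmond2:8649]; a null spec -> [localhost:8649]
    List<InetSocketAddress> servers = Util.parse("gmond1:8649, gmond2", 8649);
    System.out.println(servers);
  }
}
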

+ +Plugging in an implementation involves writing a concrete subclass of +AbstractMetricsContext. The subclass should get its + configuration information using the getAttribute(attributeName) + method. + + diff --git a/src/java/org/apache/hadoop/metrics/util/MBeanUtil.java b/src/java/org/apache/hadoop/metrics/util/MBeanUtil.java new file mode 100644 index 00000000000..ded1a5a1958 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MBeanUtil.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import java.lang.management.ManagementFactory; + +import javax.management.InstanceNotFoundException; +import javax.management.MBeanServer; +import javax.management.MalformedObjectNameException; +import javax.management.ObjectName; +import javax.management.InstanceAlreadyExistsException; + + +/** + * This util class provides a method to register an MBean using + * our standard naming convention as described in the doc + * for {link {@link #registerMBean(String, String, Object)} + * + */ +public class MBeanUtil { + + /** + * Register the MBean using our standard MBeanName format + * "hadoop:service=,name=" + * Where the and are the supplied parameters + * + * @param serviceName + * @param nameName + * @param theMbean - the MBean to register + * @return the named used to register the MBean + */ + static public ObjectName registerMBean(final String serviceName, + final String nameName, + final Object theMbean) { + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName name = getMBeanName(serviceName, nameName); + try { + mbs.registerMBean(theMbean, name); + return name; + } catch (InstanceAlreadyExistsException ie) { + // Ignore if instance already exists + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + static public void unregisterMBean(ObjectName mbeanName) { + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + if (mbeanName == null) + return; + try { + mbs.unregisterMBean(mbeanName); + } catch (InstanceNotFoundException e ) { + // ignore + } catch (Exception e) { + e.printStackTrace(); + } + } + + static private ObjectName getMBeanName(final String serviceName, + final String nameName) { + ObjectName name = null; + try { + name = new ObjectName("hadoop:" + + "service=" + serviceName + ",name=" + nameName); + } catch (MalformedObjectNameException e) { + e.printStackTrace(); + } + return name; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsBase.java b/src/java/org/apache/hadoop/metrics/util/MetricsBase.java new file mode 100644 index 00000000000..1cbcf3212a6 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsBase.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; + +/** + * + * This is base class for all metrics + * + */ +public abstract class MetricsBase { + public static final String NO_DESCRIPTION = "NoDescription"; + final private String name; + final private String description; + + protected MetricsBase(final String nam) { + name = nam; + description = NO_DESCRIPTION; + } + + protected MetricsBase(final String nam, final String desc) { + name = nam; + description = desc; + } + + public abstract void pushMetric(final MetricsRecord mr); + + public String getName() { return name; } + public String getDescription() { return description; }; + +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java b/src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java new file mode 100644 index 00000000000..d65cce0597e --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java @@ -0,0 +1,226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.management.Attribute; +import javax.management.AttributeList; +import javax.management.AttributeNotFoundException; +import javax.management.DynamicMBean; +import javax.management.InvalidAttributeValueException; +import javax.management.MBeanAttributeInfo; +import javax.management.MBeanException; +import javax.management.MBeanInfo; +import javax.management.MBeanOperationInfo; +import javax.management.ReflectionException; + +import org.apache.hadoop.metrics.MetricsUtil; + + + +/** + * This abstract base class facilitates creating dynamic mbeans automatically from + * metrics. + * The metrics constructors registers metrics in a registry. + * Different categories of metrics should be in differnt classes with their own + * registry (as in NameNodeMetrics and DataNodeMetrics). 
+ * Then the MBean can be created passing the registry to the constructor. + * The MBean should be then registered using a mbean name (example): + * MetricsHolder myMetrics = new MetricsHolder(); // has metrics and registry + * MetricsTestMBean theMBean = new MetricsTestMBean(myMetrics.mregistry); + * ObjectName mbeanName = MBeanUtil.registerMBean("ServiceFoo", + * "TestStatistics", theMBean); + * + * + */ +public abstract class MetricsDynamicMBeanBase implements DynamicMBean { + private final static String AVG_TIME = "AvgTime"; + private final static String MIN_TIME = "MinTime"; + private final static String MAX_TIME = "MaxTime"; + private final static String NUM_OPS = "NumOps"; + private final static String RESET_ALL_MIN_MAX_OP = "resetAllMinMax"; + private MetricsRegistry metricsRegistry; + private MBeanInfo mbeanInfo; + private Map metricsRateAttributeMod; + private int numEntriesInRegistry = 0; + private String mbeanDescription; + + protected MetricsDynamicMBeanBase(final MetricsRegistry mr, final String aMBeanDescription) { + metricsRegistry = mr; + mbeanDescription = aMBeanDescription; + createMBeanInfo(); + } + + private void updateMbeanInfoIfMetricsListChanged() { + if (numEntriesInRegistry != metricsRegistry.size()) + createMBeanInfo(); + } + + private void createMBeanInfo() { + metricsRateAttributeMod = new HashMap(); + boolean needsMinMaxResetOperation = false; + List attributesInfo = new ArrayList(); + MBeanOperationInfo[] operationsInfo = null; + numEntriesInRegistry = metricsRegistry.size(); + + for (MetricsBase o : metricsRegistry.getMetricsList()) { + + if (MetricsTimeVaryingRate.class.isInstance(o)) { + // For each of the metrics there are 3 different attributes + attributesInfo.add(new MBeanAttributeInfo(o.getName() + NUM_OPS, "java.lang.Integer", + o.getDescription(), true, false, false)); + attributesInfo.add(new MBeanAttributeInfo(o.getName() + AVG_TIME, "java.lang.Long", + o.getDescription(), true, false, false)); + attributesInfo.add(new MBeanAttributeInfo(o.getName() + MIN_TIME, "java.lang.Long", + o.getDescription(), true, false, false)); + attributesInfo.add(new MBeanAttributeInfo(o.getName() + MAX_TIME, "java.lang.Long", + o.getDescription(), true, false, false)); + needsMinMaxResetOperation = true; // the min and max can be reset. + + // Note the special attributes (AVG_TIME, MIN_TIME, ..) are derived from metrics + // Rather than check for the suffix we store them in a map. 
+ metricsRateAttributeMod.put(o.getName() + NUM_OPS, o); + metricsRateAttributeMod.put(o.getName() + AVG_TIME, o); + metricsRateAttributeMod.put(o.getName() + MIN_TIME, o); + metricsRateAttributeMod.put(o.getName() + MAX_TIME, o); + + } else if ( MetricsIntValue.class.isInstance(o) || MetricsTimeVaryingInt.class.isInstance(o) ) { + attributesInfo.add(new MBeanAttributeInfo(o.getName(), "java.lang.Integer", + o.getDescription(), true, false, false)); + } else if ( MetricsLongValue.class.isInstance(o) || MetricsTimeVaryingLong.class.isInstance(o) ) { + attributesInfo.add(new MBeanAttributeInfo(o.getName(), "java.lang.Long", + o.getDescription(), true, false, false)); + } else { + MetricsUtil.LOG.error("unknown metrics type: " + o.getClass().getName()); + } + + if (needsMinMaxResetOperation) { + operationsInfo = new MBeanOperationInfo[] { + new MBeanOperationInfo(RESET_ALL_MIN_MAX_OP, "Reset (zero) All Min Max", + null, "void", MBeanOperationInfo.ACTION) }; + } + } + MBeanAttributeInfo[] attrArray = new MBeanAttributeInfo[attributesInfo.size()]; + mbeanInfo = new MBeanInfo(this.getClass().getName(), mbeanDescription, + attributesInfo.toArray(attrArray), null, operationsInfo, null); + } + + @Override + public Object getAttribute(String attributeName) throws AttributeNotFoundException, + MBeanException, ReflectionException { + if (attributeName == null || attributeName.equals("")) + throw new IllegalArgumentException(); + + updateMbeanInfoIfMetricsListChanged(); + + Object o = metricsRateAttributeMod.get(attributeName); + if (o == null) { + o = metricsRegistry.get(attributeName); + } + if (o == null) + throw new AttributeNotFoundException(); + + if (o instanceof MetricsIntValue) + return ((MetricsIntValue) o).get(); + else if (o instanceof MetricsLongValue) + return ((MetricsLongValue) o).get(); + else if (o instanceof MetricsTimeVaryingInt) + return ((MetricsTimeVaryingInt) o).getPreviousIntervalValue(); + else if (o instanceof MetricsTimeVaryingLong) + return ((MetricsTimeVaryingLong) o).getPreviousIntervalValue(); + else if (o instanceof MetricsTimeVaryingRate) { + MetricsTimeVaryingRate or = (MetricsTimeVaryingRate) o; + if (attributeName.endsWith(NUM_OPS)) + return or.getPreviousIntervalNumOps(); + else if (attributeName.endsWith(AVG_TIME)) + return or.getPreviousIntervalAverageTime(); + else if (attributeName.endsWith(MIN_TIME)) + return or.getMinTime(); + else if (attributeName.endsWith(MAX_TIME)) + return or.getMaxTime(); + else { + MetricsUtil.LOG.error("Unexpected attrubute suffix"); + throw new AttributeNotFoundException(); + } + } else { + MetricsUtil.LOG.error("unknown metrics type: " + o.getClass().getName()); + throw new AttributeNotFoundException(); + } + } + + @Override + public AttributeList getAttributes(String[] attributeNames) { + if (attributeNames == null || attributeNames.length == 0) + throw new IllegalArgumentException(); + + updateMbeanInfoIfMetricsListChanged(); + + AttributeList result = new AttributeList(attributeNames.length); + for (String iAttributeName : attributeNames) { + try { + Object value = getAttribute(iAttributeName); + result.add(new Attribute(iAttributeName, value)); + } catch (Exception e) { + continue; + } + } + return result; + } + + @Override + public MBeanInfo getMBeanInfo() { + return mbeanInfo; + } + + @Override + public Object invoke(String actionName, Object[] parms, String[] signature) + throws MBeanException, ReflectionException { + + if (actionName == null || actionName.equals("")) + throw new IllegalArgumentException(); + + + // Right now 
we support only one fixed operation (if it applies) + if (!(actionName.equals(RESET_ALL_MIN_MAX_OP)) || + mbeanInfo.getOperations().length != 1) { + throw new ReflectionException(new NoSuchMethodException(actionName)); + } + for (MetricsBase m : metricsRegistry.getMetricsList()) { + if ( MetricsTimeVaryingRate.class.isInstance(m) ) { + MetricsTimeVaryingRate.class.cast(m).resetMinMax(); + } + } + return null; + } + + @Override + public void setAttribute(Attribute attribute) + throws AttributeNotFoundException, InvalidAttributeValueException, + MBeanException, ReflectionException { + throw new ReflectionException(new NoSuchMethodException("set" + attribute)); + } + + @Override + public AttributeList setAttributes(AttributeList attributes) { + return null; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java b/src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java new file mode 100644 index 00000000000..d467677aaa1 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsIntValue class is for a metric that is not time varied + * but changes only when it is set. + * Each time its value is set, it is published only *once* at the next update + * call. + * + */ +public class MetricsIntValue extends MetricsBase { + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + private int value; + private boolean changed; + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsIntValue(final String nam, final MetricsRegistry registry, final String description) { + super(nam, description); + value = 0; + changed = false; + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsIntValue(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + + + /** + * Set the value + * @param newValue + */ + public synchronized void set(final int newValue) { + value = newValue; + changed = true; + } + + /** + * Get value + * @return the value last set + */ + public synchronized int get() { + return value; + } + + + /** + * Push the metric to the mr. 
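
Putting the util classes together mirrors the recipe in the MetricsDynamicMBeanBase class comment: a holder owns a MetricsRegistry and its metrics, a thin MetricsDynamicMBeanBase subclass exposes the registry, and MBeanUtil registers it under the standard naming convention. A rough sketch; the Foo names are hypothetical:

import javax.management.ObjectName;

import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.metrics.util.MetricsDynamicMBeanBase;
import org.apache.hadoop.metrics.util.MetricsIntValue;
import org.apache.hadoop.metrics.util.MetricsRegistry;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;

// Hypothetical holder: the metrics and their registry live together.
class FooMetrics {
  final MetricsRegistry registry = new MetricsRegistry();
  final MetricsIntValue queueSize = new MetricsIntValue("queueSize", registry);
  final MetricsTimeVaryingRate lookup = new MetricsTimeVaryingRate("lookup", registry);
}

// Thin DynamicMBean over the registry; the base class does all the work.
class FooStatisticsMBean extends MetricsDynamicMBeanBase {
  FooStatisticsMBean(MetricsRegistry mr) {
    super(mr, "Statistics for the hypothetical Foo service");
  }
}

public class FooMBeanSketch {
  public static void main(String[] args) {
    FooMetrics metrics = new FooMetrics();
    // Registered as "hadoop:service=FooService,name=FooStatistics".
    ObjectName name = MBeanUtil.registerMBean("FooService", "FooStatistics",
        new FooStatisticsMBean(metrics.registry));
    // ... later, on shutdown:
    MBeanUtil.unregisterMBean(name);
  }
}
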
+ * The metric is pushed only if it was updated since last push + * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #get()} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + if (changed) { + try { + mr.setMetric(getName(), value); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + changed = false; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java b/src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java new file mode 100644 index 00000000000..639b6a7bd54 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; + + +/** + * The MetricsLongValue class is for a metric that is not time varied + * but changes only when it is set. + * Each time its value is set, it is published only *once* at the next update + * call. + * + */ +public class MetricsLongValue extends MetricsBase{ + private long value; + private boolean changed; + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsLongValue(final String nam, final MetricsRegistry registry, final String description) { + super(nam, description); + value = 0; + changed = false; + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsLongValue(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + /** + * Set the value + * @param newValue + */ + public synchronized void set(final long newValue) { + value = newValue; + changed = true; + } + + /** + * Get value + * @return the value last set + */ + public synchronized long get() { + return value; + } + + + /** + * Push the metric to the mr. 
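
MetricsIntValue above and MetricsLongValue below follow the same pattern: set() records a new absolute value and pushMetric() publishes it once per change from whatever periodic update hook the owning class uses (the doUpdates name below is only illustrative). A short sketch with made-up metric names:

import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.util.MetricsIntValue;
import org.apache.hadoop.metrics.util.MetricsLongValue;
import org.apache.hadoop.metrics.util.MetricsRegistry;

public class ValueMetricsSketch {
  private final MetricsRegistry registry = new MetricsRegistry();
  private final MetricsIntValue pendingRequests =
      new MetricsIntValue("pendingRequests", registry, "Requests waiting in the queue");
  private final MetricsLongValue capacityBytes =
      new MetricsLongValue("capacityBytes", registry);

  // Typically called from the owning class's periodic update callback.
  public void doUpdates(MetricsRecord record) {
    pendingRequests.set(17);             // published once at the next push
    capacityBytes.set(128L * 1024 * 1024);
    pendingRequests.pushMetric(record);  // skipped on later pushes until set() again
    capacityBytes.pushMetric(record);
    record.update();
  }
}
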
+ * The metric is pushed only if it was updated since last push + * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #get()} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + if (changed) + mr.setMetric(getName(), value); + changed = false; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java b/src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java new file mode 100644 index 00000000000..faf4b63524b --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * + * This is the registry for metrics. + * Related set of metrics should be declared in a holding class and registered + * in a registry for those metrics which is also stored in the the holding class. + * + */ +public class MetricsRegistry { + private Map metricsList = new HashMap(); + + public MetricsRegistry() { + } + + /** + * + * @return number of metrics in the registry + */ + public int size() { + return metricsList.size(); + } + + /** + * Add a new metrics to the registry + * @param metricsName - the name + * @param theMetricsObj - the metrics + * @throws IllegalArgumentException if a name is already registered + */ + public synchronized void add(final String metricsName, final MetricsBase theMetricsObj) { + if (metricsList.containsKey(metricsName)) { + throw new IllegalArgumentException("Duplicate metricsName:" + metricsName); + } + metricsList.put(metricsName, theMetricsObj); + } + + + /** + * + * @param metricsName + * @return the metrics if there is one registered by the supplied name. + * Returns null if none is registered + */ + public synchronized MetricsBase get(final String metricsName) { + return metricsList.get(metricsName); + } + + + /** + * + * @return the list of metrics names + */ + public synchronized Collection getKeyList() { + return metricsList.keySet(); + } + + /** + * + * @return the list of metrics + */ + public synchronized Collection getMetricsList() { + return metricsList.values(); + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java new file mode 100644 index 00000000000..96b4fe14880 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsTimeVaryingInt class is for a metric that naturally + * varies over time (e.g. number of files created). The metrics is accumulated + * over an interval (set in the metrics config file); the metrics is + * published at the end of each interval and then + * reset to zero. Hence the counter has the value in the current interval. + * + * Note if one wants a time associated with the metric then use + * @see org.apache.hadoop.metrics.util.MetricsTimeVaryingRate + * + */ +public class MetricsTimeVaryingInt extends MetricsBase { + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + private int currentValue; + private int previousIntervalValue; + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * @param description - the description + */ + public MetricsTimeVaryingInt(final String nam, + final MetricsRegistry registry, + final String description) { + super(nam, description); + currentValue = 0; + previousIntervalValue = 0; + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsTimeVaryingInt(final String nam, final MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + + + /** + * Inc metrics for incr vlaue + * @param incr - number of operations + */ + public synchronized void inc(final int incr) { + currentValue += incr; + } + + /** + * Inc metrics by one + */ + public synchronized void inc() { + currentValue++; + } + + private synchronized void intervalHeartBeat() { + previousIntervalValue = currentValue; + currentValue = 0; + } + + /** + * Push the delta metrics to the mr. + * The delta is since the last push/interval. 
+ * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #previousIntervalValue} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + intervalHeartBeat(); + try { + mr.incrMetric(getName(), getPreviousIntervalValue()); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + + + /** + * The Value at the Previous interval + * @return prev interval value + */ + public synchronized int getPreviousIntervalValue() { + return previousIntervalValue; + } + + /** + * The Value at the current interval + * @return prev interval value + */ + public synchronized int getCurrentIntervalValue() { + return currentValue; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java new file mode 100644 index 00000000000..929303c832f --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java @@ -0,0 +1,124 @@ +package org.apache.hadoop.metrics.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsTimeVaryingLong class is for a metric that naturally + * varies over time (e.g. number of files created). The metrics is accumulated + * over an interval (set in the metrics config file); the metrics is + * published at the end of each interval and then + * reset to zero. Hence the counter has the value in the current interval. 
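
Both time-varying counters are used the same way: call inc() as events happen, and let the periodic push publish the previous interval's delta and zero the current count. A short sketch with made-up metric names (the doUpdates hook is illustrative):

import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.util.MetricsRegistry;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingInt;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingLong;

public class CounterMetricsSketch {
  private final MetricsRegistry registry = new MetricsRegistry();
  private final MetricsTimeVaryingInt filesCreated =
      new MetricsTimeVaryingInt("filesCreated", registry, "Files created this interval");
  private final MetricsTimeVaryingLong bytesWritten =
      new MetricsTimeVaryingLong("bytesWritten", registry);

  public void onCreate()        { filesCreated.inc(); }
  public void onWrite(long n)   { bytesWritten.inc(n); }

  // Once per metrics interval: publishes the previous interval's delta
  // and resets the current counters to zero.
  public void doUpdates(MetricsRecord record) {
    filesCreated.pushMetric(record);
    bytesWritten.pushMetric(record);
    record.update();
  }
}
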
+ * + * Note if one wants a time associated with the metric then use + * @see org.apache.hadoop.metrics.util.MetricsTimeVaryingRate + * + */ +public class MetricsTimeVaryingLong extends MetricsBase{ + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + private long currentValue; + private long previousIntervalValue; + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsTimeVaryingLong(final String nam, MetricsRegistry registry, final String description) { + super(nam, description); + currentValue = 0; + previousIntervalValue = 0; + registry.add(nam, this); + } + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsTimeVaryingLong(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + /** + * Inc metrics for incr vlaue + * @param incr - number of operations + */ + public synchronized void inc(final long incr) { + currentValue += incr; + } + + /** + * Inc metrics by one + */ + public synchronized void inc() { + currentValue++; + } + + private synchronized void intervalHeartBeat() { + previousIntervalValue = currentValue; + currentValue = 0; + } + + /** + * Push the delta metrics to the mr. + * The delta is since the last push/interval. + * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #previousIntervalValue} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + intervalHeartBeat(); + try { + mr.incrMetric(getName(), getPreviousIntervalValue()); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + + + /** + * The Value at the Previous interval + * @return prev interval value + */ + public synchronized long getPreviousIntervalValue() { + return previousIntervalValue; + } + + /** + * The Value at the current interval + * @return prev interval value + */ + public synchronized long getCurrentIntervalValue() { + return currentValue; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java new file mode 100644 index 00000000000..7d05af325da --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsTimeVaryingRate class is for a rate based metric that + * naturally varies over time (e.g. time taken to create a file). + * The rate is averaged at each interval heart beat (the interval + * is set in the metrics config file). + * This class also keeps track of the min and max rates along with + * a method to reset the min-max. + * + */ +public class MetricsTimeVaryingRate extends MetricsBase { + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + static class Metrics { + int numOperations = 0; + long time = 0; // total time or average time + + void set(final Metrics resetTo) { + numOperations = resetTo.numOperations; + time = resetTo.time; + } + + void reset() { + numOperations = 0; + time = 0; + } + } + + static class MinMax { + long minTime = -1; + long maxTime = 0; + + void set(final MinMax newVal) { + minTime = newVal.minTime; + maxTime = newVal.maxTime; + } + + void reset() { + minTime = -1; + maxTime = 0; + } + void update(final long time) { // update min max + minTime = (minTime == -1) ? time : Math.min(minTime, time); + minTime = Math.min(minTime, time); + maxTime = Math.max(maxTime, time); + } + } + private Metrics currentData; + private Metrics previousIntervalData; + private MinMax minMax; + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsTimeVaryingRate(final String nam, final MetricsRegistry registry, final String description) { + super(nam, description); + currentData = new Metrics(); + previousIntervalData = new Metrics(); + minMax = new MinMax(); + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsTimeVaryingRate(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + + } + + + /** + * Increment the metrics for numOps operations + * @param numOps - number of operations + * @param time - time for numOps operations + */ + public synchronized void inc(final int numOps, final long time) { + currentData.numOperations += numOps; + currentData.time += time; + long timePerOps = time/numOps; + minMax.update(timePerOps); + } + + /** + * Increment the metrics for one operation + * @param time for one operation + */ + public synchronized void inc(final long time) { + currentData.numOperations++; + currentData.time += time; + minMax.update(time); + } + + + + private synchronized void intervalHeartBeat() { + previousIntervalData.numOperations = currentData.numOperations; + previousIntervalData.time = (currentData.numOperations == 0) ? + 0 : currentData.time / currentData.numOperations; + currentData.reset(); + } + + /** + * Push the delta metrics to the mr. + * The delta is since the last push/interval. 
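
A typical caller times an operation and feeds the elapsed time to inc(); the periodic push (pushMetric, below) then publishes the previous interval's operation count and average time under the "_num_ops" and "_avg_time" suffixes. A short sketch with a made-up metric name:

import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.util.MetricsRegistry;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;

public class RateMetricsSketch {
  private final MetricsRegistry registry = new MetricsRegistry();
  private final MetricsTimeVaryingRate createFile =
      new MetricsTimeVaryingRate("createFile", registry, "Time to create a file");

  public void create() {
    long start = System.currentTimeMillis();
    // ... the operation being timed goes here ...
    createFile.inc(System.currentTimeMillis() - start);  // one op and its elapsed time
  }

  // Per interval: publishes "createFile_num_ops" and "createFile_avg_time",
  // then resets the interval data. Min/max survive until resetMinMax().
  public void doUpdates(MetricsRecord record) {
    createFile.pushMetric(record);
    record.update();
  }
}
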
+ * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #getPreviousIntervalAverageTime()} and + * {@link #getPreviousIntervalNumOps()} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + intervalHeartBeat(); + try { + mr.incrMetric(getName() + "_num_ops", getPreviousIntervalNumOps()); + mr.setMetric(getName() + "_avg_time", getPreviousIntervalAverageTime()); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + + /** + * The number of operations in the previous interval + * @return - ops in prev interval + */ + public synchronized int getPreviousIntervalNumOps() { + return previousIntervalData.numOperations; + } + + /** + * The average time per operation in the previous interval + * @return - the average time. + */ + public synchronized long getPreviousIntervalAverageTime() { + return previousIntervalData.time; + } + + /** + * The min time for a single operation since the last reset + * {@link #resetMinMax()} + * @return min time for an operation + */ + public synchronized long getMinTime() { + return minMax.minTime; + } + + /** + * The max time for a single operation since the last reset + * {@link #resetMinMax()} + * @return max time for an operation + */ + public synchronized long getMaxTime() { + return minMax.maxTime; + } + + /** + * Reset the min max values + */ + public synchronized void resetMinMax() { + minMax.reset(); + } +} diff --git a/src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java b/src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java new file mode 100644 index 00000000000..0490e3cabf4 --- /dev/null +++ b/src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * A cached implementation of DNSToSwitchMapping that takes a + * raw DNSToSwitchMapping and stores the resolved network location in + * a cache. Subsequent calls for an already-resolved name + * get its location from the cache.
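
A resolver is usually written once against the raw DNSToSwitchMapping interface (defined later in this patch) and then wrapped in this cache. A minimal sketch, using a made-up single-rack resolver:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.net.CachedDNSToSwitchMapping;
import org.apache.hadoop.net.DNSToSwitchMapping;

// Trivial resolver that puts every host on one rack; a real implementation
// would consult a topology script or an inventory service.
class SingleRackMapping implements DNSToSwitchMapping {
  public List<String> resolve(List<String> names) {
    List<String> paths = new ArrayList<String>(names.size());
    for (int i = 0; i < names.size(); i++) {
      paths.add("/default-rack");
    }
    return paths;
  }
}

public class TopologySketch {
  public static void main(String[] args) {
    DNSToSwitchMapping mapping =
        new CachedDNSToSwitchMapping(new SingleRackMapping());
    // A second call for the same names is served from the cache.
    System.out.println(mapping.resolve(Arrays.asList("10.0.0.1", "10.0.0.2")));
  }
}
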
+ * + */ +public class CachedDNSToSwitchMapping implements DNSToSwitchMapping { + private Map cache = new ConcurrentHashMap(); + protected DNSToSwitchMapping rawMapping; + + public CachedDNSToSwitchMapping(DNSToSwitchMapping rawMapping) { + this.rawMapping = rawMapping; + } + + public List resolve(List names) { + // normalize all input names to be in the form of IP addresses + names = NetUtils.normalizeHostNames(names); + + List result = new ArrayList(names.size()); + if (names.isEmpty()) { + return result; + } + + + // find out all names without cached resolved location + List unCachedHosts = new ArrayList(names.size()); + for (String name : names) { + if (cache.get(name) == null) { + unCachedHosts.add(name); + } + } + + // Resolve those names + List rNames = rawMapping.resolve(unCachedHosts); + + // Cache the result + if (rNames != null) { + for (int i=0; i ips = new Vector(); + Enumeration e = netIF.getInetAddresses(); + while (e.hasMoreElements()) { + ips.add(((InetAddress) e.nextElement()).getHostAddress()); + } + return ips.toArray(new String[] {}); + } + } catch (SocketException e) { + return new String[] { cachedHostAddress }; + } + } + + + /** + * Returns the first available IP address associated with the provided + * network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @return The IP address in text form + * @throws UnknownHostException + * If one is encountered in querying the default interface + */ + public static String getDefaultIP(String strInterface) + throws UnknownHostException { + String[] ips = getIPs(strInterface); + return ips[0]; + } + + /** + * Returns all the host names associated by the provided nameserver with the + * address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @param nameserver + * The DNS host name + * @return A string vector of all host names associated with the IPs tied to + * the specified interface + * @throws UnknownHostException if the hostname cannot be determined + */ + public static String[] getHosts(String strInterface, String nameserver) + throws UnknownHostException { + String[] ips = getIPs(strInterface); + Vector hosts = new Vector(); + for (int ctr = 0; ctr < ips.length; ctr++) + try { + hosts.add(reverseDns(InetAddress.getByName(ips[ctr]), + nameserver)); + } catch (UnknownHostException ignored) { + } catch (NamingException ignored) { + } + + if (hosts.isEmpty()) { + return new String[] { cachedHostname }; + } else { + return hosts.toArray(new String[hosts.size()]); + } + } + + + /** + * Determine the local hostname; retrieving it from cache if it is known + * If we cannot determine our host name, return "localhost" + * @return the local hostname or "localhost" + */ + private static String resolveLocalHostname() { + String localhost; + try { + localhost = InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + LOG.info("Unable to determine local hostname " + + "-falling back to \"" + LOCALHOST + "\"", e); + localhost = LOCALHOST; + } + return localhost; + } + + + /** + * Get the IPAddress of the local host as a string. + * This will be a loop back value if the local host address cannot be + * determined. + * If the loopback address of "localhost" does not resolve, then the system's + * network is in such a state that nothing is going to work. 
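
These static lookup helpers are usually called with either a concrete interface name or "default"; the sketch below assumes they live on org.apache.hadoop.net.DNS and that an eth0 interface exists:

import java.net.UnknownHostException;

import org.apache.hadoop.net.DNS;

public class DnsSketch {
  public static void main(String[] args) throws UnknownHostException {
    // "default" falls back to the cached local hostname instead of probing an interface.
    System.out.println(DNS.getDefaultHost("default"));
    // Query a concrete interface; it may carry several addresses.
    for (String ip : DNS.getIPs("eth0")) {
      System.out.println(ip);
    }
  }
}
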
A message is + * logged at the error level and a null pointer returned, a pointer + * which will trigger failures later on the application + * @return the IPAddress of the local host or null for a serious problem. + */ + private static String resolveLocalHostIPAddress() { + String address; + try { + address = InetAddress.getLocalHost().getHostAddress(); + } catch (UnknownHostException e) { + LOG.info("Unable to determine address of the host" + + "-falling back to \"" + LOCALHOST + "\" address", e); + try { + address = InetAddress.getByName(LOCALHOST).getHostAddress(); + } catch (UnknownHostException noLocalHostAddressException) { + //at this point, deep trouble + LOG.error("Unable to determine local loopback address " + + "of \"" + LOCALHOST + "\" " + + "-this system's network configuration is unsupported", e); + address = null; + } + } + return address; + } + + /** + * Returns all the host names associated by the default nameserver with the + * address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @return The list of host names associated with IPs bound to the network + * interface + * @throws UnknownHostException + * If one is encountered while querying the default interface + * + */ + public static String[] getHosts(String strInterface) + throws UnknownHostException { + return getHosts(strInterface, null); + } + + /** + * Returns the default (first) host name associated by the provided + * nameserver with the address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @param nameserver + * The DNS host name + * @return The default host names associated with IPs bound to the network + * interface + * @throws UnknownHostException + * If one is encountered while querying the default interface + */ + public static String getDefaultHost(String strInterface, String nameserver) + throws UnknownHostException { + if ("default".equals(strInterface)) { + return cachedHostname; + } + + if ("default".equals(nameserver)) { + return getDefaultHost(strInterface); + } + + String[] hosts = getHosts(strInterface, nameserver); + return hosts[0]; + } + + /** + * Returns the default (first) host name associated by the default + * nameserver with the address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0). + * Must not be null. + * @return The default host name associated with IPs bound to the network + * interface + * @throws UnknownHostException + * If one is encountered while querying the default interface + */ + public static String getDefaultHost(String strInterface) + throws UnknownHostException { + return getDefaultHost(strInterface, null); + } + +} diff --git a/src/java/org/apache/hadoop/net/DNSToSwitchMapping.java b/src/java/org/apache/hadoop/net/DNSToSwitchMapping.java new file mode 100644 index 00000000000..f71b95025d1 --- /dev/null +++ b/src/java/org/apache/hadoop/net/DNSToSwitchMapping.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.util.List; + +/** + * An interface that should be implemented to allow pluggable + * DNS-name/IP-address to RackID resolvers. + * + */ +public interface DNSToSwitchMapping { + /** + * Resolves a list of DNS-names/IP-addresses and returns back a list of + * switch information (network paths). One-to-one correspondence must be + * maintained between the elements in the lists. + * Consider an element in the argument list - x.y.com. The switch information + * that is returned must be a network path of the form /foo/rack, + * where / is the root, and 'foo' is the switch where 'rack' is connected. + * Note the hostname/ip-address is not part of the returned path. + * The network topology of the cluster would determine the number of + * components in the network path. + * @param names + * @return list of resolved network paths + */ + public List resolve(List names); +} diff --git a/src/java/org/apache/hadoop/net/NetUtils.java b/src/java/org/apache/hadoop/net/NetUtils.java new file mode 100644 index 00000000000..ce07fab858e --- /dev/null +++ b/src/java/org/apache/hadoop/net/NetUtils.java @@ -0,0 +1,440 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.SocketAddress; +import java.net.URI; +import java.net.UnknownHostException; +import java.nio.channels.SocketChannel; +import java.util.Map.Entry; +import java.util.regex.Pattern; +import java.util.*; + +import javax.net.SocketFactory; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.VersionedProtocol; +import org.apache.hadoop.util.ReflectionUtils; + +public class NetUtils { + private static final Log LOG = LogFactory.getLog(NetUtils.class); + + private static Map hostToResolved = + new HashMap(); + + /** + * Get the socket factory for the given class according to its + * configuration parameter + * hadoop.rpc.socket.factory.class.<ClassName>. 
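
Concretely, the lookup keys off the class's simple name and, as the rest of this comment explains, falls back to hadoop.rpc.socket.factory.class.default and finally the JVM default. A small sketch; FooProtocol is a made-up protocol interface:

import javax.net.SocketFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;

public class SocketFactorySketch {
  // Hypothetical protocol interface, used only to illustrate the per-class key
  // "hadoop.rpc.socket.factory.class.FooProtocol".
  interface FooProtocol {}

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    SocketFactory factory = NetUtils.getSocketFactory(conf, FooProtocol.class);
    System.out.println(factory.getClass().getName());
  }
}
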
When no + * such parameter exists then fall back on the default socket factory as + * configured by hadoop.rpc.socket.factory.class.default. If + * this default socket factory is not configured, then fall back on the JVM + * default socket factory. + * + * @param conf the configuration + * @param clazz the class (usually a {@link VersionedProtocol}) + * @return a socket factory + */ + public static SocketFactory getSocketFactory(Configuration conf, + Class clazz) { + + SocketFactory factory = null; + + String propValue = + conf.get("hadoop.rpc.socket.factory.class." + clazz.getSimpleName()); + if ((propValue != null) && (propValue.length() > 0)) + factory = getSocketFactoryFromProperty(conf, propValue); + + if (factory == null) + factory = getDefaultSocketFactory(conf); + + return factory; + } + + /** + * Get the default socket factory as specified by the configuration + * parameter hadoop.rpc.socket.factory.default + * + * @param conf the configuration + * @return the default socket factory as specified in the configuration or + * the JVM default socket factory if the configuration does not + * contain a default socket factory property. + */ + public static SocketFactory getDefaultSocketFactory(Configuration conf) { + + String propValue = conf.get("hadoop.rpc.socket.factory.class.default"); + if ((propValue == null) || (propValue.length() == 0)) + return SocketFactory.getDefault(); + + return getSocketFactoryFromProperty(conf, propValue); + } + + /** + * Get the socket factory corresponding to the given proxy URI. If the + * given proxy URI corresponds to an absence of configuration parameter, + * returns null. If the URI is malformed raises an exception. + * + * @param propValue the property which is the class name of the + * SocketFactory to instantiate; assumed non null and non empty. + * @return a socket factory as defined in the property value. + */ + public static SocketFactory getSocketFactoryFromProperty( + Configuration conf, String propValue) { + + try { + Class theClass = conf.getClassByName(propValue); + return (SocketFactory) ReflectionUtils.newInstance(theClass, conf); + + } catch (ClassNotFoundException cnfe) { + throw new RuntimeException("Socket Factory class not found: " + cnfe); + } + } + + /** + * Util method to build socket addr from either: + * : + * ://:/ + */ + public static InetSocketAddress createSocketAddr(String target) { + return createSocketAddr(target, -1); + } + + /** + * Util method to build socket addr from either: + * + * : + * ://:/ + */ + public static InetSocketAddress createSocketAddr(String target, + int defaultPort) { + int colonIndex = target.indexOf(':'); + if (colonIndex < 0 && defaultPort == -1) { + throw new RuntimeException("Not a host:port pair: " + target); + } + String hostname; + int port = -1; + if (!target.contains("/")) { + if (colonIndex == -1) { + hostname = target; + } else { + // must be the old style : + hostname = target.substring(0, colonIndex); + port = Integer.parseInt(target.substring(colonIndex + 1)); + } + } else { + // a new uri + URI addr = new Path(target).toUri(); + hostname = addr.getHost(); + port = addr.getPort(); + } + + if (port == -1) { + port = defaultPort; + } + + if (getStaticResolution(hostname) != null) { + hostname = getStaticResolution(hostname); + } + return new InetSocketAddress(hostname, port); + } + + /** + * Adds a static resolution for host. This can be used for setting up + * hostnames with names that are fake to point to a well known host. For e.g. 
+ * in some testcases we require to have daemons with different hostnames + * running on the same machine. In order to create connections to these + * daemons, one can set up mappings from those hostnames to "localhost". + * {@link NetUtils#getStaticResolution(String)} can be used to query for + * the actual hostname. + * @param host + * @param resolvedName + */ + public static void addStaticResolution(String host, String resolvedName) { + synchronized (hostToResolved) { + hostToResolved.put(host, resolvedName); + } + } + + /** + * Retrieves the resolved name for the passed host. The resolved name must + * have been set earlier using + * {@link NetUtils#addStaticResolution(String, String)} + * @param host + * @return the resolution + */ + public static String getStaticResolution(String host) { + synchronized (hostToResolved) { + return hostToResolved.get(host); + } + } + + /** + * This is used to get all the resolutions that were added using + * {@link NetUtils#addStaticResolution(String, String)}. The return + * value is a List each element of which contains an array of String + * of the form String[0]=hostname, String[1]=resolved-hostname + * @return the list of resolutions + */ + public static List getAllStaticResolutions() { + synchronized (hostToResolved) { + Set >entries = hostToResolved.entrySet(); + if (entries.size() == 0) { + return null; + } + List l = new ArrayList(entries.size()); + for (Entry e : entries) { + l.add(new String[] {e.getKey(), e.getValue()}); + } + return l; + } + } + + /** + * Returns InetSocketAddress that a client can use to + * connect to the server. Server.getListenerAddress() is not correct when + * the server binds to "0.0.0.0". This returns "127.0.0.1:port" when + * the getListenerAddress() returns "0.0.0.0:port". + * + * @param server + * @return socket address that a client can use to connect to the server. + */ + public static InetSocketAddress getConnectAddress(Server server) { + InetSocketAddress addr = server.getListenerAddress(); + if (addr.getAddress().getHostAddress().equals("0.0.0.0")) { + addr = new InetSocketAddress("127.0.0.1", addr.getPort()); + } + return addr; + } + + /** + * Same as getInputStream(socket, socket.getSoTimeout()).
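// Illustrative sketch of the address helpers above; host names and ports are
// made up, and the static resolution is the test-only trick described in the
// addStaticResolution javadoc.
//
// import java.net.InetSocketAddress;
// import org.apache.hadoop.net.NetUtils;
//
// class AddrExample {
//   public static void main(String[] args) {
//     // Old-style "host:port" target.
//     InetSocketAddress a = NetUtils.createSocketAddr("nn.example.com:8020");
//     // Bare host name; the supplied default port (8020) fills the gap.
//     InetSocketAddress b = NetUtils.createSocketAddr("nn.example.com", 8020);
//     // Map a fake daemon name onto localhost for a test.
//     NetUtils.addStaticResolution("fake-datanode", "localhost");
//     InetSocketAddress c = NetUtils.createSocketAddr("fake-datanode:50010");
//     System.out.println(a + " " + b + " " + c);
//   }
// }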

+ * + * From documentation for {@link #getInputStream(Socket, long)}:
+ * Returns InputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketInputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getInputStream()} is returned. In the latter + * case, the timeout argument is ignored and the timeout set with + * {@link Socket#setSoTimeout(int)} applies for reads.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getInputStream()}. + * + * @see #getInputStream(Socket, long) + * + * @param socket + * @return InputStream for reading from the socket. + * @throws IOException + */ + public static InputStream getInputStream(Socket socket) + throws IOException { + return getInputStream(socket, socket.getSoTimeout()); + } + + /** + * Returns InputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketInputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getInputStream()} is returned. In the later + * case, the timeout argument is ignored and the timeout set with + * {@link Socket#setSoTimeout(int)} applies for reads.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getInputStream()}. + * + * @see Socket#getChannel() + * + * @param socket + * @param timeout timeout in milliseconds. This may not always apply. zero + * for waiting as long as necessary. + * @return InputStream for reading from the socket. + * @throws IOException + */ + public static InputStream getInputStream(Socket socket, long timeout) + throws IOException { + return (socket.getChannel() == null) ? + socket.getInputStream() : new SocketInputStream(socket, timeout); + } + + /** + * Same as getOutputStream(socket, 0). Timeout of zero implies write will + * wait until data is available.

+ * + * From documentation for {@link #getOutputStream(Socket, long)} :
+ * Returns OutputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketOutputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getOutputStream()} is returned. In the latter + * case, the timeout argument is ignored and the write will wait until + * data is available.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getOutputStream()}. + * + * @see #getOutputStream(Socket, long) + * + * @param socket + * @return OutputStream for writing to the socket. + * @throws IOException + */ + public static OutputStream getOutputStream(Socket socket) + throws IOException { + return getOutputStream(socket, 0); + } + + /** + * Returns OutputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketOutputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getOutputStream()} is returned. In the later + * case, the timeout argument is ignored and the write will wait until + * data is available.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getOutputStream()}. + * + * @see Socket#getChannel() + * + * @param socket + * @param timeout timeout in milliseconds. This may not always apply. zero + * for waiting as long as necessary. + * @return OutputStream for writing to the socket. + * @throws IOException + */ + public static OutputStream getOutputStream(Socket socket, long timeout) + throws IOException { + return (socket.getChannel() == null) ? + socket.getOutputStream() : new SocketOutputStream(socket, timeout); + } + + /** + * This is a drop-in replacement for + * {@link Socket#connect(SocketAddress, int)}. + * In the case of normal sockets that don't have associated channels, this + * just invokes socket.connect(endpoint, timeout). If + * socket.getChannel() returns a non-null channel, + * connect is implemented using Hadoop's selectors. This is done mainly + * to avoid Sun's connect implementation from creating thread-local + * selectors, since Hadoop does not have control on when these are closed + * and could end up taking all the available file descriptors. + * + * @see java.net.Socket#connect(java.net.SocketAddress, int) + * + * @param socket + * @param endpoint + * @param timeout - timeout in milliseconds + */ + public static void connect(Socket socket, + SocketAddress endpoint, + int timeout) throws IOException { + if (socket == null || endpoint == null || timeout < 0) { + throw new IllegalArgumentException("Illegal argument for connect()"); + } + + SocketChannel ch = socket.getChannel(); + + if (ch == null) { + // let the default implementation handle it. + socket.connect(endpoint, timeout); + } else { + SocketIOWithTimeout.connect(ch, endpoint, timeout); + } + } + + /** + * Given a string representation of a host, return its ip address + * in textual presentation. + * + * @param name a string representation of a host: + * either a textual representation its IP address or its host name + * @return its IP address in the string format + */ + public static String normalizeHostName(String name) { + if (Character.digit(name.charAt(0), 16) != -1) { // it is an IP + return name; + } else { + try { + InetAddress ipAddress = InetAddress.getByName(name); + return ipAddress.getHostAddress(); + } catch (UnknownHostException e) { + return name; + } + } + } + + /** + * Given a collection of string representation of hosts, return a list of + * corresponding IP addresses in the textual representation. + * + * @param names a collection of string representations of hosts + * @return a list of corresponding IP addresses in the string format + * @see #normalizeHostName(String) + */ + public static List normalizeHostNames(Collection names) { + List hostNames = new ArrayList(names.size()); + for (String name : names) { + hostNames.add(normalizeHostName(name)); + } + return hostNames; + } + + /** + * Attempt to obtain the host name of a name specified by ip address. + * Check that the node name is an ip addr and if so, attempt to determine + * its host name. If the name is not an IP addr, or the actual name cannot + * be determined, return null. 
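// Illustrative sketch tying together the connect() and stream helpers above:
// sockets from the Hadoop socket factories carry an NIO channel, so reads and
// writes should go through NetUtils rather than Socket.get*Stream(). Host,
// port and timeouts are made up for the example.

import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;

class StreamExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
    NetUtils.connect(s, new InetSocketAddress("dn.example.com", 50010), 10000);
    OutputStream out = NetUtils.getOutputStream(s, 10000); // 10s write timeout
    InputStream in = NetUtils.getInputStream(s);           // uses the socket's SO_TIMEOUT
    out.write(0);
    System.out.println(in.read());
    s.close();
  }
}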
+ * + * @return Host name or null + */ + private static final Pattern ipPattern = // Pattern for matching hostname to ip:port + Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:?\\d*"); + public static String getHostNameOfIP(String ip) { + // If name is not an ip addr, don't bother looking it up + if(!ipPattern.matcher(ip).matches()) + return null; + + String hostname = ""; + try { + String n = ip.substring(0, ip.indexOf(':')); + hostname = InetAddress.getByName(n).getHostName(); + } catch (UnknownHostException e) { + return null; + } + + return hostname; + } + + /** + * Return hostname without throwing exception. + * @return hostname + */ + public static String getHostname() { + try {return "" + InetAddress.getLocalHost();} + catch(UnknownHostException uhe) {return "" + uhe;} + } +} diff --git a/src/java/org/apache/hadoop/net/NetworkTopology.java b/src/java/org/apache/hadoop/net/NetworkTopology.java new file mode 100644 index 00000000000..1de588bd43f --- /dev/null +++ b/src/java/org/apache/hadoop/net/NetworkTopology.java @@ -0,0 +1,655 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Random; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** The class represents a cluster of computer with a tree hierarchical + * network topology. + * For example, a cluster may be consists of many data centers filled + * with racks of computers. + * In a network topology, leaves represent data nodes (computers) and inner + * nodes represent switches/routers that manage traffic in/out of data centers + * or racks. + * + */ +public class NetworkTopology { + public final static String DEFAULT_RACK = "/default-rack"; + public final static int DEFAULT_HOST_LEVEL = 2; + public static final Log LOG = + LogFactory.getLog(NetworkTopology.class); + + /* Inner Node represent a switch/router of a data center or rack. + * Different from a leave node, it has non-null children. 
+ */ + private class InnerNode extends NodeBase { + private ArrayList children=new ArrayList(); + private int numOfLeaves; + + /** Construct an InnerNode from a path-like string */ + InnerNode(String path) { + super(path); + } + + /** Construct an InnerNode from its name and its network location */ + InnerNode(String name, String location) { + super(name, location); + } + + /** Construct an InnerNode + * from its name, its network location, its parent, and its level */ + InnerNode(String name, String location, InnerNode parent, int level) { + super(name, location, parent, level); + } + + /** Get its children */ + Collection getChildren() {return children;} + + /** Return the number of children this node has */ + int getNumOfChildren() { + return children.size(); + } + + /** Judge if this node represents a rack + * Return true if it has no child or its children are not InnerNodes + */ + boolean isRack() { + if (children.isEmpty()) { + return true; + } + + Node firstChild = children.get(0); + if (firstChild instanceof InnerNode) { + return false; + } + + return true; + } + + /** Judge if this node is an ancestor of node n + * + * @param n a node + * @return true if this node is an ancestor of n + */ + boolean isAncestor(Node n) { + return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) || + (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR). + startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR); + } + + /** Judge if this node is the parent of node n + * + * @param n a node + * @return true if this node is the parent of n + */ + boolean isParent(Node n) { + return n.getNetworkLocation().equals(getPath(this)); + } + + /* Return a child name of this node who is an ancestor of node n */ + private String getNextAncestorName(Node n) { + if (!isAncestor(n)) { + throw new IllegalArgumentException( + this + "is not an ancestor of " + n); + } + String name = n.getNetworkLocation().substring(getPath(this).length()); + if (name.charAt(0) == PATH_SEPARATOR) { + name = name.substring(1); + } + int index=name.indexOf(PATH_SEPARATOR); + if (index !=-1) + name = name.substring(0, index); + return name; + } + + /** Add node n to the subtree of this node + * @param n node to be added + * @return true if the node is added; false otherwise + */ + boolean add(Node n) { + if (!isAncestor(n)) + throw new IllegalArgumentException(n.getName()+", which is located at " + +n.getNetworkLocation()+", is not a decendent of " + +getPath(this)); + if (isParent(n)) { + // this node is the parent of n; add n directly + n.setParent(this); + n.setLevel(this.level+1); + for(int i=0; in from the subtree of this node + * @param n node to be deleted + * @return true if the node is deleted; false otherwise + */ + boolean remove(Node n) { + String parent = n.getNetworkLocation(); + String currentPath = getPath(this); + if (!isAncestor(n)) + throw new IllegalArgumentException(n.getName() + +", which is located at " + +parent+", is not a descendent of "+currentPath); + if (isParent(n)) { + // this node is the parent of n; remove n directly + for(int i=0; ileafIndex leaf of this subtree + * if it is not in the excludedNode*/ + private Node getLeaf(int leafIndex, Node excludedNode) { + int count=0; + // check if the excluded node a leaf + boolean isLeaf = + excludedNode == null || !(excludedNode instanceof InnerNode); + // calculate the total number of excluded leaf nodes + int numOfExcludedLeaves = + isLeaf ? 
1 : ((InnerNode)excludedNode).getNumOfLeaves(); + if (isRack()) { // children are leaves + if (isLeaf) { // excluded node is a leaf node + int excludedIndex = children.indexOf(excludedNode); + if (excludedIndex != -1 && leafIndex >= 0) { + // excluded node is one of the children so adjust the leaf index + leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex; + } + } + // range check + if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) { + return null; + } + return children.get(leafIndex); + } else { + for(int i=0; i leafIndex) { + // the leaf is in the child subtree + return child.getLeaf(leafIndex-count, excludedNode); + } else { + // go to the next child + count = count+numOfLeaves; + } + } else { // it is the excluededNode + // skip it and set the excludedNode to be null + excludedNode = null; + } + } + return null; + } + } + + int getNumOfLeaves() { + return numOfLeaves; + } + } // end of InnerNode + + InnerNode clusterMap = new InnerNode(InnerNode.ROOT); // the root + private int numOfRacks = 0; // rack counter + private ReadWriteLock netlock; + + public NetworkTopology() { + netlock = new ReentrantReadWriteLock(); + } + + /** Add a leaf node + * Update node counter & rack counter if necessary + * @param node + * node to be added + * @exception IllegalArgumentException if add a node to a leave + or node to be added is not a leaf + */ + public void add(Node node) { + if (node==null) return; + if( node instanceof InnerNode ) { + throw new IllegalArgumentException( + "Not allow to add an inner node: "+NodeBase.getPath(node)); + } + netlock.writeLock().lock(); + try { + Node rack = getNode(node.getNetworkLocation()); + if (rack != null && !(rack instanceof InnerNode)) { + throw new IllegalArgumentException("Unexpected data node " + + node.toString() + + " at an illegal network location"); + } + if (clusterMap.add(node)) { + LOG.info("Adding a new node: "+NodeBase.getPath(node)); + if (rack == null) { + numOfRacks++; + } + } + LOG.debug("NetworkTopology became:\n" + this.toString()); + } finally { + netlock.writeLock().unlock(); + } + } + + /** Remove a node + * Update node counter & rack counter if necessary + * @param node + * node to be removed + */ + public void remove(Node node) { + if (node==null) return; + if( node instanceof InnerNode ) { + throw new IllegalArgumentException( + "Not allow to remove an inner node: "+NodeBase.getPath(node)); + } + LOG.info("Removing a node: "+NodeBase.getPath(node)); + netlock.writeLock().lock(); + try { + if (clusterMap.remove(node)) { + InnerNode rack = (InnerNode)getNode(node.getNetworkLocation()); + if (rack == null) { + numOfRacks--; + } + } + LOG.debug("NetworkTopology became:\n" + this.toString()); + } finally { + netlock.writeLock().unlock(); + } + } + + /** Check if the tree contains node node + * + * @param node + * a node + * @return true if node is already in the tree; false otherwise + */ + public boolean contains(Node node) { + if (node == null) return false; + netlock.readLock().lock(); + try { + Node parent = node.getParent(); + for(int level=node.getLevel(); parent!=null&&level>0; + parent=parent.getParent(), level--) { + if (parent == clusterMap) + return true; + } + } finally { + netlock.readLock().unlock(); + } + return false; + } + + /** Given a string representation of a node, return its reference + * + * @param loc + * a path-like string representation of a node + * @return a reference to the node; null if the node is not in the tree + */ + public Node getNode(String loc) { + netlock.readLock().lock(); + try { + 
loc = NodeBase.normalize(loc); + if (!NodeBase.ROOT.equals(loc)) + loc = loc.substring(1); + return clusterMap.getLoc(loc); + } finally { + netlock.readLock().unlock(); + } + } + + /** Return the total number of racks */ + public int getNumOfRacks() { + netlock.readLock().lock(); + try { + return numOfRacks; + } finally { + netlock.readLock().unlock(); + } + } + + /** Return the total number of nodes */ + public int getNumOfLeaves() { + netlock.readLock().lock(); + try { + return clusterMap.getNumOfLeaves(); + } finally { + netlock.readLock().unlock(); + } + } + + /** Return the distance between two nodes + * It is assumed that the distance from one node to its parent is 1 + * The distance between two nodes is calculated by summing up their distances + * to their closest common ancestor. + * @param node1 one node + * @param node2 another node + * @return the distance between node1 and node2 + * node1 or node2 do not belong to the cluster + */ + public int getDistance(Node node1, Node node2) { + if (node1 == node2) { + return 0; + } + Node n1=node1, n2=node2; + int dis = 0; + netlock.readLock().lock(); + try { + int level1=node1.getLevel(), level2=node2.getLevel(); + while(n1!=null && level1>level2) { + n1 = n1.getParent(); + level1--; + dis++; + } + while(n2!=null && level2>level1) { + n2 = n2.getParent(); + level2--; + dis++; + } + while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) { + n1=n1.getParent(); + n2=n2.getParent(); + dis+=2; + } + } finally { + netlock.readLock().unlock(); + } + if (n1==null) { + LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1)); + return Integer.MAX_VALUE; + } + if (n2==null) { + LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2)); + return Integer.MAX_VALUE; + } + return dis+2; + } + + /** Check if two nodes are on the same rack + * @param node1 one node + * @param node2 another node + * @return true if node1 and node2 are on the same rack; false otherwise + * @exception IllegalArgumentException when either node1 or node2 is null, or + * node1 or node2 do not belong to the cluster + */ + public boolean isOnSameRack( Node node1, Node node2) { + if (node1 == null || node2 == null) { + return false; + } + + netlock.readLock().lock(); + try { + return node1.getParent()==node2.getParent(); + } finally { + netlock.readLock().unlock(); + } + } + + final private static Random r = new Random(); + /** randomly choose one node from scope + * if scope starts with ~, choose one from the all nodes except for the + * ones in scope; otherwise, choose one from scope + * @param scope range of nodes from which a node will be chosen + * @return the chosen node + */ + public Node chooseRandom(String scope) { + netlock.readLock().lock(); + try { + if (scope.startsWith("~")) { + return chooseRandom(NodeBase.ROOT, scope.substring(1)); + } else { + return chooseRandom(scope, null); + } + } finally { + netlock.readLock().unlock(); + } + } + + private Node chooseRandom(String scope, String excludedScope){ + if (excludedScope != null) { + if (scope.startsWith(excludedScope)) { + return null; + } + if (!excludedScope.startsWith(scope)) { + excludedScope = null; + } + } + Node node = getNode(scope); + if (!(node instanceof InnerNode)) { + return node; + } + InnerNode innerNode = (InnerNode)node; + int numOfDatanodes = innerNode.getNumOfLeaves(); + if (excludedScope == null) { + node = null; + } else { + node = getNode(excludedScope); + if (!(node instanceof InnerNode)) { + numOfDatanodes -= 1; + } else { + numOfDatanodes -= 
((InnerNode)node).getNumOfLeaves(); + } + } + int leaveIndex = r.nextInt(numOfDatanodes); + return innerNode.getLeaf(leaveIndex, node); + } + + /** return the number of leaves in scope but not in excludedNodes + * if scope starts with ~, return the number of nodes that are not + * in scope and excludedNodes; + * @param scope a path string that may start with ~ + * @param excludedNodes a list of nodes + * @return number of available nodes + */ + public int countNumOfAvailableNodes(String scope, + Collection excludedNodes) { + boolean isExcluded=false; + if (scope.startsWith("~")) { + isExcluded=true; + scope=scope.substring(1); + } + scope = NodeBase.normalize(scope); + int count=0; // the number of nodes in both scope & excludedNodes + netlock.readLock().lock(); + try { + for(Node node:excludedNodes) { + if ((NodeBase.getPath(node)+NodeBase.PATH_SEPARATOR_STR). + startsWith(scope+NodeBase.PATH_SEPARATOR_STR)) { + count++; + } + } + Node n=getNode(scope); + int scopeNodeCount=1; + if (n instanceof InnerNode) { + scopeNodeCount=((InnerNode)n).getNumOfLeaves(); + } + if (isExcluded) { + return clusterMap.getNumOfLeaves()- + scopeNodeCount-excludedNodes.size()+count; + } else { + return scopeNodeCount-count; + } + } finally { + netlock.readLock().unlock(); + } + } + + /** convert a network tree to a string */ + public String toString() { + // print the number of racks + StringBuffer tree = new StringBuffer(); + tree.append("Number of racks: "); + tree.append(numOfRacks); + tree.append("\n"); + // print the number of leaves + int numOfLeaves = getNumOfLeaves(); + tree.append("Expected number of leaves:"); + tree.append(numOfLeaves); + tree.append("\n"); + // print nodes + for(int i=0; ireader + * It linearly scans the array, if a local node is found, swap it with + * the first element of the array. + * If a local rack node is found, swap it with the first element following + * the local node. + * If neither local node or local rack node is found, put a random replica + * location at position 0. + * It leaves the rest nodes untouched. + */ + public void pseudoSortByDistance( Node reader, Node[] nodes ) { + int tempIndex = 0; + if (reader != null ) { + int localRackNode = -1; + //scan the array to find the local node & local rack node + for(int i=0; i resolve(List names) { + List m = new ArrayList(names.size()); + + if (names.isEmpty()) { + return m; + } + + if (scriptName == null) { + for (int i = 0; i < names.size(); i++) { + m.add(NetworkTopology.DEFAULT_RACK); + } + return m; + } + + String output = runResolveCommand(names); + if (output != null) { + StringTokenizer allSwitchInfo = new StringTokenizer(output); + while (allSwitchInfo.hasMoreTokens()) { + String switchInfo = allSwitchInfo.nextToken(); + m.add(switchInfo); + } + + if (m.size() != names.size()) { + // invalid number of entries returned by the script + LOG.warn("Script " + scriptName + " returned " + + Integer.toString(m.size()) + " values when " + + Integer.toString(names.size()) + " were expected."); + return null; + } + } else { + // an error occurred. return null to signify this. 
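// Illustrative sketch of the NetworkTopology operations above, assuming the
// NodeBase class that InnerNode extends (it is not part of this hunk): two
// racks under one data center, then a few queries.
//
// import org.apache.hadoop.net.*;
//
// class TopologyExample {
//   public static void main(String[] args) {
//     NetworkTopology topo = new NetworkTopology();
//     Node h1 = new NodeBase("h1", "/dc1/rack1");
//     Node h2 = new NodeBase("h2", "/dc1/rack1");
//     Node h3 = new NodeBase("h3", "/dc1/rack2");
//     topo.add(h1);
//     topo.add(h2);
//     topo.add(h3);
//     System.out.println(topo.isOnSameRack(h1, h2)); // true
//     System.out.println(topo.getDistance(h1, h3));  // 4: rack1 -> dc1 -> rack2
//     System.out.println(topo.getNumOfRacks());      // 2
//     System.out.println(topo.chooseRandom("/dc1/rack1").getName()); // h1 or h2
//   }
// }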
+ // (exn was already logged in runResolveCommand) + return null; + } + + return m; + } + + private String runResolveCommand(List args) { + int loopCount = 0; + if (args.size() == 0) { + return null; + } + StringBuffer allOutput = new StringBuffer(); + int numProcessed = 0; + if (maxArgs < MIN_ALLOWABLE_ARGS) { + LOG.warn("Invalid value " + Integer.toString(maxArgs) + + " for " + SCRIPT_ARG_COUNT_KEY + "; must be >= " + + Integer.toString(MIN_ALLOWABLE_ARGS)); + return null; + } + + while (numProcessed != args.size()) { + int start = maxArgs * loopCount; + List cmdList = new ArrayList(); + cmdList.add(scriptName); + for (numProcessed = start; numProcessed < (start + maxArgs) && + numProcessed < args.size(); numProcessed++) { + cmdList.add(args.get(numProcessed)); + } + File dir = null; + String userDir; + if ((userDir = System.getProperty("user.dir")) != null) { + dir = new File(userDir); + } + ShellCommandExecutor s = new ShellCommandExecutor( + cmdList.toArray(new String[0]), dir); + try { + s.execute(); + allOutput.append(s.getOutput() + " "); + } catch (Exception e) { + LOG.warn(StringUtils.stringifyException(e)); + return null; + } + loopCount++; + } + return allOutput.toString(); + } + } +} diff --git a/src/java/org/apache/hadoop/net/SocketIOWithTimeout.java b/src/java/org/apache/hadoop/net/SocketIOWithTimeout.java new file mode 100644 index 00000000000..f48d2d2db39 --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocketIOWithTimeout.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.net; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.SocketAddress; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.nio.channels.SelectableChannel; +import java.nio.channels.SelectionKey; +import java.nio.channels.Selector; +import java.nio.channels.SocketChannel; +import java.nio.channels.spi.SelectorProvider; +import java.util.Iterator; +import java.util.LinkedList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +/** + * This supports input and output streams for a socket channels. + * These streams can have a timeout. + */ +abstract class SocketIOWithTimeout { + // This is intentionally package private. + + static final Log LOG = LogFactory.getLog(SocketIOWithTimeout.class); + + private SelectableChannel channel; + private long timeout; + private boolean closed = false; + + private static SelectorPool selector = new SelectorPool(); + + /* A timeout value of 0 implies wait for ever. + * We should have a value of timeout that implies zero wait.. i.e. + * read or write returns immediately. + * + * This will set channel to non-blocking. 
+ */ + SocketIOWithTimeout(SelectableChannel channel, long timeout) + throws IOException { + checkChannelValidity(channel); + + this.channel = channel; + this.timeout = timeout; + // Set non-blocking + channel.configureBlocking(false); + } + + void close() { + closed = true; + } + + boolean isOpen() { + return !closed && channel.isOpen(); + } + + SelectableChannel getChannel() { + return channel; + } + + /** + * Utility function to check if channel is ok. + * Mainly to throw IOException instead of runtime exception + * in case of mismatch. This mismatch can occur for many runtime + * reasons. + */ + static void checkChannelValidity(Object channel) throws IOException { + if (channel == null) { + /* Most common reason is that original socket does not have a channel. + * So making this an IOException rather than a RuntimeException. + */ + throw new IOException("Channel is null. Check " + + "how the channel or socket is created."); + } + + if (!(channel instanceof SelectableChannel)) { + throw new IOException("Channel should be a SelectableChannel"); + } + } + + /** + * Performs actual IO operations. This is not expected to block. + * + * @param buf + * @return number of bytes (or some equivalent). 0 implies underlying + * channel is drained completely. We will wait if more IO is + * required. + * @throws IOException + */ + abstract int performIO(ByteBuffer buf) throws IOException; + + /** + * Performs one IO and returns number of bytes read or written. + * It waits up to the specified timeout. If the channel is + * not read before the timeout, SocketTimeoutException is thrown. + * + * @param buf buffer for IO + * @param ops Selection Ops used for waiting. Suggested values: + * SelectionKey.OP_READ while reading and SelectionKey.OP_WRITE while + * writing. + * + * @return number of bytes read or written. negative implies end of stream. + * @throws IOException + */ + int doIO(ByteBuffer buf, int ops) throws IOException { + + /* For now only one thread is allowed. If user want to read or write + * from multiple threads, multiple streams could be created. In that + * case multiple threads work as well as underlying channel supports it. + */ + if (!buf.hasRemaining()) { + throw new IllegalArgumentException("Buffer has no data left."); + //or should we just return 0? + } + + while (buf.hasRemaining()) { + if (closed) { + return -1; + } + + try { + int n = performIO(buf); + if (n != 0) { + // successful io or an error. + return n; + } + } catch (IOException e) { + if (!channel.isOpen()) { + closed = true; + } + throw e; + } + + //now wait for socket to be ready. + int count = 0; + try { + count = selector.select(channel, ops, timeout); + } catch (IOException e) { //unexpected IOException. + closed = true; + throw e; + } + + if (count == 0) { + throw new SocketTimeoutException(timeoutExceptionString(channel, + timeout, ops)); + } + // otherwise the socket should be ready for io. + } + + return 0; // does not reach here. + } + + /** + * The contract is similar to {@link SocketChannel#connect(SocketAddress)} + * with a timeout. 
+ * + * @see SocketChannel#connect(SocketAddress) + * + * @param channel - this should be a {@link SelectableChannel} + * @param endpoint + * @throws IOException + */ + static void connect(SocketChannel channel, + SocketAddress endpoint, int timeout) throws IOException { + + boolean blockingOn = channel.isBlocking(); + if (blockingOn) { + channel.configureBlocking(false); + } + + try { + if (channel.connect(endpoint)) { + return; + } + + long timeoutLeft = timeout; + long endTime = (timeout > 0) ? (System.currentTimeMillis() + timeout): 0; + + while (true) { + // we might have to call finishConnect() more than once + // for some channels (with user level protocols) + + int ret = selector.select((SelectableChannel)channel, + SelectionKey.OP_CONNECT, timeoutLeft); + + if (ret > 0 && channel.finishConnect()) { + return; + } + + if (ret == 0 || + (timeout > 0 && + (timeoutLeft = (endTime - System.currentTimeMillis())) <= 0)) { + throw new SocketTimeoutException( + timeoutExceptionString(channel, timeout, + SelectionKey.OP_CONNECT)); + } + } + } catch (IOException e) { + // javadoc for SocketChannel.connect() says channel should be closed. + try { + channel.close(); + } catch (IOException ignored) {} + throw e; + } finally { + if (blockingOn && channel.isOpen()) { + channel.configureBlocking(true); + } + } + } + + /** + * This is similar to {@link #doIO(ByteBuffer, int)} except that it + * does not perform any I/O. It just waits for the channel to be ready + * for I/O as specified in ops. + * + * @param ops Selection Ops used for waiting + * + * @throws SocketTimeoutException + * if select on the channel times out. + * @throws IOException + * if any other I/O error occurs. + */ + void waitForIO(int ops) throws IOException { + + if (selector.select(channel, ops, timeout) == 0) { + throw new SocketTimeoutException(timeoutExceptionString(channel, timeout, + ops)); + } + } + + private static String timeoutExceptionString(SelectableChannel channel, + long timeout, int ops) { + + String waitingFor; + switch(ops) { + + case SelectionKey.OP_READ : + waitingFor = "read"; break; + + case SelectionKey.OP_WRITE : + waitingFor = "write"; break; + + case SelectionKey.OP_CONNECT : + waitingFor = "connect"; break; + + default : + waitingFor = "" + ops; + } + + return timeout + " millis timeout while " + + "waiting for channel to be ready for " + + waitingFor + ". ch : " + channel; + } + + /** + * This maintains a pool of selectors. These selectors are closed + * once they are idle (unused) for a few seconds. + */ + private static class SelectorPool { + + private static class SelectorInfo { + Selector selector; + long lastActivityTime; + LinkedList queue; + + void close() { + if (selector != null) { + try { + selector.close(); + } catch (IOException e) { + LOG.warn("Unexpected exception while closing selector : " + + StringUtils.stringifyException(e)); + } + } + } + } + + private static class ProviderInfo { + SelectorProvider provider; + LinkedList queue; // lifo + ProviderInfo next; + } + + private static final long IDLE_TIMEOUT = 10 * 1000; // 10 seconds. + + private ProviderInfo providerList = null; + + /** + * Waits on the channel with the given timeout using one of the + * cached selectors. It also removes any cached selectors that are + * idle for a few seconds. 
+ * + * @param channel + * @param ops + * @param timeout + * @return + * @throws IOException + */ + int select(SelectableChannel channel, int ops, long timeout) + throws IOException { + + SelectorInfo info = get(channel); + + SelectionKey key = null; + int ret = 0; + + try { + while (true) { + long start = (timeout == 0) ? 0 : System.currentTimeMillis(); + + key = channel.register(info.selector, ops); + ret = info.selector.select(timeout); + + if (ret != 0) { + return ret; + } + + /* Sometimes select() returns 0 much before timeout for + * unknown reasons. So select again if required. + */ + if (timeout > 0) { + timeout -= System.currentTimeMillis() - start; + if (timeout <= 0) { + return 0; + } + } + + if (Thread.currentThread().isInterrupted()) { + throw new InterruptedIOException("Interruped while waiting for " + + "IO on channel " + channel + + ". " + timeout + + " millis timeout left."); + } + } + } finally { + if (key != null) { + key.cancel(); + } + + //clear the canceled key. + try { + info.selector.selectNow(); + } catch (IOException e) { + LOG.info("Unexpected Exception while clearing selector : " + + StringUtils.stringifyException(e)); + // don't put the selector back. + info.close(); + return ret; + } + + release(info); + } + } + + /** + * Takes one selector from end of LRU list of free selectors. + * If there are no selectors awailable, it creates a new selector. + * Also invokes trimIdleSelectors(). + * + * @param channel + * @return + * @throws IOException + */ + private synchronized SelectorInfo get(SelectableChannel channel) + throws IOException { + SelectorInfo selInfo = null; + + SelectorProvider provider = channel.provider(); + + // pick the list : rarely there is more than one provider in use. + ProviderInfo pList = providerList; + while (pList != null && pList.provider != provider) { + pList = pList.next; + } + if (pList == null) { + //LOG.info("Creating new ProviderInfo : " + provider.toString()); + pList = new ProviderInfo(); + pList.provider = provider; + pList.queue = new LinkedList(); + pList.next = providerList; + providerList = pList; + } + + LinkedList queue = pList.queue; + + if (queue.isEmpty()) { + Selector selector = provider.openSelector(); + selInfo = new SelectorInfo(); + selInfo.selector = selector; + selInfo.queue = queue; + } else { + selInfo = queue.removeLast(); + } + + trimIdleSelectors(System.currentTimeMillis()); + return selInfo; + } + + /** + * puts selector back at the end of LRU list of free selectos. + * Also invokes trimIdleSelectors(). + * + * @param info + */ + private synchronized void release(SelectorInfo info) { + long now = System.currentTimeMillis(); + trimIdleSelectors(now); + info.lastActivityTime = now; + info.queue.addLast(info); + } + + /** + * Closes selectors that are idle for IDLE_TIMEOUT (10 sec). It does not + * traverse the whole list, just over the one that have crossed + * the timeout. 
+ */ + private void trimIdleSelectors(long now) { + long cutoff = now - IDLE_TIMEOUT; + + for(ProviderInfo pList=providerList; pList != null; pList=pList.next) { + if (pList.queue.isEmpty()) { + continue; + } + for(Iterator it = pList.queue.iterator(); it.hasNext();) { + SelectorInfo info = it.next(); + if (info.lastActivityTime > cutoff) { + break; + } + it.remove(); + info.close(); + } + } + } + } +} diff --git a/src/java/org/apache/hadoop/net/SocketInputStream.java b/src/java/org/apache/hadoop/net/SocketInputStream.java new file mode 100644 index 00000000000..2568ba9c2bc --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocketInputStream.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.net; + +import java.io.IOException; +import java.io.InputStream; +import java.net.Socket; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SelectableChannel; +import java.nio.channels.SelectionKey; + + +/** + * This implements an input stream that can have a timeout while reading. + * This sets non-blocking flag on the socket channel. + * So after create this object, read() on + * {@link Socket#getInputStream()} and write() on + * {@link Socket#getOutputStream()} for the associated socket will throw + * IllegalBlockingModeException. + * Please use {@link SocketOutputStream} for writing. + */ +public class SocketInputStream extends InputStream + implements ReadableByteChannel { + + private Reader reader; + + private static class Reader extends SocketIOWithTimeout { + ReadableByteChannel channel; + + Reader(ReadableByteChannel channel, long timeout) throws IOException { + super((SelectableChannel)channel, timeout); + this.channel = channel; + } + + int performIO(ByteBuffer buf) throws IOException { + return channel.read(buf); + } + } + + /** + * Create a new input stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @param channel + * Channel for reading, should also be a {@link SelectableChannel}. + * The channel will be configured to be non-blocking. + * @param timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketInputStream(ReadableByteChannel channel, long timeout) + throws IOException { + SocketIOWithTimeout.checkChannelValidity(channel); + reader = new Reader(channel, timeout); + } + + /** + * Same as SocketInputStream(socket.getChannel(), timeout):

+ * + * Create a new input stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + * + * @param socket should have a channel associated with it. + * @param timeout timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketInputStream(Socket socket, long timeout) + throws IOException { + this(socket.getChannel(), timeout); + } + + /** + * Same as SocketInputStream(socket.getChannel(), socket.getSoTimeout()) + * :

+ * + * Create a new input stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + * + * @param socket should have a channel associated with it. + * @throws IOException + */ + public SocketInputStream(Socket socket) throws IOException { + this(socket.getChannel(), socket.getSoTimeout()); + } + + @Override + public int read() throws IOException { + /* Allocation can be removed if required. + * probably no need to optimize or encourage single byte read. + */ + byte[] buf = new byte[1]; + int ret = read(buf, 0, 1); + if (ret > 0) { + return (byte)buf[0]; + } + if (ret != -1) { + // unexpected + throw new IOException("Could not read from stream"); + } + return ret; + } + + public int read(byte[] b, int off, int len) throws IOException { + return read(ByteBuffer.wrap(b, off, len)); + } + + public synchronized void close() throws IOException { + /* close the channel since Socket.getInputStream().close() + * closes the socket. + */ + reader.channel.close(); + reader.close(); + } + + /** + * Returns underlying channel used by inputstream. + * This is useful in certain cases like channel for + * {@link FileChannel#transferFrom(ReadableByteChannel, long, long)}. + */ + public ReadableByteChannel getChannel() { + return reader.channel; + } + + //ReadableByteChannel interface + + public boolean isOpen() { + return reader.isOpen(); + } + + public int read(ByteBuffer dst) throws IOException { + return reader.doIO(dst, SelectionKey.OP_READ); + } + + /** + * waits for the underlying channel to be ready for reading. + * The timeout specified for this stream applies to this wait. + * + * @throws SocketTimeoutException + * if select on the channel times out. + * @throws IOException + * if any other I/O error occurs. + */ + public void waitForReadable() throws IOException { + reader.waitForIO(SelectionKey.OP_READ); + } +} diff --git a/src/java/org/apache/hadoop/net/SocketOutputStream.java b/src/java/org/apache/hadoop/net/SocketOutputStream.java new file mode 100644 index 00000000000..fa4e8500b5a --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocketOutputStream.java @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
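// Illustrative sketch of a timed read with the stream above; the 5 second
// timeout and method name are made up. The socket must have an associated NIO
// channel (e.g. one created by a Hadoop socket factory).
//
// import java.net.Socket;
// import org.apache.hadoop.net.SocketInputStream;
//
// class TimedReadExample {
//   static int readOneByte(Socket connectedNioSocket) throws Exception {
//     SocketInputStream in = new SocketInputStream(connectedNioSocket, 5000);
//     in.waitForReadable();   // optional: block until readable or SocketTimeoutException
//     return in.read();       // -1 on end of stream
//   }
// }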
+ */ + +package org.apache.hadoop.net; + +import java.io.EOFException; +import java.io.IOException; +import java.io.OutputStream; +import java.net.Socket; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.SelectableChannel; +import java.nio.channels.SelectionKey; +import java.nio.channels.WritableByteChannel; + +/** + * This implements an output stream that can have a timeout while writing. + * This sets non-blocking flag on the socket channel. + * So after creating this object , read() on + * {@link Socket#getInputStream()} and write() on + * {@link Socket#getOutputStream()} on the associated socket will throw + * llegalBlockingModeException. + * Please use {@link SocketInputStream} for reading. + */ +public class SocketOutputStream extends OutputStream + implements WritableByteChannel { + + private Writer writer; + + private static class Writer extends SocketIOWithTimeout { + WritableByteChannel channel; + + Writer(WritableByteChannel channel, long timeout) throws IOException { + super((SelectableChannel)channel, timeout); + this.channel = channel; + } + + int performIO(ByteBuffer buf) throws IOException { + return channel.write(buf); + } + } + + /** + * Create a new ouput stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @param channel + * Channel for writing, should also be a {@link SelectableChannel}. + * The channel will be configured to be non-blocking. + * @param timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketOutputStream(WritableByteChannel channel, long timeout) + throws IOException { + SocketIOWithTimeout.checkChannelValidity(channel); + writer = new Writer(channel, timeout); + } + + /** + * Same as SocketOutputStream(socket.getChannel(), timeout):

+ * + * Create a new ouput stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + * + * @param socket should have a channel associated with it. + * @param timeout timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketOutputStream(Socket socket, long timeout) + throws IOException { + this(socket.getChannel(), timeout); + } + + public void write(int b) throws IOException { + /* If we need to, we can optimize this allocation. + * probably no need to optimize or encourage single byte writes. + */ + byte[] buf = new byte[1]; + buf[0] = (byte)b; + write(buf, 0, 1); + } + + public void write(byte[] b, int off, int len) throws IOException { + ByteBuffer buf = ByteBuffer.wrap(b, off, len); + while (buf.hasRemaining()) { + try { + if (write(buf) < 0) { + throw new IOException("The stream is closed"); + } + } catch (IOException e) { + /* Unlike read, write can not inform user of partial writes. + * So will close this if there was a partial write. + */ + if (buf.capacity() > buf.remaining()) { + writer.close(); + } + throw e; + } + } + } + + public synchronized void close() throws IOException { + /* close the channel since Socket.getOuputStream().close() + * closes the socket. + */ + writer.channel.close(); + writer.close(); + } + + /** + * Returns underlying channel used by this stream. + * This is useful in certain cases like channel for + * {@link FileChannel#transferTo(long, long, WritableByteChannel)} + */ + public WritableByteChannel getChannel() { + return writer.channel; + } + + //WritableByteChannle interface + + public boolean isOpen() { + return writer.isOpen(); + } + + public int write(ByteBuffer src) throws IOException { + return writer.doIO(src, SelectionKey.OP_WRITE); + } + + /** + * waits for the underlying channel to be ready for writing. + * The timeout specified for this stream applies to this wait. + * + * @throws SocketTimeoutException + * if select on the channel times out. + * @throws IOException + * if any other I/O error occurs. + */ + public void waitForWritable() throws IOException { + writer.waitForIO(SelectionKey.OP_WRITE); + } + + /** + * Transfers data from FileChannel using + * {@link FileChannel#transferTo(long, long, WritableByteChannel)}. + * + * Similar to readFully(), this waits till requested amount of + * data is transfered. + * + * @param fileCh FileChannel to transfer data from. + * @param position position within the channel where the transfer begins + * @param count number of bytes to transfer. + * + * @throws EOFException + * If end of input file is reached before requested number of + * bytes are transfered. + * + * @throws SocketTimeoutException + * If this channel blocks transfer longer than timeout for + * this stream. + * + * @throws IOException Includes any exception thrown by + * {@link FileChannel#transferTo(long, long, WritableByteChannel)}. + */ + public void transferToFully(FileChannel fileCh, long position, int count) + throws IOException { + + while (count > 0) { + /* + * Ideally we should wait after transferTo returns 0. But because of + * a bug in JRE on Linux (http://bugs.sun.com/view_bug.do?bug_id=5103988), + * which throws an exception instead of returning 0, we wait for the + * channel to be writable before writing to it. 
If you ever see + * IOException with message "Resource temporarily unavailable" + * thrown here, please let us know. + * + * Once we move to JAVA SE 7, wait should be moved to correct place. + */ + waitForWritable(); + int nTransfered = (int) fileCh.transferTo(position, count, getChannel()); + + if (nTransfered == 0) { + //check if end of file is reached. + if (position >= fileCh.size()) { + throw new EOFException("EOF Reached. file size is " + fileCh.size() + + " and " + count + " more bytes left to be " + + "transfered."); + } + //otherwise assume the socket is full. + //waitForWritable(); // see comment above. + } else if (nTransfered < 0) { + throw new IOException("Unexpected return of " + nTransfered + + " from transferTo()"); + } else { + position += nTransfered; + count -= nTransfered; + } + } + } +} diff --git a/src/java/org/apache/hadoop/net/SocksSocketFactory.java b/src/java/org/apache/hadoop/net/SocksSocketFactory.java new file mode 100644 index 00000000000..19c89210da9 --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocksSocketFactory.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Proxy; +import java.net.Socket; +import java.net.UnknownHostException; + +import javax.net.SocketFactory; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** + * Specialized SocketFactory to create sockets with a SOCKS proxy + */ +public class SocksSocketFactory extends SocketFactory implements + Configurable { + + private Configuration conf; + + private Proxy proxy; + + /** + * Default empty constructor (for use with the reflection API). 
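// Illustrative sketch of transferToFully() above: push a whole local file down
// an already connected NIO socket with a 10 second write timeout (the path
// argument, timeout and class name are made up).

import java.io.FileInputStream;
import java.net.Socket;
import java.nio.channels.FileChannel;
import org.apache.hadoop.net.SocketOutputStream;

class TransferExample {
  static void sendFile(Socket connectedNioSocket, String path) throws Exception {
    SocketOutputStream out = new SocketOutputStream(connectedNioSocket, 10000);
    FileChannel file = new FileInputStream(path).getChannel();
    try {
      out.transferToFully(file, 0, (int) file.size());
    } finally {
      file.close();
      out.close();
    }
  }
}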
+ */ + public SocksSocketFactory() { + this.proxy = Proxy.NO_PROXY; + } + + /** + * Constructor with a supplied Proxy + * + * @param proxy the proxy to use to create sockets + */ + public SocksSocketFactory(Proxy proxy) { + this.proxy = proxy; + } + + /* @inheritDoc */ + @Override + public Socket createSocket() throws IOException { + + return new Socket(proxy); + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port) throws IOException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port, + InetAddress localHostAddr, int localPort) throws IOException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port, + InetAddress localHostAddr, int localPort) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public int hashCode() { + return proxy.hashCode(); + } + + /* @inheritDoc */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof SocksSocketFactory)) + return false; + final SocksSocketFactory other = (SocksSocketFactory) obj; + if (proxy == null) { + if (other.proxy != null) + return false; + } else if (!proxy.equals(other.proxy)) + return false; + return true; + } + + /* @inheritDoc */ + public Configuration getConf() { + return this.conf; + } + + /* @inheritDoc */ + public void setConf(Configuration conf) { + this.conf = conf; + String proxyStr = conf.get("hadoop.socks.server"); + if ((proxyStr != null) && (proxyStr.length() > 0)) { + setProxy(proxyStr); + } + } + + /** + * Set the proxy of this socket factory as described in the string + * parameter + * + * @param proxyStr the proxy address using the format "host:port" + */ + private void setProxy(String proxyStr) { + String[] strs = proxyStr.split(":", 2); + if (strs.length != 2) + throw new RuntimeException("Bad SOCKS proxy parameter: " + proxyStr); + String host = strs[0]; + int port = Integer.parseInt(strs[1]); + this.proxy = + new Proxy(Proxy.Type.SOCKS, InetSocketAddress.createUnresolved(host, + port)); + } +} diff --git a/src/java/org/apache/hadoop/net/StandardSocketFactory.java b/src/java/org/apache/hadoop/net/StandardSocketFactory.java new file mode 100644 index 00000000000..b95258557e9 --- /dev/null +++ b/src/java/org/apache/hadoop/net/StandardSocketFactory.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
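// Illustrative sketch of wiring the factory above into the configuration keys
// it reads ("hadoop.rpc.socket.factory.class.default" in NetUtils and
// "hadoop.socks.server" in setConf); the proxy host is made up.
//
// import javax.net.SocketFactory;
// import org.apache.hadoop.conf.Configuration;
// import org.apache.hadoop.net.NetUtils;
//
// class SocksExample {
//   public static void main(String[] args) {
//     Configuration conf = new Configuration();
//     conf.set("hadoop.rpc.socket.factory.class.default",
//              "org.apache.hadoop.net.SocksSocketFactory");
//     conf.set("hadoop.socks.server", "socks-gw.example.com:1080");
//     // ReflectionUtils.newInstance() calls setConf(), which installs the proxy.
//     SocketFactory factory = NetUtils.getDefaultSocketFactory(conf);
//     System.out.println(factory.getClass().getName());
//   }
// }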
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.UnknownHostException; +import java.nio.channels.SocketChannel; + +import javax.net.SocketFactory; + +/** + * Specialized SocketFactory to create sockets with a SOCKS proxy + */ +public class StandardSocketFactory extends SocketFactory { + + /** + * Default empty constructor (for use with the reflection API). + */ + public StandardSocketFactory() { + } + + /* @inheritDoc */ + @Override + public Socket createSocket() throws IOException { + /* + * NOTE: This returns an NIO socket so that it has an associated + * SocketChannel. As of now, this unfortunately makes streams returned + * by Socket.getInputStream() and Socket.getOutputStream() unusable + * (because a blocking read on input stream blocks write on output stream + * and vice versa). + * + * So users of these socket factories should use + * NetUtils.getInputStream(socket) and + * NetUtils.getOutputStream(socket) instead. + * + * A solution for hiding from this from user is to write a + * 'FilterSocket' on the lines of FilterInputStream and extend it by + * overriding getInputStream() and getOutputStream(). + */ + return SocketChannel.open().socket(); + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port) throws IOException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port, + InetAddress localHostAddr, int localPort) throws IOException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port, + InetAddress localHostAddr, int localPort) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof StandardSocketFactory)) + return false; + return true; + } + + /* @inheritDoc */ + @Override + public int hashCode() { + // Dummy hash code (to make find bugs happy) + return 47; + } + +} diff --git a/src/java/org/apache/hadoop/net/package.html b/src/java/org/apache/hadoop/net/package.html new file mode 100644 index 00000000000..b4e5b5dbdc9 --- /dev/null +++ b/src/java/org/apache/hadoop/net/package.html @@ -0,0 +1,23 @@ + + + + + +Network-related classes. 
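StandardSocketFactory, despite the class comment copied from the SOCKS variant, creates plain channel-backed sockets with no proxy. Because createSocket() returns an NIO socket, its own getInputStream()/getOutputStream() are effectively unusable, as the comment in the code explains; the sketch below therefore wraps the socket with the NetUtils helpers that the comment points to. The endpoint and the StandardFactoryExample class are placeholders, and the NetUtils signatures are assumed from that comment rather than shown in this patch.

    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.InetSocketAddress;
    import java.net.Socket;
    import javax.net.SocketFactory;
    import org.apache.hadoop.net.NetUtils;
    import org.apache.hadoop.net.StandardSocketFactory;

    public class StandardFactoryExample {
      public static void main(String[] args) throws Exception {
        SocketFactory factory = new StandardSocketFactory();
        Socket socket = factory.createSocket();   // NIO socket with an associated SocketChannel
        socket.connect(new InetSocketAddress("example.com", 8020), 10000);

        // Wrap with NetUtils instead of calling socket.getInputStream()/getOutputStream()
        // directly, as the createSocket() comment advises.
        InputStream in = NetUtils.getInputStream(socket);
        OutputStream out = NetUtils.getOutputStream(socket);
        socket.close();
      }
    }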
+ + diff --git a/src/java/org/apache/hadoop/record/BinaryRecordInput.java b/src/java/org/apache/hadoop/record/BinaryRecordInput.java new file mode 100644 index 00000000000..b750df9aed5 --- /dev/null +++ b/src/java/org/apache/hadoop/record/BinaryRecordInput.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.DataInput; +import java.io.IOException; +import java.io.DataInputStream; +import java.io.InputStream; + +/** + */ +public class BinaryRecordInput implements RecordInput { + + private DataInput in; + + static private class BinaryIndex implements Index { + private int nelems; + private BinaryIndex(int nelems) { + this.nelems = nelems; + } + public boolean done() { + return (nelems <= 0); + } + public void incr() { + nelems--; + } + } + + private BinaryRecordInput() {} + + private void setDataInput(DataInput inp) { + this.in = inp; + } + + private static ThreadLocal bIn = new ThreadLocal() { + protected synchronized Object initialValue() { + return new BinaryRecordInput(); + } + }; + + /** + * Get a thread-local record input for the supplied DataInput. + * @param inp data input stream + * @return binary record input corresponding to the supplied DataInput. 
+ */ + public static BinaryRecordInput get(DataInput inp) { + BinaryRecordInput bin = (BinaryRecordInput) bIn.get(); + bin.setDataInput(inp); + return bin; + } + + /** Creates a new instance of BinaryRecordInput */ + public BinaryRecordInput(InputStream strm) { + this.in = new DataInputStream(strm); + } + + /** Creates a new instance of BinaryRecordInput */ + public BinaryRecordInput(DataInput din) { + this.in = din; + } + + public byte readByte(final String tag) throws IOException { + return in.readByte(); + } + + public boolean readBool(final String tag) throws IOException { + return in.readBoolean(); + } + + public int readInt(final String tag) throws IOException { + return Utils.readVInt(in); + } + + public long readLong(final String tag) throws IOException { + return Utils.readVLong(in); + } + + public float readFloat(final String tag) throws IOException { + return in.readFloat(); + } + + public double readDouble(final String tag) throws IOException { + return in.readDouble(); + } + + public String readString(final String tag) throws IOException { + return Utils.fromBinaryString(in); + } + + public Buffer readBuffer(final String tag) throws IOException { + final int len = Utils.readVInt(in); + final byte[] barr = new byte[len]; + in.readFully(barr); + return new Buffer(barr); + } + + public void startRecord(final String tag) throws IOException { + // no-op + } + + public void endRecord(final String tag) throws IOException { + // no-op + } + + public Index startVector(final String tag) throws IOException { + return new BinaryIndex(readInt(tag)); + } + + public void endVector(final String tag) throws IOException { + // no-op + } + + public Index startMap(final String tag) throws IOException { + return new BinaryIndex(readInt(tag)); + } + + public void endMap(final String tag) throws IOException { + // no-op + } +} diff --git a/src/java/org/apache/hadoop/record/BinaryRecordOutput.java b/src/java/org/apache/hadoop/record/BinaryRecordOutput.java new file mode 100644 index 00000000000..a0586534590 --- /dev/null +++ b/src/java/org/apache/hadoop/record/BinaryRecordOutput.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
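BinaryRecordInput hands back a BinaryIndex from startVector()/startMap(): done() turns true once the element count read from the stream has been consumed, and incr() decrements that count. A small sketch of the consumption loop, assuming a RecordInput positioned at a serialized vector of longs (the VectorReadExample class and the tag names are illustrative only):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.record.Index;
    import org.apache.hadoop.record.RecordInput;

    public class VectorReadExample {
      // Reads a serialized vector of longs using the Index iteration pattern.
      static List<Long> readLongVector(RecordInput rin) throws IOException {
        Index idx = rin.startVector("values");
        List<Long> values = new ArrayList<Long>();
        while (!idx.done()) {          // for BinaryRecordInput: elements remain to be read
          values.add(rin.readLong("value"));
          idx.incr();                  // consume one element from the count
        }
        rin.endVector("values");
        return values;
      }
    }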
+ */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.OutputStream; + +/** + */ +public class BinaryRecordOutput implements RecordOutput { + + private DataOutput out; + + private BinaryRecordOutput() {} + + private void setDataOutput(DataOutput out) { + this.out = out; + } + + private static ThreadLocal bOut = new ThreadLocal() { + protected synchronized Object initialValue() { + return new BinaryRecordOutput(); + } + }; + + /** + * Get a thread-local record output for the supplied DataOutput. + * @param out data output stream + * @return binary record output corresponding to the supplied DataOutput. + */ + public static BinaryRecordOutput get(DataOutput out) { + BinaryRecordOutput bout = (BinaryRecordOutput) bOut.get(); + bout.setDataOutput(out); + return bout; + } + + /** Creates a new instance of BinaryRecordOutput */ + public BinaryRecordOutput(OutputStream out) { + this.out = new DataOutputStream(out); + } + + /** Creates a new instance of BinaryRecordOutput */ + public BinaryRecordOutput(DataOutput out) { + this.out = out; + } + + + public void writeByte(byte b, String tag) throws IOException { + out.writeByte(b); + } + + public void writeBool(boolean b, String tag) throws IOException { + out.writeBoolean(b); + } + + public void writeInt(int i, String tag) throws IOException { + Utils.writeVInt(out, i); + } + + public void writeLong(long l, String tag) throws IOException { + Utils.writeVLong(out, l); + } + + public void writeFloat(float f, String tag) throws IOException { + out.writeFloat(f); + } + + public void writeDouble(double d, String tag) throws IOException { + out.writeDouble(d); + } + + public void writeString(String s, String tag) throws IOException { + Utils.toBinaryString(out, s); + } + + public void writeBuffer(Buffer buf, String tag) + throws IOException { + byte[] barr = buf.get(); + int len = buf.getCount(); + Utils.writeVInt(out, len); + out.write(barr, 0, len); + } + + public void startRecord(Record r, String tag) throws IOException {} + + public void endRecord(Record r, String tag) throws IOException {} + + public void startVector(ArrayList v, String tag) throws IOException { + writeInt(v.size(), tag); + } + + public void endVector(ArrayList v, String tag) throws IOException {} + + public void startMap(TreeMap v, String tag) throws IOException { + writeInt(v.size(), tag); + } + + public void endMap(TreeMap v, String tag) throws IOException {} + +} diff --git a/src/java/org/apache/hadoop/record/Buffer.java b/src/java/org/apache/hadoop/record/Buffer.java new file mode 100644 index 00000000000..d0fa95d0b48 --- /dev/null +++ b/src/java/org/apache/hadoop/record/Buffer.java @@ -0,0 +1,246 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
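BinaryRecordOutput and BinaryRecordInput pair up for an untagged, length-prefixed binary format: startRecord()/endRecord() are no-ops and the tag arguments are ignored. A minimal in-memory round trip, with arbitrary tag names and the BinaryRoundTrip class invented for illustration:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.record.BinaryRecordInput;
    import org.apache.hadoop.record.BinaryRecordOutput;
    import org.apache.hadoop.record.Buffer;

    public class BinaryRoundTrip {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        BinaryRecordOutput out = new BinaryRecordOutput(bytes);
        out.writeInt(42, "count");                        // zero-compressed VInt
        out.writeString("hello", "name");                 // VInt length + UTF-8 bytes
        out.writeBuffer(new Buffer(new byte[] {1, 2, 3}), "payload");

        BinaryRecordInput in =
            new BinaryRecordInput(new ByteArrayInputStream(bytes.toByteArray()));
        int count = in.readInt("count");                  // 42
        String name = in.readString("name");              // "hello"
        Buffer payload = in.readBuffer("payload");        // 3 bytes
      }
    }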
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.UnsupportedEncodingException; + +/** + * A byte sequence that is used as a Java native type for buffer. + * It is resizable and distinguishes between the count of the seqeunce and + * the current capacity. + * + */ +public class Buffer implements Comparable, Cloneable { + /** Number of valid bytes in this.bytes. */ + private int count; + /** Backing store for Buffer. */ + private byte[] bytes = null; + + /** + * Create a zero-count sequence. + */ + public Buffer() { + this.count = 0; + } + + /** + * Create a Buffer using the byte array as the initial value. + * + * @param bytes This array becomes the backing storage for the object. + */ + public Buffer(byte[] bytes) { + this.bytes = bytes; + this.count = (bytes == null) ? 0 : bytes.length; + } + + /** + * Create a Buffer using the byte range as the initial value. + * + * @param bytes Copy of this array becomes the backing storage for the object. + * @param offset offset into byte array + * @param length length of data + */ + public Buffer(byte[] bytes, int offset, int length) { + copy(bytes, offset, length); + } + + + /** + * Use the specified bytes array as underlying sequence. + * + * @param bytes byte sequence + */ + public void set(byte[] bytes) { + this.count = (bytes == null) ? 0 : bytes.length; + this.bytes = bytes; + } + + /** + * Copy the specified byte array to the Buffer. Replaces the current buffer. + * + * @param bytes byte array to be assigned + * @param offset offset into byte array + * @param length length of data + */ + public final void copy(byte[] bytes, int offset, int length) { + if (this.bytes == null || this.bytes.length < length) { + this.bytes = new byte[length]; + } + System.arraycopy(bytes, offset, this.bytes, 0, length); + this.count = length; + } + + /** + * Get the data from the Buffer. + * + * @return The data is only valid between 0 and getCount() - 1. + */ + public byte[] get() { + if (bytes == null) { + bytes = new byte[0]; + } + return bytes; + } + + /** + * Get the current count of the buffer. + */ + public int getCount() { + return count; + } + + /** + * Get the capacity, which is the maximum count that could handled without + * resizing the backing storage. + * + * @return The number of bytes + */ + public int getCapacity() { + return this.get().length; + } + + /** + * Change the capacity of the backing storage. + * The data is preserved if newCapacity >= getCount(). + * @param newCapacity The new capacity in bytes. + */ + public void setCapacity(int newCapacity) { + if (newCapacity < 0) { + throw new IllegalArgumentException("Invalid capacity argument "+newCapacity); + } + if (newCapacity == 0) { + this.bytes = null; + this.count = 0; + return; + } + if (newCapacity != getCapacity()) { + byte[] data = new byte[newCapacity]; + if (newCapacity < count) { + count = newCapacity; + } + if (count != 0) { + System.arraycopy(this.get(), 0, data, 0, count); + } + bytes = data; + } + } + + /** + * Reset the buffer to 0 size + */ + public void reset() { + setCapacity(0); + } + + /** + * Change the capacity of the backing store to be the same as the current + * count of buffer. + */ + public void truncate() { + setCapacity(count); + } + + /** + * Append specified bytes to the buffer. 
+ * + * @param bytes byte array to be appended + * @param offset offset into byte array + * @param length length of data + + */ + public void append(byte[] bytes, int offset, int length) { + setCapacity(count+length); + System.arraycopy(bytes, offset, this.get(), count, length); + count = count + length; + } + + /** + * Append specified bytes to the buffer + * + * @param bytes byte array to be appended + */ + public void append(byte[] bytes) { + append(bytes, 0, bytes.length); + } + + // inherit javadoc + public int hashCode() { + int hash = 1; + byte[] b = this.get(); + for (int i = 0; i < count; i++) + hash = (31 * hash) + (int)b[i]; + return hash; + } + + /** + * Define the sort order of the Buffer. + * + * @param other The other buffer + * @return Positive if this is bigger than other, 0 if they are equal, and + * negative if this is smaller than other. + */ + public int compareTo(Object other) { + Buffer right = ((Buffer) other); + byte[] lb = this.get(); + byte[] rb = right.get(); + for (int i = 0; i < count && i < right.count; i++) { + int a = (lb[i] & 0xff); + int b = (rb[i] & 0xff); + if (a != b) { + return a - b; + } + } + return count - right.count; + } + + // inherit javadoc + public boolean equals(Object other) { + if (other instanceof Buffer && this != other) { + return compareTo(other) == 0; + } + return (this == other); + } + + // inheric javadoc + public String toString() { + StringBuffer sb = new StringBuffer(2*count); + for(int idx = 0; idx < count; idx++) { + sb.append(Character.forDigit((bytes[idx] & 0xF0) >> 4, 16)); + sb.append(Character.forDigit(bytes[idx] & 0x0F, 16)); + } + return sb.toString(); + } + + /** + * Convert the byte buffer to a string an specific character encoding + * + * @param charsetName Valid Java Character Set Name + */ + public String toString(String charsetName) + throws UnsupportedEncodingException { + return new String(this.get(), 0, this.getCount(), charsetName); + } + + // inherit javadoc + public Object clone() throws CloneNotSupportedException { + Buffer result = (Buffer) super.clone(); + result.copy(this.get(), 0, this.getCount()); + return result; + } +} diff --git a/src/java/org/apache/hadoop/record/CsvRecordInput.java b/src/java/org/apache/hadoop/record/CsvRecordInput.java new file mode 100644 index 00000000000..e9c538d28c6 --- /dev/null +++ b/src/java/org/apache/hadoop/record/CsvRecordInput.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
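Buffer distinguishes the valid byte count from the capacity of its backing array, and offers both a hex rendering and a charset decoding. A short sketch based only on the methods shown above (the BufferExample class is illustrative):

    import org.apache.hadoop.record.Buffer;

    public class BufferExample {
      public static void main(String[] args) throws Exception {
        Buffer buf = new Buffer();
        buf.append("abc".getBytes("UTF-8")); // grows the backing store as needed
        int count = buf.getCount();          // 3 valid bytes
        int cap = buf.getCapacity();         // 3 here: length of the backing array
        String hex = buf.toString();         // "616263" (lower-case hex digits)
        String text = buf.toString("UTF-8"); // "abc"
        buf.truncate();                      // capacity shrinks to the current count
        buf.reset();                         // back to a zero-count buffer
      }
    }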
+ */ + +package org.apache.hadoop.record; + +import java.io.InputStreamReader; +import java.io.InputStream; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.UnsupportedEncodingException; + +/** + */ +public class CsvRecordInput implements RecordInput { + + private PushbackReader stream; + + private class CsvIndex implements Index { + public boolean done() { + char c = '\0'; + try { + c = (char) stream.read(); + stream.unread(c); + } catch (IOException ex) { + } + return (c == '}') ? true : false; + } + public void incr() {} + } + + private void throwExceptionOnError(String tag) throws IOException { + throw new IOException("Error deserializing "+tag); + } + + private String readField(String tag) throws IOException { + try { + StringBuffer buf = new StringBuffer(); + while (true) { + char c = (char) stream.read(); + switch (c) { + case ',': + return buf.toString(); + case '}': + case '\n': + case '\r': + stream.unread(c); + return buf.toString(); + default: + buf.append(c); + } + } + } catch (IOException ex) { + throw new IOException("Error reading "+tag); + } + } + + /** Creates a new instance of CsvRecordInput */ + public CsvRecordInput(InputStream in) { + try { + stream = new PushbackReader(new InputStreamReader(in, "UTF-8")); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException(ex); + } + } + + public byte readByte(String tag) throws IOException { + return (byte) readLong(tag); + } + + public boolean readBool(String tag) throws IOException { + String sval = readField(tag); + return "T".equals(sval) ? true : false; + } + + public int readInt(String tag) throws IOException { + return (int) readLong(tag); + } + + public long readLong(String tag) throws IOException { + String sval = readField(tag); + try { + long lval = Long.parseLong(sval); + return lval; + } catch (NumberFormatException ex) { + throw new IOException("Error deserializing "+tag); + } + } + + public float readFloat(String tag) throws IOException { + return (float) readDouble(tag); + } + + public double readDouble(String tag) throws IOException { + String sval = readField(tag); + try { + double dval = Double.parseDouble(sval); + return dval; + } catch (NumberFormatException ex) { + throw new IOException("Error deserializing "+tag); + } + } + + public String readString(String tag) throws IOException { + String sval = readField(tag); + return Utils.fromCSVString(sval); + } + + public Buffer readBuffer(String tag) throws IOException { + String sval = readField(tag); + return Utils.fromCSVBuffer(sval); + } + + public void startRecord(String tag) throws IOException { + if (tag != null && !"".equals(tag)) { + char c1 = (char) stream.read(); + char c2 = (char) stream.read(); + if (c1 != 's' || c2 != '{') { + throw new IOException("Error deserializing "+tag); + } + } + } + + public void endRecord(String tag) throws IOException { + char c = (char) stream.read(); + if (tag == null || "".equals(tag)) { + if (c != '\n' && c != '\r') { + throw new IOException("Error deserializing record."); + } else { + return; + } + } + + if (c != '}') { + throw new IOException("Error deserializing "+tag); + } + c = (char) stream.read(); + if (c != ',') { + stream.unread(c); + } + + return; + } + + public Index startVector(String tag) throws IOException { + char c1 = (char) stream.read(); + char c2 = (char) stream.read(); + if (c1 != 'v' || c2 != '{') { + throw new IOException("Error deserializing "+tag); + } + return new CsvIndex(); + } + + public void endVector(String tag) throws IOException { + char c = 
(char) stream.read(); + if (c != '}') { + throw new IOException("Error deserializing "+tag); + } + c = (char) stream.read(); + if (c != ',') { + stream.unread(c); + } + return; + } + + public Index startMap(String tag) throws IOException { + char c1 = (char) stream.read(); + char c2 = (char) stream.read(); + if (c1 != 'm' || c2 != '{') { + throw new IOException("Error deserializing "+tag); + } + return new CsvIndex(); + } + + public void endMap(String tag) throws IOException { + char c = (char) stream.read(); + if (c != '}') { + throw new IOException("Error deserializing "+tag); + } + c = (char) stream.read(); + if (c != ',') { + stream.unread(c); + } + return; + } +} diff --git a/src/java/org/apache/hadoop/record/CsvRecordOutput.java b/src/java/org/apache/hadoop/record/CsvRecordOutput.java new file mode 100644 index 00000000000..f2c6be6f4f9 --- /dev/null +++ b/src/java/org/apache/hadoop/record/CsvRecordOutput.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; +import java.io.PrintStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; + +/** + */ +public class CsvRecordOutput implements RecordOutput { + + private PrintStream stream; + private boolean isFirst = true; + + private void throwExceptionOnError(String tag) throws IOException { + if (stream.checkError()) { + throw new IOException("Error serializing "+tag); + } + } + + private void printCommaUnlessFirst() { + if (!isFirst) { + stream.print(","); + } + isFirst = false; + } + + /** Creates a new instance of CsvRecordOutput */ + public CsvRecordOutput(OutputStream out) { + try { + stream = new PrintStream(out, true, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException(ex); + } + } + + public void writeByte(byte b, String tag) throws IOException { + writeLong((long)b, tag); + } + + public void writeBool(boolean b, String tag) throws IOException { + printCommaUnlessFirst(); + String val = b ? 
"T" : "F"; + stream.print(val); + throwExceptionOnError(tag); + } + + public void writeInt(int i, String tag) throws IOException { + writeLong((long)i, tag); + } + + public void writeLong(long l, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print(l); + throwExceptionOnError(tag); + } + + public void writeFloat(float f, String tag) throws IOException { + writeDouble((double)f, tag); + } + + public void writeDouble(double d, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print(d); + throwExceptionOnError(tag); + } + + public void writeString(String s, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print(Utils.toCSVString(s)); + throwExceptionOnError(tag); + } + + public void writeBuffer(Buffer buf, String tag) + throws IOException { + printCommaUnlessFirst(); + stream.print(Utils.toCSVBuffer(buf)); + throwExceptionOnError(tag); + } + + public void startRecord(Record r, String tag) throws IOException { + if (tag != null && !"".equals(tag)) { + printCommaUnlessFirst(); + stream.print("s{"); + isFirst = true; + } + } + + public void endRecord(Record r, String tag) throws IOException { + if (tag == null || "".equals(tag)) { + stream.print("\n"); + isFirst = true; + } else { + stream.print("}"); + isFirst = false; + } + } + + public void startVector(ArrayList v, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print("v{"); + isFirst = true; + } + + public void endVector(ArrayList v, String tag) throws IOException { + stream.print("}"); + isFirst = false; + } + + public void startMap(TreeMap v, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print("m{"); + isFirst = true; + } + + public void endMap(TreeMap v, String tag) throws IOException { + stream.print("}"); + isFirst = false; + } +} diff --git a/src/java/org/apache/hadoop/record/Index.java b/src/java/org/apache/hadoop/record/Index.java new file mode 100644 index 00000000000..82daecec03f --- /dev/null +++ b/src/java/org/apache/hadoop/record/Index.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +/** + * Interface that acts as an iterator for deserializing maps. + * The deserializer returns an instance that the record uses to + * read vectors and maps. An example of usage is as follows: + * + * + * Index idx = startVector(...); + * while (!idx.done()) { + * .... 
// read element of a vector + * idx.incr(); + * } + * + */ +public interface Index { + boolean done(); + void incr(); +} diff --git a/src/java/org/apache/hadoop/record/Record.java b/src/java/org/apache/hadoop/record/Record.java new file mode 100644 index 00000000000..794b2597599 --- /dev/null +++ b/src/java/org/apache/hadoop/record/Record.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.hadoop.io.WritableComparable; + +/** + * Abstract class that is extended by generated classes. + * + */ +public abstract class Record implements WritableComparable, Cloneable { + + /** + * Serialize a record with tag (usually field name) + * @param rout Record output destination + * @param tag record tag (Used only in tagged serialization e.g. XML) + */ + public abstract void serialize(RecordOutput rout, String tag) + throws IOException; + + /** + * Deserialize a record with a tag (usually field name) + * @param rin Record input source + * @param tag Record tag (Used only in tagged serialization e.g.
XML) + */ + public abstract void deserialize(RecordInput rin, String tag) + throws IOException; + + // inheric javadoc + public abstract int compareTo (final Object peer) throws ClassCastException; + + /** + * Serialize a record without a tag + * @param rout Record output destination + */ + public void serialize(RecordOutput rout) throws IOException { + this.serialize(rout, ""); + } + + /** + * Deserialize a record without a tag + * @param rin Record input source + */ + public void deserialize(RecordInput rin) throws IOException { + this.deserialize(rin, ""); + } + + // inherit javadoc + public void write(final DataOutput out) throws java.io.IOException { + BinaryRecordOutput bout = BinaryRecordOutput.get(out); + this.serialize(bout); + } + + // inherit javadoc + public void readFields(final DataInput din) throws java.io.IOException { + BinaryRecordInput rin = BinaryRecordInput.get(din); + this.deserialize(rin); + } + + // inherit javadoc + public String toString() { + try { + ByteArrayOutputStream s = new ByteArrayOutputStream(); + CsvRecordOutput a = new CsvRecordOutput(s); + this.serialize(a); + return new String(s.toByteArray(), "UTF-8"); + } catch (Throwable ex) { + throw new RuntimeException(ex); + } + } +} diff --git a/src/java/org/apache/hadoop/record/RecordComparator.java b/src/java/org/apache/hadoop/record/RecordComparator.java new file mode 100644 index 00000000000..b2c2ea3e101 --- /dev/null +++ b/src/java/org/apache/hadoop/record/RecordComparator.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; + +/** + * A raw record comparator base class + */ +public abstract class RecordComparator extends WritableComparator { + + /** + * Construct a raw {@link Record} comparison implementation. */ + protected RecordComparator(Class recordClass) { + super(recordClass); + } + + // inheric JavaDoc + public abstract int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2); + + /** + * Register an optimized comparator for a {@link Record} implementation. + * + * @param c record classs for which a raw comparator is provided + * @param comparator Raw comparator instance for class c + */ + public static synchronized void define(Class c, RecordComparator comparator) { + WritableComparator.define(c, comparator); + } +} diff --git a/src/java/org/apache/hadoop/record/RecordInput.java b/src/java/org/apache/hadoop/record/RecordInput.java new file mode 100644 index 00000000000..f41f12eba11 --- /dev/null +++ b/src/java/org/apache/hadoop/record/RecordInput.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
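A subclass of Record only has to supply serialize(), deserialize() and compareTo(); write(), readFields() and toString() then come for free through the thread-local binary and CSV serializers shown earlier. Subclasses are normally generated, per the class comment above; the hand-written IntPair below is a hypothetical illustration of the same shape, not generated code.

    import java.io.IOException;
    import org.apache.hadoop.record.Record;
    import org.apache.hadoop.record.RecordInput;
    import org.apache.hadoop.record.RecordOutput;

    public class IntPair extends Record {
      private int first;
      private int second;

      public void serialize(RecordOutput rout, String tag) throws IOException {
        rout.startRecord(this, tag);
        rout.writeInt(first, "first");
        rout.writeInt(second, "second");
        rout.endRecord(this, tag);
      }

      public void deserialize(RecordInput rin, String tag) throws IOException {
        rin.startRecord(tag);
        first = rin.readInt("first");
        second = rin.readInt("second");
        rin.endRecord(tag);
      }

      public int compareTo(Object peer) throws ClassCastException {
        IntPair other = (IntPair) peer;
        if (first != other.first) {
          return first < other.first ? -1 : 1;
        }
        return second < other.second ? -1 : (second == other.second ? 0 : 1);
      }
    }

Because Record implements WritableComparable, such an instance can then be used wherever a Writable is expected, and its toString() yields the CSV form.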
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; + +/** + * Interface that all the Deserializers have to implement. + */ +public interface RecordInput { + /** + * Read a byte from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + byte readByte(String tag) throws IOException; + + /** + * Read a boolean from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + boolean readBool(String tag) throws IOException; + + /** + * Read an integer from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + int readInt(String tag) throws IOException; + + /** + * Read a long integer from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + long readLong(String tag) throws IOException; + + /** + * Read a single-precision float from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + float readFloat(String tag) throws IOException; + + /** + * Read a double-precision number from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + double readDouble(String tag) throws IOException; + + /** + * Read a UTF-8 encoded string from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + String readString(String tag) throws IOException; + + /** + * Read byte array from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + Buffer readBuffer(String tag) throws IOException; + + /** + * Check the mark for start of the serialized record. + * @param tag Used by tagged serialization formats (such as XML) + */ + void startRecord(String tag) throws IOException; + + /** + * Check the mark for end of the serialized record. + * @param tag Used by tagged serialization formats (such as XML) + */ + void endRecord(String tag) throws IOException; + + /** + * Check the mark for start of the serialized vector. + * @param tag Used by tagged serialization formats (such as XML) + * @return Index that is used to count the number of elements. + */ + Index startVector(String tag) throws IOException; + + /** + * Check the mark for end of the serialized vector. + * @param tag Used by tagged serialization formats (such as XML) + */ + void endVector(String tag) throws IOException; + + /** + * Check the mark for start of the serialized map. 
+ * @param tag Used by tagged serialization formats (such as XML) + * @return Index that is used to count the number of map entries. + */ + Index startMap(String tag) throws IOException; + + /** + * Check the mark for end of the serialized map. + * @param tag Used by tagged serialization formats (such as XML) + */ + void endMap(String tag) throws IOException; +} diff --git a/src/java/org/apache/hadoop/record/RecordOutput.java b/src/java/org/apache/hadoop/record/RecordOutput.java new file mode 100644 index 00000000000..a8aba2f3d61 --- /dev/null +++ b/src/java/org/apache/hadoop/record/RecordOutput.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; + +/** + * Interface that alll the serializers have to implement. + */ +public interface RecordOutput { + /** + * Write a byte to serialized record. + * @param b Byte to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeByte(byte b, String tag) throws IOException; + + /** + * Write a boolean to serialized record. + * @param b Boolean to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeBool(boolean b, String tag) throws IOException; + + /** + * Write an integer to serialized record. + * @param i Integer to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeInt(int i, String tag) throws IOException; + + /** + * Write a long integer to serialized record. + * @param l Long to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeLong(long l, String tag) throws IOException; + + /** + * Write a single-precision float to serialized record. + * @param f Float to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeFloat(float f, String tag) throws IOException; + + /** + * Write a double precision floating point number to serialized record. + * @param d Double to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeDouble(double d, String tag) throws IOException; + + /** + * Write a unicode string to serialized record. 
+ * @param s String to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeString(String s, String tag) throws IOException; + + /** + * Write a buffer to serialized record. + * @param buf Buffer to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeBuffer(Buffer buf, String tag) + throws IOException; + + /** + * Mark the start of a record to be serialized. + * @param r Record to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void startRecord(Record r, String tag) throws IOException; + + /** + * Mark the end of a serialized record. + * @param r Record to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void endRecord(Record r, String tag) throws IOException; + + /** + * Mark the start of a vector to be serialized. + * @param v Vector to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void startVector(ArrayList v, String tag) throws IOException; + + /** + * Mark the end of a serialized vector. + * @param v Vector to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void endVector(ArrayList v, String tag) throws IOException; + + /** + * Mark the start of a map to be serialized. + * @param m Map to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void startMap(TreeMap m, String tag) throws IOException; + + /** + * Mark the end of a serialized map. + * @param m Map to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void endMap(TreeMap m, String tag) throws IOException; +} diff --git a/src/java/org/apache/hadoop/record/Utils.java b/src/java/org/apache/hadoop/record/Utils.java new file mode 100644 index 00000000000..1e8d8277a98 --- /dev/null +++ b/src/java/org/apache/hadoop/record/Utils.java @@ -0,0 +1,490 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
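CsvRecordOutput and CsvRecordInput, shown earlier, implement these two interfaces with a textual format. A minimal round trip over in-memory streams; the empty tag makes endRecord() emit and later consume the terminating newline, as in the code above (the CsvRoundTrip class and the field tags are illustrative):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.record.CsvRecordInput;
    import org.apache.hadoop.record.CsvRecordOutput;

    public class CsvRoundTrip {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        CsvRecordOutput out = new CsvRecordOutput(bytes);
        out.startRecord(null, "");            // no-op for an empty tag
        out.writeInt(42, "count");
        out.writeString("hello", "name");
        out.endRecord(null, "");              // line becomes: 42,'hello\n

        CsvRecordInput in =
            new CsvRecordInput(new ByteArrayInputStream(bytes.toByteArray()));
        in.startRecord("");
        int count = in.readInt("count");      // 42
        String name = in.readString("name");  // "hello"
        in.endRecord("");                     // consumes the trailing newline
      }
    }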
+ */ + +package org.apache.hadoop.record; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableUtils; + +/** + * Various utility functions for Hadooop record I/O runtime. + */ +public class Utils { + + /** Cannot create a new instance of Utils */ + private Utils() { + } + + public static final char[] hexchars = { '0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'A', 'B', + 'C', 'D', 'E', 'F' }; + /** + * + * @param s + * @return + */ + static String toXMLString(String s) { + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < s.length(); idx++) { + char ch = s.charAt(idx); + if (ch == '<') { + sb.append("<"); + } else if (ch == '&') { + sb.append("&"); + } else if (ch == '%') { + sb.append("%0025"); + } else if (ch < 0x20 || + (ch > 0xD7FF && ch < 0xE000) || + (ch > 0xFFFD)) { + sb.append("%"); + sb.append(hexchars[(ch & 0xF000) >> 12]); + sb.append(hexchars[(ch & 0x0F00) >> 8]); + sb.append(hexchars[(ch & 0x00F0) >> 4]); + sb.append(hexchars[(ch & 0x000F)]); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + static private int h2c(char ch) { + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } else if (ch >= 'A' && ch <= 'F') { + return ch - 'A' + 10; + } else if (ch >= 'a' && ch <= 'f') { + return ch - 'a' + 10; + } + return 0; + } + + /** + * + * @param s + * @return + */ + static String fromXMLString(String s) { + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < s.length();) { + char ch = s.charAt(idx++); + if (ch == '%') { + int ch1 = h2c(s.charAt(idx++)) << 12; + int ch2 = h2c(s.charAt(idx++)) << 8; + int ch3 = h2c(s.charAt(idx++)) << 4; + int ch4 = h2c(s.charAt(idx++)); + char res = (char)(ch1 | ch2 | ch3 | ch4); + sb.append(res); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + /** + * + * @param s + * @return + */ + static String toCSVString(String s) { + StringBuffer sb = new StringBuffer(s.length()+1); + sb.append('\''); + int len = s.length(); + for (int i = 0; i < len; i++) { + char c = s.charAt(i); + switch(c) { + case '\0': + sb.append("%00"); + break; + case '\n': + sb.append("%0A"); + break; + case '\r': + sb.append("%0D"); + break; + case ',': + sb.append("%2C"); + break; + case '}': + sb.append("%7D"); + break; + case '%': + sb.append("%25"); + break; + default: + sb.append(c); + } + } + return sb.toString(); + } + + /** + * + * @param s + * @throws java.io.IOException + * @return + */ + static String fromCSVString(String s) throws IOException { + if (s.charAt(0) != '\'') { + throw new IOException("Error deserializing string."); + } + int len = s.length(); + StringBuffer sb = new StringBuffer(len-1); + for (int i = 1; i < len; i++) { + char c = s.charAt(i); + if (c == '%') { + char ch1 = s.charAt(i+1); + char ch2 = s.charAt(i+2); + i += 2; + if (ch1 == '0' && ch2 == '0') { + sb.append('\0'); + } else if (ch1 == '0' && ch2 == 'A') { + sb.append('\n'); + } else if (ch1 == '0' && ch2 == 'D') { + sb.append('\r'); + } else if (ch1 == '2' && ch2 == 'C') { + sb.append(','); + } else if (ch1 == '7' && ch2 == 'D') { + sb.append('}'); + } else if (ch1 == '2' && ch2 == '5') { + sb.append('%'); + } else { + throw new IOException("Error deserializing string."); + } + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * + * @param s + * @return + */ + static String toXMLBuffer(Buffer s) { + return s.toString(); + } + + /** + * + * @param s + * @throws java.io.IOException + * 
@return + */ + static Buffer fromXMLBuffer(String s) + throws IOException { + if (s.length() == 0) { return new Buffer(); } + int blen = s.length()/2; + byte[] barr = new byte[blen]; + for (int idx = 0; idx < blen; idx++) { + char c1 = s.charAt(2*idx); + char c2 = s.charAt(2*idx+1); + barr[idx] = (byte)Integer.parseInt(""+c1+c2, 16); + } + return new Buffer(barr); + } + + /** + * + * @param buf + * @return + */ + static String toCSVBuffer(Buffer buf) { + StringBuffer sb = new StringBuffer("#"); + sb.append(buf.toString()); + return sb.toString(); + } + + /** + * Converts a CSV-serialized representation of buffer to a new + * Buffer + * @param s CSV-serialized representation of buffer + * @throws java.io.IOException + * @return Deserialized Buffer + */ + static Buffer fromCSVBuffer(String s) + throws IOException { + if (s.charAt(0) != '#') { + throw new IOException("Error deserializing buffer."); + } + if (s.length() == 1) { return new Buffer(); } + int blen = (s.length()-1)/2; + byte[] barr = new byte[blen]; + for (int idx = 0; idx < blen; idx++) { + char c1 = s.charAt(2*idx+1); + char c2 = s.charAt(2*idx+2); + barr[idx] = (byte)Integer.parseInt(""+c1+c2, 16); + } + return new Buffer(barr); + } + + private static int utf8LenForCodePoint(final int cpt) throws IOException { + if (cpt >=0 && cpt <= 0x7F) { + return 1; + } + if (cpt >= 0x80 && cpt <= 0x07FF) { + return 2; + } + if ((cpt >= 0x0800 && cpt < 0xD800) || + (cpt > 0xDFFF && cpt <= 0xFFFD)) { + return 3; + } + if (cpt >= 0x10000 && cpt <= 0x10FFFF) { + return 4; + } + throw new IOException("Illegal Unicode Codepoint "+ + Integer.toHexString(cpt)+" in string."); + } + + private static final int B10 = Integer.parseInt("10000000", 2); + private static final int B110 = Integer.parseInt("11000000", 2); + private static final int B1110 = Integer.parseInt("11100000", 2); + private static final int B11110 = Integer.parseInt("11110000", 2); + private static final int B11 = Integer.parseInt("11000000", 2); + private static final int B111 = Integer.parseInt("11100000", 2); + private static final int B1111 = Integer.parseInt("11110000", 2); + private static final int B11111 = Integer.parseInt("11111000", 2); + + private static int writeUtf8(int cpt, final byte[] bytes, final int offset) + throws IOException { + if (cpt >=0 && cpt <= 0x7F) { + bytes[offset] = (byte) cpt; + return 1; + } + if (cpt >= 0x80 && cpt <= 0x07FF) { + bytes[offset+1] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset] = (byte) (B110 | (cpt & 0x1F)); + return 2; + } + if ((cpt >= 0x0800 && cpt < 0xD800) || + (cpt > 0xDFFF && cpt <= 0xFFFD)) { + bytes[offset+2] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset+1] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset] = (byte) (B1110 | (cpt & 0x0F)); + return 3; + } + if (cpt >= 0x10000 && cpt <= 0x10FFFF) { + bytes[offset+3] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset+2] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset+1] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset] = (byte) (B11110 | (cpt & 0x07)); + return 4; + } + throw new IOException("Illegal Unicode Codepoint "+ + Integer.toHexString(cpt)+" in string."); + } + + static void toBinaryString(final DataOutput out, final String str) + throws IOException { + final int strlen = str.length(); + byte[] bytes = new byte[strlen*4]; // Codepoints expand to 4 bytes max + int utf8Len = 0; + int idx = 0; + while(idx < strlen) { + final int cpt = str.codePointAt(idx); + idx += 
Character.isSupplementaryCodePoint(cpt) ? 2 : 1; + utf8Len += writeUtf8(cpt, bytes, utf8Len); + } + writeVInt(out, utf8Len); + out.write(bytes, 0, utf8Len); + } + + static boolean isValidCodePoint(int cpt) { + return !((cpt > 0x10FFFF) || + (cpt >= 0xD800 && cpt <= 0xDFFF) || + (cpt >= 0xFFFE && cpt <=0xFFFF)); + } + + private static int utf8ToCodePoint(int b1, int b2, int b3, int b4) { + int cpt = 0; + cpt = (((b1 & ~B11111) << 18) | + ((b2 & ~B11) << 12) | + ((b3 & ~B11) << 6) | + (b4 & ~B11)); + return cpt; + } + + private static int utf8ToCodePoint(int b1, int b2, int b3) { + int cpt = 0; + cpt = (((b1 & ~B1111) << 12) | ((b2 & ~B11) << 6) | (b3 & ~B11)); + return cpt; + } + + private static int utf8ToCodePoint(int b1, int b2) { + int cpt = 0; + cpt = (((b1 & ~B111) << 6) | (b2 & ~B11)); + return cpt; + } + + private static void checkB10(int b) throws IOException { + if ((b & B11) != B10) { + throw new IOException("Invalid UTF-8 representation."); + } + } + + static String fromBinaryString(final DataInput din) throws IOException { + final int utf8Len = readVInt(din); + final byte[] bytes = new byte[utf8Len]; + din.readFully(bytes); + int len = 0; + // For the most commmon case, i.e. ascii, numChars = utf8Len + StringBuilder sb = new StringBuilder(utf8Len); + while(len < utf8Len) { + int cpt = 0; + final int b1 = bytes[len++] & 0xFF; + if (b1 <= 0x7F) { + cpt = b1; + } else if ((b1 & B11111) == B11110) { + int b2 = bytes[len++] & 0xFF; + checkB10(b2); + int b3 = bytes[len++] & 0xFF; + checkB10(b3); + int b4 = bytes[len++] & 0xFF; + checkB10(b4); + cpt = utf8ToCodePoint(b1, b2, b3, b4); + } else if ((b1 & B1111) == B1110) { + int b2 = bytes[len++] & 0xFF; + checkB10(b2); + int b3 = bytes[len++] & 0xFF; + checkB10(b3); + cpt = utf8ToCodePoint(b1, b2, b3); + } else if ((b1 & B111) == B110) { + int b2 = bytes[len++] & 0xFF; + checkB10(b2); + cpt = utf8ToCodePoint(b1, b2); + } else { + throw new IOException("Invalid UTF-8 byte "+Integer.toHexString(b1)+ + " at offset "+(len-1)+" in length of "+utf8Len); + } + if (!isValidCodePoint(cpt)) { + throw new IOException("Illegal Unicode Codepoint "+ + Integer.toHexString(cpt)+" in stream."); + } + sb.appendCodePoint(cpt); + } + return sb.toString(); + } + + /** Parse a float from a byte array. */ + public static float readFloat(byte[] bytes, int start) { + return WritableComparator.readFloat(bytes, start); + } + + /** Parse a double from a byte array. */ + public static double readDouble(byte[] bytes, int start) { + return WritableComparator.readDouble(bytes, start); + } + + /** + * Reads a zero-compressed encoded long from a byte array and returns it. + * @param bytes byte array with decode long + * @param start starting index + * @throws java.io.IOException + * @return deserialized long + */ + public static long readVLong(byte[] bytes, int start) throws IOException { + return WritableComparator.readVLong(bytes, start); + } + + /** + * Reads a zero-compressed encoded integer from a byte array and returns it. + * @param bytes byte array with the encoded integer + * @param start start index + * @throws java.io.IOException + * @return deserialized integer + */ + public static int readVInt(byte[] bytes, int start) throws IOException { + return WritableComparator.readVInt(bytes, start); + } + + /** + * Reads a zero-compressed encoded long from a stream and return it. 
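The byte-array variants of readVInt()/readVLong(), together with getVIntSize(), let callers walk serialized data in place, which is the pattern a raw RecordComparator relies on. A hedged sketch (RawDecodeExample is an invented name):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.record.Utils;

    public class RawDecodeExample {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        Utils.writeVInt(dos, 7);
        Utils.writeVInt(dos, 300);
        byte[] raw = bos.toByteArray();

        // Decode directly from the byte array, advancing by the encoded size.
        int offset = 0;
        int first = Utils.readVInt(raw, offset);   // 7
        offset += Utils.getVIntSize(first);
        int second = Utils.readVInt(raw, offset);  // 300
        offset += Utils.getVIntSize(second);
      }
    }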
+ * @param in input stream + * @throws java.io.IOException + * @return deserialized long + */ + public static long readVLong(DataInput in) throws IOException { + return WritableUtils.readVLong(in); + } + + /** + * Reads a zero-compressed encoded integer from a stream and returns it. + * @param in input stream + * @throws java.io.IOException + * @return deserialized integer + */ + public static int readVInt(DataInput in) throws IOException { + return WritableUtils.readVInt(in); + } + + /** + * Get the encoded length if an integer is stored in a variable-length format + * @return the encoded length + */ + public static int getVIntSize(long i) { + return WritableUtils.getVIntSize(i); + } + + /** + * Serializes a long to a binary stream with zero-compressed encoding. + * For -112 <= i <= 127, only one byte is used with the actual value. + * For other values of i, the first byte value indicates whether the + * long is positive or negative, and the number of bytes that follow. + * If the first byte value v is between -113 and -120, the following long + * is positive, with number of bytes that follow are -(v+112). + * If the first byte value v is between -121 and -128, the following long + * is negative, with number of bytes that follow are -(v+120). Bytes are + * stored in the high-non-zero-byte-first order. + * + * @param stream Binary output stream + * @param i Long to be serialized + * @throws java.io.IOException + */ + public static void writeVLong(DataOutput stream, long i) throws IOException { + WritableUtils.writeVLong(stream, i); + } + + /** + * Serializes an int to a binary stream with zero-compressed encoding. + * + * @param stream Binary output stream + * @param i int to be serialized + * @throws java.io.IOException + */ + public static void writeVInt(DataOutput stream, int i) throws IOException { + WritableUtils.writeVInt(stream, i); + } + + /** Lexicographic order of binary data. */ + public static int compareBytes(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2); + } +} diff --git a/src/java/org/apache/hadoop/record/XmlRecordInput.java b/src/java/org/apache/hadoop/record/XmlRecordInput.java new file mode 100644 index 00000000000..5272c68727c --- /dev/null +++ b/src/java/org/apache/hadoop/record/XmlRecordInput.java @@ -0,0 +1,243 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.InputStream; +import java.io.IOException; +import java.util.ArrayList; + +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.parsers.SAXParser; + +/** + * XML Deserializer. 
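The zero-compressed encoding described above keeps small values in a single byte and spends extra bytes only as the magnitude grows. A short sketch of the resulting sizes plus a stream round trip (VLongSizeExample is an invented name; the size comments follow the description above):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.record.Utils;

    public class VLongSizeExample {
      public static void main(String[] args) throws Exception {
        int a = Utils.getVIntSize(127L);   // 1: -112..127 fit in the first byte itself
        int b = Utils.getVIntSize(-112L);  // 1
        int c = Utils.getVIntSize(128L);   // 2: marker byte + one data byte
        int d = Utils.getVIntSize(300L);   // 3: marker byte + two data bytes
        int e = Utils.getVIntSize(-300L);  // 3: negative values use the -121..-128 markers

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        Utils.writeVLong(dos, 300L);
        DataInputStream dis =
            new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
        long v = Utils.readVLong(dis);     // 300
      }
    }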
+ */ +public class XmlRecordInput implements RecordInput { + + static private class Value { + private String type; + private StringBuffer sb; + + public Value(String t) { + type = t; + sb = new StringBuffer(); + } + public void addChars(char[] buf, int offset, int len) { + sb.append(buf, offset, len); + } + public String getValue() { return sb.toString(); } + public String getType() { return type; } + } + + private static class XMLParser extends DefaultHandler { + private boolean charsValid = false; + + private ArrayList valList; + + private XMLParser(ArrayList vlist) { + valList = vlist; + } + + public void startDocument() throws SAXException {} + + public void endDocument() throws SAXException {} + + public void startElement(String ns, + String sname, + String qname, + Attributes attrs) throws SAXException { + charsValid = false; + if ("boolean".equals(qname) || + "i4".equals(qname) || + "int".equals(qname) || + "string".equals(qname) || + "double".equals(qname) || + "ex:i1".equals(qname) || + "ex:i8".equals(qname) || + "ex:float".equals(qname)) { + charsValid = true; + valList.add(new Value(qname)); + } else if ("struct".equals(qname) || + "array".equals(qname)) { + valList.add(new Value(qname)); + } + } + + public void endElement(String ns, + String sname, + String qname) throws SAXException { + charsValid = false; + if ("struct".equals(qname) || + "array".equals(qname)) { + valList.add(new Value("/"+qname)); + } + } + + public void characters(char buf[], int offset, int len) + throws SAXException { + if (charsValid) { + Value v = valList.get(valList.size()-1); + v.addChars(buf, offset, len); + } + } + + } + + private class XmlIndex implements Index { + public boolean done() { + Value v = valList.get(vIdx); + if ("/array".equals(v.getType())) { + valList.set(vIdx, null); + vIdx++; + return true; + } else { + return false; + } + } + public void incr() {} + } + + private ArrayList valList; + private int vLen; + private int vIdx; + + private Value next() throws IOException { + if (vIdx < vLen) { + Value v = valList.get(vIdx); + valList.set(vIdx, null); + vIdx++; + return v; + } else { + throw new IOException("Error in deserialization."); + } + } + + /** Creates a new instance of XmlRecordInput */ + public XmlRecordInput(InputStream in) { + try{ + valList = new ArrayList(); + DefaultHandler handler = new XMLParser(valList); + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser parser = factory.newSAXParser(); + parser.parse(in, handler); + vLen = valList.size(); + vIdx = 0; + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } + + public byte readByte(String tag) throws IOException { + Value v = next(); + if (!"ex:i1".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Byte.parseByte(v.getValue()); + } + + public boolean readBool(String tag) throws IOException { + Value v = next(); + if (!"boolean".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return "1".equals(v.getValue()); + } + + public int readInt(String tag) throws IOException { + Value v = next(); + if (!"i4".equals(v.getType()) && + !"int".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Integer.parseInt(v.getValue()); + } + + public long readLong(String tag) throws IOException { + Value v = next(); + if (!"ex:i8".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Long.parseLong(v.getValue()); + } + + public float readFloat(String 
tag) throws IOException { + Value v = next(); + if (!"ex:float".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Float.parseFloat(v.getValue()); + } + + public double readDouble(String tag) throws IOException { + Value v = next(); + if (!"double".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Double.parseDouble(v.getValue()); + } + + public String readString(String tag) throws IOException { + Value v = next(); + if (!"string".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Utils.fromXMLString(v.getValue()); + } + + public Buffer readBuffer(String tag) throws IOException { + Value v = next(); + if (!"string".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Utils.fromXMLBuffer(v.getValue()); + } + + public void startRecord(String tag) throws IOException { + Value v = next(); + if (!"struct".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + } + + public void endRecord(String tag) throws IOException { + Value v = next(); + if (!"/struct".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + } + + public Index startVector(String tag) throws IOException { + Value v = next(); + if (!"array".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return new XmlIndex(); + } + + public void endVector(String tag) throws IOException {} + + public Index startMap(String tag) throws IOException { + return startVector(tag); + } + + public void endMap(String tag) throws IOException { endVector(tag); } + +} diff --git a/src/java/org/apache/hadoop/record/XmlRecordOutput.java b/src/java/org/apache/hadoop/record/XmlRecordOutput.java new file mode 100644 index 00000000000..643ee1f225c --- /dev/null +++ b/src/java/org/apache/hadoop/record/XmlRecordOutput.java @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; +import java.io.PrintStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Stack; + +/** + * XML Serializer. 
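
The reader methods above are normally driven by generated record classes, but they can be called directly. A hypothetical hand-written use, assuming the stream holds one struct containing an int ("i4"/"int") followed by a "string":

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import org.apache.hadoop.record.XmlRecordInput;

    class XmlReadSketch {
      static void read(byte[] xml) throws IOException {
        XmlRecordInput in = new XmlRecordInput(new ByteArrayInputStream(xml));
        in.startRecord("rec");              // consumes the struct marker
        int id = in.readInt("id");          // the tag only appears in error messages
        String name = in.readString("name");
        in.endRecord("rec");                // consumes the matching end-of-struct marker
        System.out.println(id + " " + name);
      }
    }
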
+ */ +public class XmlRecordOutput implements RecordOutput { + + private PrintStream stream; + + private int indent = 0; + + private Stack compoundStack; + + private void putIndent() { + StringBuffer sb = new StringBuffer(""); + for (int idx = 0; idx < indent; idx++) { + sb.append(" "); + } + stream.print(sb.toString()); + } + + private void addIndent() { + indent++; + } + + private void closeIndent() { + indent--; + } + + private void printBeginEnvelope(String tag) { + if (!compoundStack.empty()) { + String s = compoundStack.peek(); + if ("struct".equals(s)) { + putIndent(); + stream.print("\n"); + addIndent(); + putIndent(); + stream.print(""+tag+"\n"); + putIndent(); + stream.print(""); + } else if ("vector".equals(s)) { + stream.print(""); + } else if ("map".equals(s)) { + stream.print(""); + } + } else { + stream.print(""); + } + } + + private void printEndEnvelope(String tag) { + if (!compoundStack.empty()) { + String s = compoundStack.peek(); + if ("struct".equals(s)) { + stream.print("\n"); + closeIndent(); + putIndent(); + stream.print("\n"); + } else if ("vector".equals(s)) { + stream.print("\n"); + } else if ("map".equals(s)) { + stream.print("\n"); + } + } else { + stream.print("\n"); + } + } + + private void insideVector(String tag) { + printBeginEnvelope(tag); + compoundStack.push("vector"); + } + + private void outsideVector(String tag) throws IOException { + String s = compoundStack.pop(); + if (!"vector".equals(s)) { + throw new IOException("Error serializing vector."); + } + printEndEnvelope(tag); + } + + private void insideMap(String tag) { + printBeginEnvelope(tag); + compoundStack.push("map"); + } + + private void outsideMap(String tag) throws IOException { + String s = compoundStack.pop(); + if (!"map".equals(s)) { + throw new IOException("Error serializing map."); + } + printEndEnvelope(tag); + } + + private void insideRecord(String tag) { + printBeginEnvelope(tag); + compoundStack.push("struct"); + } + + private void outsideRecord(String tag) throws IOException { + String s = compoundStack.pop(); + if (!"struct".equals(s)) { + throw new IOException("Error serializing record."); + } + printEndEnvelope(tag); + } + + /** Creates a new instance of XmlRecordOutput */ + public XmlRecordOutput(OutputStream out) { + try { + stream = new PrintStream(out, true, "UTF-8"); + compoundStack = new Stack(); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException(ex); + } + } + + public void writeByte(byte b, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Byte.toString(b)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeBool(boolean b, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(b ? 
"1" : "0"); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeInt(int i, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Integer.toString(i)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeLong(long l, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Long.toString(l)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeFloat(float f, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Float.toString(f)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeDouble(double d, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Double.toString(d)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeString(String s, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Utils.toXMLString(s)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeBuffer(Buffer buf, String tag) + throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Utils.toXMLBuffer(buf)); + stream.print(""); + printEndEnvelope(tag); + } + + public void startRecord(Record r, String tag) throws IOException { + insideRecord(tag); + stream.print("\n"); + addIndent(); + } + + public void endRecord(Record r, String tag) throws IOException { + closeIndent(); + putIndent(); + stream.print(""); + outsideRecord(tag); + } + + public void startVector(ArrayList v, String tag) throws IOException { + insideVector(tag); + stream.print("\n"); + addIndent(); + } + + public void endVector(ArrayList v, String tag) throws IOException { + closeIndent(); + putIndent(); + stream.print(""); + outsideVector(tag); + } + + public void startMap(TreeMap v, String tag) throws IOException { + insideMap(tag); + stream.print("\n"); + addIndent(); + } + + public void endMap(TreeMap v, String tag) throws IOException { + closeIndent(); + putIndent(); + stream.print(""); + outsideMap(tag); + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/CGenerator.java b/src/java/org/apache/hadoop/record/compiler/CGenerator.java new file mode 100644 index 00000000000..b62b62924bf --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CGenerator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Iterator; + +/** + * C Code generator front-end for Hadoop record I/O. 
+ */ +class CGenerator extends CodeGenerator { + + CGenerator() { + } + + /** + * Generate C code. This method only creates the requested file(s) + * and spits-out file-level elements (such as include statements etc.) + * record-level code is generated by JRecord. + */ + void genCode(String name, ArrayList ilist, + ArrayList rlist, String destDir, ArrayList options) + throws IOException { + name = new File(destDir, (new File(name)).getName()).getAbsolutePath(); + FileWriter cc = new FileWriter(name+".c"); + try { + FileWriter hh = new FileWriter(name+".h"); + try { + hh.write("#ifndef __"+name.toUpperCase().replace('.','_')+"__\n"); + hh.write("#define __"+name.toUpperCase().replace('.','_')+"__\n"); + hh.write("#include \"recordio.h\"\n"); + for (Iterator iter = ilist.iterator(); iter.hasNext();) { + hh.write("#include \""+iter.next().getName()+".h\"\n"); + } + + cc.write("#include \""+name+".h\"\n"); + + /* + for (Iterator iter = rlist.iterator(); iter.hasNext();) { + iter.next().genCppCode(hh, cc); + } + */ + + hh.write("#endif //"+name.toUpperCase().replace('.','_')+"__\n"); + } finally { + hh.close(); + } + } finally { + cc.close(); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/CodeBuffer.java b/src/java/org/apache/hadoop/record/compiler/CodeBuffer.java new file mode 100644 index 00000000000..5ba8c9fa62c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CodeBuffer.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
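
Complementing the reader sketch earlier, a hypothetical writer-side sketch for the XmlRecordOutput class above; it emits the struct that XmlReadSketch consumes. The Record argument of startRecord/endRecord is unused by this writer, so null is passed here, whereas generated records pass "this".

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.record.XmlRecordOutput;

    class XmlWriteSketch {
      static byte[] write() throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        XmlRecordOutput out = new XmlRecordOutput(bytes);  // UTF-8 PrintStream underneath
        out.startRecord(null, "rec");       // opens the struct envelope
        out.writeInt(42, "id");
        out.writeString("example", "name");
        out.endRecord(null, "rec");
        return bytes.toByteArray();
      }
    }
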
+ */ +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; + +/** + * A wrapper around StringBuffer that automatically does indentation + */ +public class CodeBuffer { + + static private ArrayList startMarkers = new ArrayList(); + static private ArrayList endMarkers = new ArrayList(); + + static { + addMarkers('{', '}'); + addMarkers('(', ')'); + } + + static void addMarkers(char ch1, char ch2) { + startMarkers.add(ch1); + endMarkers.add(ch2); + } + + private int level = 0; + private int numSpaces = 2; + private boolean firstChar = true; + private StringBuffer sb; + + /** Creates a new instance of CodeBuffer */ + CodeBuffer() { + this(2, ""); + } + + CodeBuffer(String s) { + this(2, s); + } + + CodeBuffer(int numSpaces, String s) { + sb = new StringBuffer(); + this.numSpaces = numSpaces; + this.append(s); + } + + void append(String s) { + int length = s.length(); + for (int idx = 0; idx < length; idx++) { + char ch = s.charAt(idx); + append(ch); + } + } + + void append(char ch) { + if (endMarkers.contains(ch)) { + level--; + } + if (firstChar) { + for (int idx = 0; idx < level; idx++) { + for (int num = 0; num < numSpaces; num++) { + rawAppend(' '); + } + } + } + rawAppend(ch); + firstChar = false; + if (startMarkers.contains(ch)) { + level++; + } + if (ch == '\n') { + firstChar = true; + } + } + + private void rawAppend(char ch) { + sb.append(ch); + } + + public String toString() { + return sb.toString(); + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/CodeGenerator.java b/src/java/org/apache/hadoop/record/compiler/CodeGenerator.java new file mode 100644 index 00000000000..6224eaf3927 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CodeGenerator.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +/** + * CodeGenerator is a Factory and a base class for Hadoop Record I/O translators. + * Different translators register creation methods with this factory. 
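
Before the generator classes, a quick illustration of the CodeBuffer defined above: braces and parentheses passed through append() move the indent level, and '\n' re-arms indentation for the next line. CodeBuffer and its constructor are package-private, so this sketch sits in the same package; the class name is illustrative.

    package org.apache.hadoop.record.compiler;

    class CodeBufferSketch {
      public static void main(String[] args) {
        CodeBuffer cb = new CodeBuffer();
        cb.append("class Demo {\n");
        cb.append("void run() {\n");
        cb.append("int x = 0;\n");
        cb.append("}\n");
        cb.append("}\n");
        System.out.println(cb);   // prints the body indented two spaces per level
      }
    }
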
+ */ +abstract class CodeGenerator { + + private static HashMap generators = + new HashMap(); + + static { + register("c", new CGenerator()); + register("c++", new CppGenerator()); + register("java", new JavaGenerator()); + } + + static void register(String lang, CodeGenerator gen) { + generators.put(lang, gen); + } + + static CodeGenerator get(String lang) { + return generators.get(lang); + } + + abstract void genCode(String file, + ArrayList inclFiles, + ArrayList records, + String destDir, + ArrayList options) throws IOException; +} diff --git a/src/java/org/apache/hadoop/record/compiler/Consts.java b/src/java/org/apache/hadoop/record/compiler/Consts.java new file mode 100644 index 00000000000..6bfd5360d5c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/Consts.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.IOException; +import java.util.Iterator; +import org.apache.hadoop.record.RecordInput; + +/** + * const definitions for Record I/O compiler + */ +public class Consts { + + /** Cannot create a new instance */ + private Consts() { + } + + // prefix to use for variables in generated classes + public static final String RIO_PREFIX = "_rio_"; + // other vars used in generated classes + public static final String RTI_VAR = RIO_PREFIX + "recTypeInfo"; + public static final String RTI_FILTER = RIO_PREFIX + "rtiFilter"; + public static final String RTI_FILTER_FIELDS = RIO_PREFIX + "rtiFilterFields"; + public static final String RECORD_OUTPUT = RIO_PREFIX + "a"; + public static final String RECORD_INPUT = RIO_PREFIX + "a"; + public static final String TAG = RIO_PREFIX + "tag"; + +} diff --git a/src/java/org/apache/hadoop/record/compiler/CppGenerator.java b/src/java/org/apache/hadoop/record/compiler/CppGenerator.java new file mode 100644 index 00000000000..e1fb599c049 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CppGenerator.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
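
A new backend plugs into the registry above the same way the built-in C, C++ and Java generators do. The sketch below is illustrative only: the class name is invented, and the element types of the ArrayList parameters (JFile, JRecord, String) are an assumption about the generic signature of genCode.

    package org.apache.hadoop.record.compiler;

    import java.io.IOException;
    import java.util.ArrayList;

    // Hypothetical do-nothing backend; a real one would write source files
    // under destDir, as CGenerator and CppGenerator do.
    class NullGenerator extends CodeGenerator {
      static {
        // mirrors the register(...) calls in CodeGenerator's static block
        CodeGenerator.register("null", new NullGenerator());
      }
      void genCode(String file, ArrayList<JFile> inclFiles,
                   ArrayList<JRecord> records, String destDir,
                   ArrayList<String> options) throws IOException {
        // intentionally empty
      }
    }
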
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Iterator; + +/** + * C++ Code generator front-end for Hadoop record I/O. + */ +class CppGenerator extends CodeGenerator { + + CppGenerator() { + } + + /** + * Generate C++ code. This method only creates the requested file(s) + * and spits-out file-level elements (such as include statements etc.) + * record-level code is generated by JRecord. + */ + void genCode(String name, ArrayList ilist, + ArrayList rlist, String destDir, ArrayList options) + throws IOException { + name = new File(destDir, (new File(name)).getName()).getAbsolutePath(); + + FileWriter cc = new FileWriter(name+".cc"); + try { + FileWriter hh = new FileWriter(name+".hh"); + + try { + String fileName = (new File(name)).getName(); + hh.write("#ifndef __"+fileName.toUpperCase().replace('.','_')+"__\n"); + hh.write("#define __"+fileName.toUpperCase().replace('.','_')+"__\n"); + hh.write("#include \"recordio.hh\"\n"); + hh.write("#include \"recordTypeInfo.hh\"\n"); + for (Iterator iter = ilist.iterator(); iter.hasNext();) { + hh.write("#include \""+iter.next().getName()+".hh\"\n"); + } + + cc.write("#include \""+fileName+".hh\"\n"); + cc.write("#include \"utils.hh\"\n"); + + for (Iterator iter = rlist.iterator(); iter.hasNext();) { + iter.next().genCppCode(hh, cc, options); + } + + hh.write("#endif //"+fileName.toUpperCase().replace('.','_')+"__\n"); + } finally { + hh.close(); + } + } finally { + cc.close(); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JBoolean.java b/src/java/org/apache/hadoop/record/compiler/JBoolean.java new file mode 100644 index 00000000000..28ddff09e33 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JBoolean.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + + +/** + */ +public class JBoolean extends JType { + + class JavaBoolean extends JType.JavaType { + + JavaBoolean() { + super("boolean", "Bool", "Boolean", "TypeID.RIOType.BOOL"); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+" == "+other+")? 0 : ("+ + fname+"?1:-1);\n"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.BoolTypeID"; + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+")?0:1;\n"); + } + + // In Binary format, boolean is written as byte. 
true = 1, false = 0 + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("if ("+l+"<1) {\n"); + cb.append("throw new java.io.IOException(\"Boolean is exactly 1 byte."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append(s+"++; "+l+"--;\n"); + cb.append("}\n"); + } + + // In Binary format, boolean is written as byte. true = 1, false = 0 + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("if (l1<1 || l2<1) {\n"); + cb.append("throw new java.io.IOException(\"Boolean is exactly 1 byte."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append("if (b1[s1] != b2[s2]) {\n"); + cb.append("return (b1[s1]>>32));\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("if ("+l+"<8) {\n"); + cb.append("throw new java.io.IOException(\"Double is exactly 8 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append(s+"+=8; "+l+"-=8;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("if (l1<8 || l2<8) {\n"); + cb.append("throw new java.io.IOException(\"Double is exactly 8 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append("double d1 = org.apache.hadoop.record.Utils.readDouble(b1, s1);\n"); + cb.append("double d2 = org.apache.hadoop.record.Utils.readDouble(b2, s2);\n"); + cb.append("if (d1 != d2) {\n"); + cb.append("return ((d1-d2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("s1+=8; s2+=8; l1-=8; l2-=8;\n"); + cb.append("}\n"); + } + } + + class CppDouble extends CppType { + + CppDouble() { + super("double"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_DOUBLE)"; + } + } + + + /** Creates a new instance of JDouble */ + public JDouble() { + setJavaType(new JavaDouble()); + setCppType(new CppDouble()); + setCType(new CType()); + } + + String getSignature() { + return "d"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JField.java b/src/java/org/apache/hadoop/record/compiler/JField.java new file mode 100644 index 00000000000..f6ff6f0832b --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JField.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +/** + * A thin wrappper around record field. 
+ */ +public class JField { + + private String name; + private T type; + + /** + * Creates a new instance of JField + */ + public JField(String name, T type) { + this.type = type; + this.name = name; + } + + String getName() { + return name; + } + + T getType() { + return type; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JFile.java b/src/java/org/apache/hadoop/record/compiler/JFile.java new file mode 100644 index 00000000000..5bff60e1b9a --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JFile.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.IOException; +import java.util.ArrayList; + +/** + * Container for the Hadoop Record DDL. + * The main components of the file are filename, list of included files, + * and records defined in that file. + */ +public class JFile { + /** Possibly full name of the file */ + private String mName; + /** Ordered list of included files */ + private ArrayList mInclFiles; + /** Ordered list of records declared in this file */ + private ArrayList mRecords; + + /** Creates a new instance of JFile + * + * @param name possibly full pathname to the file + * @param inclFiles included files (as JFile) + * @param recList List of records defined within this file + */ + public JFile(String name, ArrayList inclFiles, + ArrayList recList) { + mName = name; + mInclFiles = inclFiles; + mRecords = recList; + } + + /** Strip the other pathname components and return the basename */ + String getName() { + int idx = mName.lastIndexOf('/'); + return (idx > 0) ? mName.substring(idx) : mName; + } + + /** Generate record code in given language. Language should be all + * lowercase. + */ + public int genCode(String language, String destDir, ArrayList options) + throws IOException { + CodeGenerator gen = CodeGenerator.get(language); + if (gen != null) { + gen.genCode(mName, mInclFiles, mRecords, destDir, options); + } else { + System.err.println("Cannnot recognize language:"+language); + return 1; + } + return 0; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JFloat.java b/src/java/org/apache/hadoop/record/compiler/JFloat.java new file mode 100644 index 00000000000..08d772dd41d --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JFloat.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
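
JFile is the public entry point: its genCode dispatches to whichever backend the registry knows for the requested language. Below is a hypothetical programmatic use of the DDL classes (the compiler's parser front-end normally builds these objects from a DDL file); it assumes JRecord exposes the (fully-qualified name, field list) constructor that its inner JavaRecord/CppRecord types mirror, and it uses JInt, which appears later in this patch.

    import java.io.IOException;
    import java.util.ArrayList;
    import org.apache.hadoop.record.compiler.*;

    class DdlSketch {
      public static void main(String[] args) throws IOException {
        ArrayList<JField<JType>> fields = new ArrayList<JField<JType>>();
        fields.add(new JField<JType>("id", new JInt()));

        ArrayList<JRecord> records = new ArrayList<JRecord>();
        records.add(new JRecord("example.Employee", fields));

        JFile file = new JFile("employee.jr", new ArrayList<JFile>(), records);
        int rc = file.genCode("java", "./gen", new ArrayList<String>());
        System.out.println(rc);   // 0 on success, 1 for an unknown language
      }
    }
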
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +/** + */ +public class JFloat extends JType { + + class JavaFloat extends JavaType { + + JavaFloat() { + super("float", "Float", "Float", "TypeID.RIOType.FLOAT"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.FloatTypeID"; + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = Float.floatToIntBits("+fname+");\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("if ("+l+"<4) {\n"); + cb.append("throw new java.io.IOException(\"Float is exactly 4 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append(s+"+=4; "+l+"-=4;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("if (l1<4 || l2<4) {\n"); + cb.append("throw new java.io.IOException(\"Float is exactly 4 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append("float f1 = org.apache.hadoop.record.Utils.readFloat(b1, s1);\n"); + cb.append("float f2 = org.apache.hadoop.record.Utils.readFloat(b2, s2);\n"); + cb.append("if (f1 != f2) {\n"); + cb.append("return ((f1-f2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("s1+=4; s2+=4; l1-=4; l2-=4;\n"); + cb.append("}\n"); + } + } + + class CppFloat extends CppType { + + CppFloat() { + super("float"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_FLOAT)"; + } + } + + /** Creates a new instance of JFloat */ + public JFloat() { + setJavaType(new JavaFloat()); + setCppType(new CppFloat()); + setCType(new CType()); + } + + String getSignature() { + return "f"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JInt.java b/src/java/org/apache/hadoop/record/compiler/JInt.java new file mode 100644 index 00000000000..ecf735b0e41 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JInt.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + + +/** + * Code generator for "int" type + */ +public class JInt extends JType { + + class JavaInt extends JavaType { + + JavaInt() { + super("int", "Int", "Integer", "TypeID.RIOType.INT"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.IntTypeID"; + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("int i = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int z = org.apache.hadoop.record.Utils.getVIntSize(i);\n"); + cb.append(s+"+=z; "+l+"-=z;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("if (i1 != i2) {\n"); + cb.append("return ((i1-i2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);\n"); + cb.append("int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);\n"); + cb.append("s1+=z1; s2+=z2; l1-=z1; l2-=z2;\n"); + cb.append("}\n"); + } + } + + class CppInt extends CppType { + + CppInt() { + super("int32_t"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_INT)"; + } + } + + /** Creates a new instance of JInt */ + public JInt() { + setJavaType(new JavaInt()); + setCppType(new CppInt()); + setCType(new CType()); + } + + String getSignature() { + return "i"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JLong.java b/src/java/org/apache/hadoop/record/compiler/JLong.java new file mode 100644 index 00000000000..38df1e87b3c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JLong.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + +/** + * Code generator for "long" type + */ +public class JLong extends JType { + + class JavaLong extends JavaType { + + JavaLong() { + super("long", "Long", "Long", "TypeID.RIOType.LONG"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.LongTypeID"; + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = (int) ("+fname+"^("+ + fname+">>>32));\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("long i = org.apache.hadoop.record.Utils.readVLong("+b+", "+s+");\n"); + cb.append("int z = org.apache.hadoop.record.Utils.getVIntSize(i);\n"); + cb.append(s+"+=z; "+l+"-=z;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("long i1 = org.apache.hadoop.record.Utils.readVLong(b1, s1);\n"); + cb.append("long i2 = org.apache.hadoop.record.Utils.readVLong(b2, s2);\n"); + cb.append("if (i1 != i2) {\n"); + cb.append("return ((i1-i2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);\n"); + cb.append("int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);\n"); + cb.append("s1+=z1; s2+=z2; l1-=z1; l2-=z2;\n"); + cb.append("}\n"); + } + } + + class CppLong extends CppType { + + CppLong() { + super("int64_t"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_LONG)"; + } + } + + /** Creates a new instance of JLong */ + public JLong() { + setJavaType(new JavaLong()); + setCppType(new CppLong()); + setCType(new CType()); + } + + String getSignature() { + return "l"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JMap.java b/src/java/org/apache/hadoop/record/compiler/JMap.java new file mode 100644 index 00000000000..fb42dc496f5 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JMap.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
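
The genSlurpBytes fragments that JInt and JLong emit above skip a zero-compressed value inside a raw serialized buffer without materializing the record. Written out as a standalone helper (the class name is illustrative), the long case looks like this:

    import java.io.IOException;
    import org.apache.hadoop.record.Utils;

    class SlurpSketch {
      // returns how many bytes the encoded long at offset s occupies
      static int skipVLong(byte[] b, int s) throws IOException {
        long i = Utils.readVLong(b, s);
        return Utils.getVIntSize(i);
      }
    }
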
+ */ + +package org.apache.hadoop.record.compiler; + +import java.util.Map; + + +/** + */ +public class JMap extends JCompType { + + static private int level = 0; + + static private String getLevel() { return Integer.toString(level); } + + static private void incrLevel() { level++; } + + static private void decrLevel() { level--; } + + static private String getId(String id) { return id+getLevel(); } + + private JType keyType; + private JType valueType; + + class JavaMap extends JavaCompType { + + JType.JavaType key; + JType.JavaType value; + + JavaMap(JType.JavaType key, JType.JavaType value) { + super("java.util.TreeMap<"+key.getWrapperType()+","+value.getWrapperType()+">", + "Map", + "java.util.TreeMap<"+key.getWrapperType()+","+value.getWrapperType()+">", + "TypeID.RIOType.MAP"); + this.key = key; + this.value = value; + } + + String getTypeIDObjectString() { + return "new org.apache.hadoop.record.meta.MapTypeID(" + + key.getTypeIDObjectString() + ", " + + value.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + key.genSetRTIFilter(cb, nestedStructMap); + value.genSetRTIFilter(cb, nestedStructMap); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + String setType = "java.util.Set<"+key.getWrapperType()+"> "; + String iterType = "java.util.Iterator<"+key.getWrapperType()+"> "; + cb.append("{\n"); + cb.append(setType+getId(Consts.RIO_PREFIX + "set1")+" = "+ + fname+".keySet();\n"); + cb.append(setType+getId(Consts.RIO_PREFIX + "set2")+" = "+ + other+".keySet();\n"); + cb.append(iterType+getId(Consts.RIO_PREFIX + "miter1")+" = "+ + getId(Consts.RIO_PREFIX + "set1")+".iterator();\n"); + cb.append(iterType+getId(Consts.RIO_PREFIX + "miter2")+" = "+ + getId(Consts.RIO_PREFIX + "set2")+".iterator();\n"); + cb.append("for(; "+getId(Consts.RIO_PREFIX + "miter1")+".hasNext() && "+ + getId(Consts.RIO_PREFIX + "miter2")+".hasNext();) {\n"); + cb.append(key.getType()+" "+getId(Consts.RIO_PREFIX + "k1")+ + " = "+getId(Consts.RIO_PREFIX + "miter1")+".next();\n"); + cb.append(key.getType()+" "+getId(Consts.RIO_PREFIX + "k2")+ + " = "+getId(Consts.RIO_PREFIX + "miter2")+".next();\n"); + key.genCompareTo(cb, getId(Consts.RIO_PREFIX + "k1"), + getId(Consts.RIO_PREFIX + "k2")); + cb.append("if (" + Consts.RIO_PREFIX + "ret != 0) { return " + + Consts.RIO_PREFIX + "ret; }\n"); + cb.append("}\n"); + cb.append(Consts.RIO_PREFIX + "ret = ("+getId(Consts.RIO_PREFIX + "set1")+ + ".size() - "+getId(Consts.RIO_PREFIX + "set2")+".size());\n"); + cb.append("}\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(getType()+" "+fname+";\n"); + } + cb.append("{\n"); + incrLevel(); + cb.append("org.apache.hadoop.record.Index " + + getId(Consts.RIO_PREFIX + "midx")+" = " + + Consts.RECORD_INPUT + ".startMap(\""+tag+"\");\n"); + cb.append(fname+"=new "+getType()+"();\n"); + cb.append("for (; !"+getId(Consts.RIO_PREFIX + "midx")+".done(); "+ + getId(Consts.RIO_PREFIX + "midx")+".incr()) {\n"); + key.genReadMethod(cb, getId(Consts.RIO_PREFIX + "k"), + getId(Consts.RIO_PREFIX + "k"), true); + value.genReadMethod(cb, getId(Consts.RIO_PREFIX + "v"), + getId(Consts.RIO_PREFIX + "v"), true); + cb.append(fname+".put("+getId(Consts.RIO_PREFIX + "k")+","+ + getId(Consts.RIO_PREFIX + "v")+");\n"); + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endMap(\""+tag+"\");\n"); + decrLevel(); + cb.append("}\n"); + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + String setType = 
"java.util.Set> "; + String entryType = "java.util.Map.Entry<"+ + key.getWrapperType()+","+value.getWrapperType()+"> "; + String iterType = "java.util.Iterator> "; + cb.append("{\n"); + incrLevel(); + cb.append(Consts.RECORD_OUTPUT + ".startMap("+fname+",\""+tag+"\");\n"); + cb.append(setType+getId(Consts.RIO_PREFIX + "es")+" = "+ + fname+".entrySet();\n"); + cb.append("for("+iterType+getId(Consts.RIO_PREFIX + "midx")+" = "+ + getId(Consts.RIO_PREFIX + "es")+".iterator(); "+ + getId(Consts.RIO_PREFIX + "midx")+".hasNext();) {\n"); + cb.append(entryType+getId(Consts.RIO_PREFIX + "me")+" = "+ + getId(Consts.RIO_PREFIX + "midx")+".next();\n"); + cb.append(key.getType()+" "+getId(Consts.RIO_PREFIX + "k")+" = "+ + getId(Consts.RIO_PREFIX + "me")+".getKey();\n"); + cb.append(value.getType()+" "+getId(Consts.RIO_PREFIX + "v")+" = "+ + getId(Consts.RIO_PREFIX + "me")+".getValue();\n"); + key.genWriteMethod(cb, getId(Consts.RIO_PREFIX + "k"), + getId(Consts.RIO_PREFIX + "k")); + value.genWriteMethod(cb, getId(Consts.RIO_PREFIX + "v"), + getId(Consts.RIO_PREFIX + "v")); + cb.append("}\n"); + cb.append(Consts.RECORD_OUTPUT + ".endMap("+fname+",\""+tag+"\");\n"); + cb.append("}\n"); + decrLevel(); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("mi")+ + " = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int "+getId("mz")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("mi")+");\n"); + cb.append(s+"+="+getId("mz")+"; "+l+"-="+getId("mz")+";\n"); + cb.append("for (int "+getId("midx")+" = 0; "+getId("midx")+ + " < "+getId("mi")+"; "+getId("midx")+"++) {"); + key.genSlurpBytes(cb, b, s, l); + value.genSlurpBytes(cb, b, s, l); + cb.append("}\n"); + decrLevel(); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("mi1")+ + " = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int "+getId("mi2")+ + " = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("int "+getId("mz1")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("mi1")+");\n"); + cb.append("int "+getId("mz2")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("mi2")+");\n"); + cb.append("s1+="+getId("mz1")+"; s2+="+getId("mz2")+ + "; l1-="+getId("mz1")+"; l2-="+getId("mz2")+";\n"); + cb.append("for (int "+getId("midx")+" = 0; "+getId("midx")+ + " < "+getId("mi1")+" && "+getId("midx")+" < "+getId("mi2")+ + "; "+getId("midx")+"++) {"); + key.genCompareBytes(cb); + value.genSlurpBytes(cb, "b1", "s1", "l1"); + value.genSlurpBytes(cb, "b2", "s2", "l2"); + cb.append("}\n"); + cb.append("if ("+getId("mi1")+" != "+getId("mi2")+ + ") { return ("+getId("mi1")+"<"+getId("mi2")+")?-1:0; }\n"); + decrLevel(); + cb.append("}\n"); + } + } + + class CppMap extends CppCompType { + + JType.CppType key; + JType.CppType value; + + CppMap(JType.CppType key, JType.CppType value) { + super("::std::map< "+key.getType()+", "+ value.getType()+" >"); + this.key = key; + this.value = value; + } + + String getTypeIDObjectString() { + return "new ::hadoop::MapTypeID(" + + key.getTypeIDObjectString() + ", " + + value.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb) { + key.genSetRTIFilter(cb); + value.genSetRTIFilter(cb); + } + + } + + /** Creates a new instance of JMap */ + public JMap(JType t1, JType t2) { + setJavaType(new JavaMap(t1.getJavaType(), t2.getJavaType())); + setCppType(new CppMap(t1.getCppType(), 
t2.getCppType())); + setCType(new CType()); + keyType = t1; + valueType = t2; + } + + String getSignature() { + return "{" + keyType.getSignature() + valueType.getSignature() +"}"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JRecord.java b/src/java/org/apache/hadoop/record/compiler/JRecord.java new file mode 100644 index 00000000000..96955f365f0 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JRecord.java @@ -0,0 +1,806 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; + +/** + */ +public class JRecord extends JCompType { + + class JavaRecord extends JavaCompType { + + private String fullName; + private String name; + private String module; + private ArrayList> fields = + new ArrayList>(); + + JavaRecord(String name, ArrayList> flist) { + super(name, "Record", name, "TypeID.RIOType.STRUCT"); + this.fullName = name; + int idx = name.lastIndexOf('.'); + this.name = name.substring(idx+1); + this.module = name.substring(0, idx); + for (Iterator> iter = flist.iterator(); iter.hasNext();) { + JField f = iter.next(); + fields.add(new JField(f.getName(), f.getType().getJavaType())); + } + } + + String getTypeIDObjectString() { + return "new org.apache.hadoop.record.meta.StructTypeID(" + + fullName + ".getTypeInfo())"; + } + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + // ignore, if we'ev already set the type filter for this record + if (!nestedStructMap.containsKey(fullName)) { + // we set the RTI filter here + cb.append(fullName + ".setTypeFilter(rti.getNestedStructTypeInfo(\""+ + name + "\"));\n"); + nestedStructMap.put(fullName, null); + } + } + + // for each typeInfo in the filter, we see if there's a similar one in the record. + // Since we store typeInfos in ArrayLists, thsi search is O(n squared). We do it faster + // if we also store a map (of TypeInfo to index), but since setupRtiFields() is called + // only once when deserializing, we're sticking with the former, as the code is easier. 
+ void genSetupRtiFields(CodeBuffer cb) { + cb.append("private static void setupRtiFields()\n{\n"); + cb.append("if (null == " + Consts.RTI_FILTER + ") return;\n"); + cb.append("// we may already have done this\n"); + cb.append("if (null != " + Consts.RTI_FILTER_FIELDS + ") return;\n"); + cb.append("int " + Consts.RIO_PREFIX + "i, " + Consts.RIO_PREFIX + "j;\n"); + cb.append(Consts.RTI_FILTER_FIELDS + " = new int [" + + Consts.RIO_PREFIX + "rtiFilter.getFieldTypeInfos().size()];\n"); + cb.append("for (" + Consts.RIO_PREFIX + "i=0; " + Consts.RIO_PREFIX + "i<"+ + Consts.RTI_FILTER_FIELDS + ".length; " + Consts.RIO_PREFIX + "i++) {\n"); + cb.append(Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + "i] = 0;\n"); + cb.append("}\n"); + cb.append("java.util.Iterator " + Consts.RIO_PREFIX + "itFilter = " + + Consts.RIO_PREFIX + "rtiFilter.getFieldTypeInfos().iterator();\n"); + cb.append(Consts.RIO_PREFIX + "i=0;\n"); + cb.append("while (" + Consts.RIO_PREFIX + "itFilter.hasNext()) {\n"); + cb.append("org.apache.hadoop.record.meta.FieldTypeInfo " + + Consts.RIO_PREFIX + "tInfoFilter = " + + Consts.RIO_PREFIX + "itFilter.next();\n"); + cb.append("java.util.Iterator " + Consts.RIO_PREFIX + "it = " + Consts.RTI_VAR + + ".getFieldTypeInfos().iterator();\n"); + cb.append(Consts.RIO_PREFIX + "j=1;\n"); + cb.append("while (" + Consts.RIO_PREFIX + "it.hasNext()) {\n"); + cb.append("org.apache.hadoop.record.meta.FieldTypeInfo " + + Consts.RIO_PREFIX + "tInfo = " + Consts.RIO_PREFIX + "it.next();\n"); + cb.append("if (" + Consts.RIO_PREFIX + "tInfo.equals(" + + Consts.RIO_PREFIX + "tInfoFilter)) {\n"); + cb.append(Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + "i] = " + + Consts.RIO_PREFIX + "j;\n"); + cb.append("break;\n"); + cb.append("}\n"); + cb.append(Consts.RIO_PREFIX + "j++;\n"); + cb.append("}\n"); + /*int ct = 0; + for (Iterator> i = fields.iterator(); i.hasNext();) { + ct++; + JField jf = i.next(); + JavaType type = jf.getType(); + String name = jf.getName(); + if (ct != 1) { + cb.append("else "); + } + type.genRtiFieldCondition(cb, name, ct); + } + if (ct != 0) { + cb.append("else {\n"); + cb.append("rtiFilterFields[i] = 0;\n"); + cb.append("}\n"); + }*/ + cb.append(Consts.RIO_PREFIX + "i++;\n"); + cb.append("}\n"); + cb.append("}\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(fullName+" "+fname+";\n"); + } + cb.append(fname+"= new "+fullName+"();\n"); + cb.append(fname+".deserialize(" + Consts.RECORD_INPUT + ",\""+tag+"\");\n"); + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + cb.append(fname+".serialize(" + Consts.RECORD_OUTPUT + ",\""+tag+"\");\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("int r = "+fullName+ + ".Comparator.slurpRaw("+b+","+s+","+l+");\n"); + cb.append(s+"+=r; "+l+"-=r;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("int r1 = "+fullName+ + ".Comparator.compareRaw(b1,s1,l1,b2,s2,l2);\n"); + cb.append("if (r1 <= 0) { return r1; }\n"); + cb.append("s1+=r1; s2+=r1; l1-=r1; l2-=r1;\n"); + cb.append("}\n"); + } + + void genCode(String destDir, ArrayList options) throws IOException { + String pkg = module; + String pkgpath = pkg.replaceAll("\\.", "/"); + File pkgdir = new File(destDir, pkgpath); + + final File jfile = new File(pkgdir, name+".java"); + if (!pkgdir.exists()) { + // create the pkg directory + boolean ret = pkgdir.mkdirs(); + if (!ret) { + throw new 
IOException("Cannnot create directory: "+pkgpath); + } + } else if (!pkgdir.isDirectory()) { + // not a directory + throw new IOException(pkgpath+" is not a directory."); + } + + CodeBuffer cb = new CodeBuffer(); + cb.append("// File generated by hadoop record compiler. Do not edit.\n"); + cb.append("package "+module+";\n\n"); + cb.append("public class "+name+ + " extends org.apache.hadoop.record.Record {\n"); + + // type information declarations + cb.append("private static final " + + "org.apache.hadoop.record.meta.RecordTypeInfo " + + Consts.RTI_VAR + ";\n"); + cb.append("private static " + + "org.apache.hadoop.record.meta.RecordTypeInfo " + + Consts.RTI_FILTER + ";\n"); + cb.append("private static int[] " + Consts.RTI_FILTER_FIELDS + ";\n"); + + // static init for type information + cb.append("static {\n"); + cb.append(Consts.RTI_VAR + " = " + + "new org.apache.hadoop.record.meta.RecordTypeInfo(\"" + + name + "\");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genStaticTypeInfo(cb, name); + } + cb.append("}\n\n"); + + // field definitions + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genDecl(cb, name); + } + + // default constructor + cb.append("public "+name+"() { }\n"); + + // constructor + cb.append("public "+name+"(\n"); + int fIdx = 0; + for (Iterator> i = fields.iterator(); i.hasNext(); fIdx++) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genConstructorParam(cb, name); + cb.append((!i.hasNext())?"":",\n"); + } + cb.append(") {\n"); + fIdx = 0; + for (Iterator> i = fields.iterator(); i.hasNext(); fIdx++) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genConstructorSet(cb, name); + } + cb.append("}\n"); + + // getter/setter for type info + cb.append("public static org.apache.hadoop.record.meta.RecordTypeInfo" + + " getTypeInfo() {\n"); + cb.append("return " + Consts.RTI_VAR + ";\n"); + cb.append("}\n"); + cb.append("public static void setTypeFilter(" + + "org.apache.hadoop.record.meta.RecordTypeInfo rti) {\n"); + cb.append("if (null == rti) return;\n"); + cb.append(Consts.RTI_FILTER + " = rti;\n"); + cb.append(Consts.RTI_FILTER_FIELDS + " = null;\n"); + // set RTIFilter for nested structs. + // To prevent setting up the type filter for the same struct more than once, + // we use a hash map to keep track of what we've set. 
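
Putting these appends together with the serialize/deserialize/compareTo appends that follow, the emitted file for a hypothetical record with a single int field comes out roughly as below. Accessor names and the per-field compare are assumptions about the JType helpers not shown here; the _rio_ prefixes come from Consts, and the type-info plumbing (RTI fields, setTypeFilter, setupRtiFields), equals, hashCode, clone and the raw-bytes Comparator are omitted for brevity.

    package example;

    // Abridged sketch of generated output; not part of this patch.
    public class Employee extends org.apache.hadoop.record.Record {
      private int id;

      public Employee() { }

      public int getId() { return id; }
      public void setId(int id) { this.id = id; }

      public void serialize(final org.apache.hadoop.record.RecordOutput _rio_a,
                            final String _rio_tag) throws java.io.IOException {
        _rio_a.startRecord(this, _rio_tag);
        _rio_a.writeInt(id, "id");
        _rio_a.endRecord(this, _rio_tag);
      }

      // the generated deserialize() also carries the RTI-filter branch;
      // only the unfiltered path is sketched here
      public void deserialize(final org.apache.hadoop.record.RecordInput _rio_a,
                              final String _rio_tag) throws java.io.IOException {
        _rio_a.startRecord(_rio_tag);
        id = _rio_a.readInt("id");
        _rio_a.endRecord(_rio_tag);
      }

      public int compareTo(final Object _rio_peer_) throws ClassCastException {
        if (!(_rio_peer_ instanceof Employee)) {
          throw new ClassCastException("Comparing different types of records.");
        }
        Employee _rio_peer = (Employee) _rio_peer_;
        return (id == _rio_peer.id) ? 0 : ((id < _rio_peer.id) ? -1 : 1);
      }
    }
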
+ Map nestedStructMap = new HashMap(); + for (JField jf : fields) { + JavaType type = jf.getType(); + type.genSetRTIFilter(cb, nestedStructMap); + } + cb.append("}\n"); + + // setupRtiFields() + genSetupRtiFields(cb); + + // getters/setters for member variables + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genGetSet(cb, name); + } + + // serialize() + cb.append("public void serialize("+ + "final org.apache.hadoop.record.RecordOutput " + + Consts.RECORD_OUTPUT + ", final String " + Consts.TAG + ")\n"+ + "throws java.io.IOException {\n"); + cb.append(Consts.RECORD_OUTPUT + ".startRecord(this," + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genWriteMethod(cb, name, name); + } + cb.append(Consts.RECORD_OUTPUT + ".endRecord(this," + Consts.TAG+");\n"); + cb.append("}\n"); + + // deserializeWithoutFilter() + cb.append("private void deserializeWithoutFilter("+ + "final org.apache.hadoop.record.RecordInput " + + Consts.RECORD_INPUT + ", final String " + Consts.TAG + ")\n"+ + "throws java.io.IOException {\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(" + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genReadMethod(cb, name, name, false); + } + cb.append(Consts.RECORD_INPUT + ".endRecord(" + Consts.TAG+");\n"); + cb.append("}\n"); + + // deserialize() + cb.append("public void deserialize(final " + + "org.apache.hadoop.record.RecordInput " + + Consts.RECORD_INPUT + ", final String " + Consts.TAG + ")\n"+ + "throws java.io.IOException {\n"); + cb.append("if (null == " + Consts.RTI_FILTER + ") {\n"); + cb.append("deserializeWithoutFilter(" + Consts.RECORD_INPUT + ", " + + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + cb.append("// if we're here, we need to read based on version info\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(" + Consts.TAG + ");\n"); + cb.append("setupRtiFields();\n"); + cb.append("for (int " + Consts.RIO_PREFIX + "i=0; " + Consts.RIO_PREFIX + + "i<" + Consts.RTI_FILTER + ".getFieldTypeInfos().size(); " + + Consts.RIO_PREFIX + "i++) {\n"); + int ct = 0; + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + ct++; + if (1 != ct) { + cb.append("else "); + } + cb.append("if (" + ct + " == " + Consts.RTI_FILTER_FIELDS + "[" + + Consts.RIO_PREFIX + "i]) {\n"); + type.genReadMethod(cb, name, name, false); + cb.append("}\n"); + } + if (0 != ct) { + cb.append("else {\n"); + cb.append("java.util.ArrayList<" + + "org.apache.hadoop.record.meta.FieldTypeInfo> typeInfos = " + + "(java.util.ArrayList<" + + "org.apache.hadoop.record.meta.FieldTypeInfo>)" + + "(" + Consts.RTI_FILTER + ".getFieldTypeInfos());\n"); + cb.append("org.apache.hadoop.record.meta.Utils.skip(" + + Consts.RECORD_INPUT + ", " + "typeInfos.get(" + Consts.RIO_PREFIX + + "i).getFieldID(), typeInfos.get(" + + Consts.RIO_PREFIX + "i).getTypeID());\n"); + cb.append("}\n"); + } + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endRecord(" + Consts.TAG+");\n"); + cb.append("}\n"); + + // compareTo() + cb.append("public int compareTo (final Object " + Consts.RIO_PREFIX + + "peer_) throws ClassCastException {\n"); + cb.append("if (!(" + Consts.RIO_PREFIX + 
"peer_ instanceof "+name+")) {\n"); + cb.append("throw new ClassCastException(\"Comparing different types of records.\");\n"); + cb.append("}\n"); + cb.append(name+" " + Consts.RIO_PREFIX + "peer = ("+name+") " + + Consts.RIO_PREFIX + "peer_;\n"); + cb.append("int " + Consts.RIO_PREFIX + "ret = 0;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genCompareTo(cb, name, Consts.RIO_PREFIX + "peer."+name); + cb.append("if (" + Consts.RIO_PREFIX + "ret != 0) return " + + Consts.RIO_PREFIX + "ret;\n"); + } + cb.append("return " + Consts.RIO_PREFIX + "ret;\n"); + cb.append("}\n"); + + // equals() + cb.append("public boolean equals(final Object " + Consts.RIO_PREFIX + + "peer_) {\n"); + cb.append("if (!(" + Consts.RIO_PREFIX + "peer_ instanceof "+name+")) {\n"); + cb.append("return false;\n"); + cb.append("}\n"); + cb.append("if (" + Consts.RIO_PREFIX + "peer_ == this) {\n"); + cb.append("return true;\n"); + cb.append("}\n"); + cb.append(name+" " + Consts.RIO_PREFIX + "peer = ("+name+") " + + Consts.RIO_PREFIX + "peer_;\n"); + cb.append("boolean " + Consts.RIO_PREFIX + "ret = false;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genEquals(cb, name, Consts.RIO_PREFIX + "peer."+name); + cb.append("if (!" + Consts.RIO_PREFIX + "ret) return " + + Consts.RIO_PREFIX + "ret;\n"); + } + cb.append("return " + Consts.RIO_PREFIX + "ret;\n"); + cb.append("}\n"); + + // clone() + cb.append("public Object clone() throws CloneNotSupportedException {\n"); + cb.append(name+" " + Consts.RIO_PREFIX + "other = new "+name+"();\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genClone(cb, name); + } + cb.append("return " + Consts.RIO_PREFIX + "other;\n"); + cb.append("}\n"); + + cb.append("public int hashCode() {\n"); + cb.append("int " + Consts.RIO_PREFIX + "result = 17;\n"); + cb.append("int " + Consts.RIO_PREFIX + "ret;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genHashCode(cb, name); + cb.append(Consts.RIO_PREFIX + "result = 37*" + Consts.RIO_PREFIX + + "result + " + Consts.RIO_PREFIX + "ret;\n"); + } + cb.append("return " + Consts.RIO_PREFIX + "result;\n"); + cb.append("}\n"); + + cb.append("public static String signature() {\n"); + cb.append("return \""+getSignature()+"\";\n"); + cb.append("}\n"); + + cb.append("public static class Comparator extends"+ + " org.apache.hadoop.record.RecordComparator {\n"); + cb.append("public Comparator() {\n"); + cb.append("super("+name+".class);\n"); + cb.append("}\n"); + + cb.append("static public int slurpRaw(byte[] b, int s, int l) {\n"); + cb.append("try {\n"); + cb.append("int os = s;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genSlurpBytes(cb, "b","s","l"); + } + cb.append("return (os - s);\n"); + cb.append("} catch(java.io.IOException e) {\n"); + cb.append("throw new RuntimeException(e);\n"); + cb.append("}\n"); + cb.append("}\n"); + + cb.append("static public int compareRaw(byte[] b1, int s1, int l1,\n"); + cb.append(" byte[] b2, int s2, int l2) {\n"); + cb.append("try {\n"); + cb.append("int os1 = s1;\n"); + for (Iterator> i = 
fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genCompareBytes(cb); + } + cb.append("return (os1 - s1);\n"); + cb.append("} catch(java.io.IOException e) {\n"); + cb.append("throw new RuntimeException(e);\n"); + cb.append("}\n"); + cb.append("}\n"); + cb.append("public int compare(byte[] b1, int s1, int l1,\n"); + cb.append(" byte[] b2, int s2, int l2) {\n"); + cb.append("int ret = compareRaw(b1,s1,l1,b2,s2,l2);\n"); + cb.append("return (ret == -1)? -1 : ((ret==0)? 1 : 0);"); + cb.append("}\n"); + cb.append("}\n\n"); + cb.append("static {\n"); + cb.append("org.apache.hadoop.record.RecordComparator.define(" + +name+".class, new Comparator());\n"); + cb.append("}\n"); + cb.append("}\n"); + + FileWriter jj = new FileWriter(jfile); + try { + jj.write(cb.toString()); + } finally { + jj.close(); + } + } + } + + class CppRecord extends CppCompType { + + private String fullName; + private String name; + private String module; + private ArrayList> fields = + new ArrayList>(); + + CppRecord(String name, ArrayList> flist) { + super(name.replaceAll("\\.","::")); + this.fullName = name.replaceAll("\\.", "::"); + int idx = name.lastIndexOf('.'); + this.name = name.substring(idx+1); + this.module = name.substring(0, idx).replaceAll("\\.", "::"); + for (Iterator> iter = flist.iterator(); iter.hasNext();) { + JField f = iter.next(); + fields.add(new JField(f.getName(), f.getType().getCppType())); + } + } + + String getTypeIDObjectString() { + return "new ::hadoop::StructTypeID(" + + fullName + "::getTypeInfo().getFieldTypeInfos())"; + } + + String genDecl(String fname) { + return " "+name+" "+fname+";\n"; + } + + void genSetRTIFilter(CodeBuffer cb) { + // we set the RTI filter here + cb.append(fullName + "::setTypeFilter(rti.getNestedStructTypeInfo(\""+ + name + "\"));\n"); + } + + void genSetupRTIFields(CodeBuffer cb) { + cb.append("void " + fullName + "::setupRtiFields() {\n"); + cb.append("if (NULL == p" + Consts.RTI_FILTER + ") return;\n"); + cb.append("if (NULL != p" + Consts.RTI_FILTER_FIELDS + ") return;\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + " = new int[p" + + Consts.RTI_FILTER + "->getFieldTypeInfos().size()];\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "i=0; " + + Consts.RIO_PREFIX + "igetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "i++) {\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + + "i] = 0;\n"); + cb.append("}\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "i=0; " + + Consts.RIO_PREFIX + "igetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "i++) {\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "j=0; " + + Consts.RIO_PREFIX + "jgetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "j++) {\n"); + cb.append("if (*(p" + Consts.RTI_FILTER + "->getFieldTypeInfos()[" + + Consts.RIO_PREFIX + "i]) == *(p" + Consts.RTI_VAR + + "->getFieldTypeInfos()[" + Consts.RIO_PREFIX + "j])) {\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + + "i] = " + Consts.RIO_PREFIX + "j+1;\n"); + cb.append("break;\n"); + cb.append("}\n"); + cb.append("}\n"); + cb.append("}\n"); + cb.append("}\n"); + } + + void genCode(FileWriter hh, FileWriter cc, ArrayList options) + throws IOException { + CodeBuffer hb = new CodeBuffer(); + + String[] ns = module.split("::"); + for (int i = 0; i < ns.length; i++) { + hb.append("namespace "+ns[i]+" {\n"); + } + + hb.append("class "+name+" : public ::hadoop::Record {\n"); + hb.append("private:\n"); + 
+ for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + type.genDecl(hb, name); + } + + // type info vars + hb.append("static ::hadoop::RecordTypeInfo* p" + Consts.RTI_VAR + ";\n"); + hb.append("static ::hadoop::RecordTypeInfo* p" + Consts.RTI_FILTER + ";\n"); + hb.append("static int* p" + Consts.RTI_FILTER_FIELDS + ";\n"); + hb.append("static ::hadoop::RecordTypeInfo* setupTypeInfo();\n"); + hb.append("static void setupRtiFields();\n"); + hb.append("virtual void deserializeWithoutFilter(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ");\n"); + hb.append("public:\n"); + hb.append("static const ::hadoop::RecordTypeInfo& getTypeInfo() " + + "{return *p" + Consts.RTI_VAR + ";}\n"); + hb.append("static void setTypeFilter(const ::hadoop::RecordTypeInfo& rti);\n"); + hb.append("static void setTypeFilter(const ::hadoop::RecordTypeInfo* prti);\n"); + hb.append("virtual void serialize(::hadoop::OArchive& " + + Consts.RECORD_OUTPUT + ", const char* " + Consts.TAG + ") const;\n"); + hb.append("virtual void deserialize(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ");\n"); + hb.append("virtual const ::std::string& type() const;\n"); + hb.append("virtual const ::std::string& signature() const;\n"); + hb.append("virtual bool operator<(const "+name+"& peer_) const;\n"); + hb.append("virtual bool operator==(const "+name+"& peer_) const;\n"); + hb.append("virtual ~"+name+"() {};\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + type.genGetSet(hb, name); + } + hb.append("}; // end record "+name+"\n"); + for (int i=ns.length-1; i>=0; i--) { + hb.append("} // end namespace "+ns[i]+"\n"); + } + + hh.write(hb.toString()); + + CodeBuffer cb = new CodeBuffer(); + + // initialize type info vars + cb.append("::hadoop::RecordTypeInfo* " + fullName + "::p" + + Consts.RTI_VAR + " = " + fullName + "::setupTypeInfo();\n"); + cb.append("::hadoop::RecordTypeInfo* " + fullName + "::p" + + Consts.RTI_FILTER + " = NULL;\n"); + cb.append("int* " + fullName + "::p" + + Consts.RTI_FILTER_FIELDS + " = NULL;\n\n"); + + // setupTypeInfo() + cb.append("::hadoop::RecordTypeInfo* "+fullName+"::setupTypeInfo() {\n"); + cb.append("::hadoop::RecordTypeInfo* p = new ::hadoop::RecordTypeInfo(\"" + + name + "\");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + type.genStaticTypeInfo(cb, name); + } + cb.append("return p;\n"); + cb.append("}\n"); + + // setTypeFilter() + cb.append("void "+fullName+"::setTypeFilter(const " + + "::hadoop::RecordTypeInfo& rti) {\n"); + cb.append("if (NULL != p" + Consts.RTI_FILTER + ") {\n"); + cb.append("delete p" + Consts.RTI_FILTER + ";\n"); + cb.append("}\n"); + cb.append("p" + Consts.RTI_FILTER + " = new ::hadoop::RecordTypeInfo(rti);\n"); + cb.append("if (NULL != p" + Consts.RTI_FILTER_FIELDS + ") {\n"); + cb.append("delete p" + Consts.RTI_FILTER_FIELDS + ";\n"); + cb.append("}\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + " = NULL;\n"); + // set RTIFilter for nested structs. We may end up with multiple lines that + // do the same thing, if the same struct is nested in more than one field, + // but that's OK. 
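Both this C++ path and the Java setTypeFilter() generated earlier exist so that a reader whose record definition differs from the writer's can skip fields it does not know about. A minimal sketch of how the generated Java API would be used, assuming MyRecord stands in for any generated record class and that the writer serialized its RecordTypeInfo ahead of the records:

    // Illustrative only: deserialize records written against a different
    // version of the DDL, using the writer's RecordTypeInfo as a filter.
    static MyRecord readFiltered(org.apache.hadoop.record.RecordInput in)
        throws java.io.IOException {
      org.apache.hadoop.record.meta.RecordTypeInfo writerInfo =
          new org.apache.hadoop.record.meta.RecordTypeInfo();
      writerInfo.deserialize(in, "typeInfo");   // schema the writer used
      MyRecord.setTypeFilter(writerInfo);       // generated static setter
      MyRecord rec = new MyRecord();
      rec.deserialize(in, "rec");               // unknown fields are skipped
      return rec;
    }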
+ for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + CppType type = jf.getType(); + type.genSetRTIFilter(cb); + } + cb.append("}\n"); + + // setTypeFilter() + cb.append("void "+fullName+"::setTypeFilter(const " + + "::hadoop::RecordTypeInfo* prti) {\n"); + cb.append("if (NULL != prti) {\n"); + cb.append("setTypeFilter(*prti);\n"); + cb.append("}\n"); + cb.append("}\n"); + + // setupRtiFields() + genSetupRTIFields(cb); + + // serialize() + cb.append("void "+fullName+"::serialize(::hadoop::OArchive& " + + Consts.RECORD_OUTPUT + ", const char* " + Consts.TAG + ") const {\n"); + cb.append(Consts.RECORD_OUTPUT + ".startRecord(*this," + + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + if (type instanceof JBuffer.CppBuffer) { + cb.append(Consts.RECORD_OUTPUT + ".serialize("+name+","+name+ + ".length(),\""+name+"\");\n"); + } else { + cb.append(Consts.RECORD_OUTPUT + ".serialize("+name+",\""+ + name+"\");\n"); + } + } + cb.append(Consts.RECORD_OUTPUT + ".endRecord(*this," + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + + // deserializeWithoutFilter() + cb.append("void "+fullName+"::deserializeWithoutFilter(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ") {\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(*this," + + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + if (type instanceof JBuffer.CppBuffer) { + cb.append("{\nsize_t len=0; " + Consts.RECORD_INPUT + ".deserialize("+ + name+",len,\""+name+"\");\n}\n"); + } else { + cb.append(Consts.RECORD_INPUT + ".deserialize("+name+",\""+ + name+"\");\n"); + } + } + cb.append(Consts.RECORD_INPUT + ".endRecord(*this," + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + + // deserialize() + cb.append("void "+fullName+"::deserialize(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ") {\n"); + cb.append("if (NULL == p" + Consts.RTI_FILTER + ") {\n"); + cb.append("deserializeWithoutFilter(" + Consts.RECORD_INPUT + ", " + + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + cb.append("// if we're here, we need to read based on version info\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(*this," + + Consts.TAG + ");\n"); + cb.append("setupRtiFields();\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "i=0; " + + Consts.RIO_PREFIX + "igetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "i++) {\n"); + int ct = 0; + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + ct++; + if (1 != ct) { + cb.append("else "); + } + cb.append("if (" + ct + " == p" + Consts.RTI_FILTER_FIELDS + "[" + + Consts.RIO_PREFIX + "i]) {\n"); + if (type instanceof JBuffer.CppBuffer) { + cb.append("{\nsize_t len=0; " + Consts.RECORD_INPUT + ".deserialize("+ + name+",len,\""+name+"\");\n}\n"); + } else { + cb.append(Consts.RECORD_INPUT + ".deserialize("+name+",\""+ + name+"\");\n"); + } + cb.append("}\n"); + } + if (0 != ct) { + cb.append("else {\n"); + cb.append("const std::vector< ::hadoop::FieldTypeInfo* >& typeInfos = p" + + Consts.RTI_FILTER + "->getFieldTypeInfos();\n"); + cb.append("::hadoop::Utils::skip(" + Consts.RECORD_INPUT + + ", typeInfos[" + Consts.RIO_PREFIX + "i]->getFieldID()->c_str()" 
+ + ", *(typeInfos[" + Consts.RIO_PREFIX + "i]->getTypeID()));\n"); + cb.append("}\n"); + } + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endRecord(*this, " + Consts.TAG+");\n"); + cb.append("}\n"); + + // operator < + cb.append("bool "+fullName+"::operator< (const "+fullName+"& peer_) const {\n"); + cb.append("return (1\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + cb.append("&& ("+name+" < peer_."+name+")\n"); + } + cb.append(");\n"); + cb.append("}\n"); + + cb.append("bool "+fullName+"::operator== (const "+fullName+"& peer_) const {\n"); + cb.append("return (1\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + cb.append("&& ("+name+" == peer_."+name+")\n"); + } + cb.append(");\n"); + cb.append("}\n"); + + cb.append("const ::std::string&"+fullName+"::type() const {\n"); + cb.append("static const ::std::string type_(\""+name+"\");\n"); + cb.append("return type_;\n"); + cb.append("}\n"); + + cb.append("const ::std::string&"+fullName+"::signature() const {\n"); + cb.append("static const ::std::string sig_(\""+getSignature()+"\");\n"); + cb.append("return sig_;\n"); + cb.append("}\n"); + + cc.write(cb.toString()); + } + } + + class CRecord extends CCompType { + + } + + private String signature; + + /** + * Creates a new instance of JRecord + */ + public JRecord(String name, ArrayList> flist) { + setJavaType(new JavaRecord(name, flist)); + setCppType(new CppRecord(name, flist)); + setCType(new CRecord()); + // precompute signature + int idx = name.lastIndexOf('.'); + String recName = name.substring(idx+1); + StringBuffer sb = new StringBuffer(); + sb.append("L").append(recName).append("("); + for (Iterator> i = flist.iterator(); i.hasNext();) { + String s = i.next().getType().getSignature(); + sb.append(s); + } + sb.append(")"); + signature = sb.toString(); + } + + String getSignature() { + return signature; + } + + void genCppCode(FileWriter hh, FileWriter cc, ArrayList options) + throws IOException { + ((CppRecord)getCppType()).genCode(hh, cc, options); + } + + void genJavaCode(String destDir, ArrayList options) + throws IOException { + ((JavaRecord)getJavaType()).genCode(destDir, options); + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JString.java b/src/java/org/apache/hadoop/record/compiler/JString.java new file mode 100644 index 00000000000..931359b993c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JString.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + + +/** + */ +public class JString extends JCompType { + + class JavaString extends JavaCompType { + + JavaString() { + super("String", "String", "String", "TypeID.RIOType.STRING"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.StringTypeID"; + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("int i = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int z = org.apache.hadoop.record.Utils.getVIntSize(i);\n"); + cb.append(s+"+=(z+i); "+l+"-= (z+i);\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);\n"); + cb.append("int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);\n"); + cb.append("s1+=z1; s2+=z2; l1-=z1; l2-=z2;\n"); + cb.append("int r1 = org.apache.hadoop.record.Utils.compareBytes(b1,s1,i1,b2,s2,i2);\n"); + cb.append("if (r1 != 0) { return (r1<0)?-1:0; }\n"); + cb.append("s1+=i1; s2+=i2; l1-=i1; l1-=i2;\n"); + cb.append("}\n"); + } + + void genClone(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "other."+fname+" = this."+fname+";\n"); + } + } + + class CppString extends CppCompType { + + CppString() { + super("::std::string"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_STRING)"; + } + } + + /** Creates a new instance of JString */ + public JString() { + setJavaType(new JavaString()); + setCppType(new CppString()); + setCType(new CCompType()); + } + + String getSignature() { + return "s"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JType.java b/src/java/org/apache/hadoop/record/compiler/JType.java new file mode 100644 index 00000000000..6f1ff67ccb4 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JType.java @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.util.Map; + + +/** + * Abstract Base class for all types supported by Hadoop Record I/O. 
+ */ +abstract public class JType { + + static String toCamelCase(String name) { + char firstChar = name.charAt(0); + if (Character.isLowerCase(firstChar)) { + return ""+Character.toUpperCase(firstChar) + name.substring(1); + } + return name; + } + + JavaType javaType; + CppType cppType; + CType cType; + + abstract class JavaType { + private String name; + private String methodSuffix; + private String wrapper; + private String typeIDByteString; // points to TypeID.RIOType + + JavaType(String javaname, + String suffix, + String wrapper, + String typeIDByteString) { + this.name = javaname; + this.methodSuffix = suffix; + this.wrapper = wrapper; + this.typeIDByteString = typeIDByteString; + } + + void genDecl(CodeBuffer cb, String fname) { + cb.append("private "+name+" "+fname+";\n"); + } + + void genStaticTypeInfo(CodeBuffer cb, String fname) { + cb.append(Consts.RTI_VAR + ".addField(\"" + fname + "\", " + + getTypeIDObjectString() + ");\n"); + } + + abstract String getTypeIDObjectString(); + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + // do nothing by default + return; + } + + /*void genRtiFieldCondition(CodeBuffer cb, String fname, int ct) { + cb.append("if ((tInfo.fieldID.equals(\"" + fname + "\")) && (typeVal ==" + + " org.apache.hadoop.record.meta." + getTypeIDByteString() + ")) {\n"); + cb.append("rtiFilterFields[i] = " + ct + ";\n"); + cb.append("}\n"); + } + + void genRtiNestedFieldCondition(CodeBuffer cb, String varName, int ct) { + cb.append("if (" + varName + ".getElementTypeID().getTypeVal() == " + + "org.apache.hadoop.record.meta." + getTypeIDByteString() + + ") {\n"); + cb.append("rtiFilterFields[i] = " + ct + ";\n"); + cb.append("}\n"); + }*/ + + void genConstructorParam(CodeBuffer cb, String fname) { + cb.append("final "+name+" "+fname); + } + + void genGetSet(CodeBuffer cb, String fname) { + cb.append("public "+name+" get"+toCamelCase(fname)+"() {\n"); + cb.append("return "+fname+";\n"); + cb.append("}\n"); + cb.append("public void set"+toCamelCase(fname)+"(final "+name+" "+fname+") {\n"); + cb.append("this."+fname+"="+fname+";\n"); + cb.append("}\n"); + } + + String getType() { + return name; + } + + String getWrapperType() { + return wrapper; + } + + String getMethodSuffix() { + return methodSuffix; + } + + String getTypeIDByteString() { + return typeIDByteString; + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + cb.append(Consts.RECORD_OUTPUT + ".write"+methodSuffix + + "("+fname+",\""+tag+"\");\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(name+" "+fname+";\n"); + } + cb.append(fname+"=" + Consts.RECORD_INPUT + ".read" + + methodSuffix+"(\""+tag+"\");\n"); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+" == "+other+")? 
0 :(("+ + fname+"<"+other+")?-1:1);\n"); + } + + abstract void genCompareBytes(CodeBuffer cb); + + abstract void genSlurpBytes(CodeBuffer cb, String b, String s, String l); + + void genEquals(CodeBuffer cb, String fname, String peer) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+"=="+peer+");\n"); + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = (int)"+fname+";\n"); + } + + void genConstructorSet(CodeBuffer cb, String fname) { + cb.append("this."+fname+" = "+fname+";\n"); + } + + void genClone(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "other."+fname+" = this."+fname+";\n"); + } + } + + abstract class CppType { + private String name; + + CppType(String cppname) { + name = cppname; + } + + void genDecl(CodeBuffer cb, String fname) { + cb.append(name+" "+fname+";\n"); + } + + void genStaticTypeInfo(CodeBuffer cb, String fname) { + cb.append("p->addField(new ::std::string(\"" + + fname + "\"), " + getTypeIDObjectString() + ");\n"); + } + + void genGetSet(CodeBuffer cb, String fname) { + cb.append("virtual "+name+" get"+toCamelCase(fname)+"() const {\n"); + cb.append("return "+fname+";\n"); + cb.append("}\n"); + cb.append("virtual void set"+toCamelCase(fname)+"("+name+" m_) {\n"); + cb.append(fname+"=m_;\n"); + cb.append("}\n"); + } + + abstract String getTypeIDObjectString(); + + void genSetRTIFilter(CodeBuffer cb) { + // do nothing by default + return; + } + + String getType() { + return name; + } + } + + class CType { + + } + + abstract String getSignature(); + + void setJavaType(JavaType jType) { + this.javaType = jType; + } + + JavaType getJavaType() { + return javaType; + } + + void setCppType(CppType cppType) { + this.cppType = cppType; + } + + CppType getCppType() { + return cppType; + } + + void setCType(CType cType) { + this.cType = cType; + } + + CType getCType() { + return cType; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JVector.java b/src/java/org/apache/hadoop/record/compiler/JVector.java new file mode 100644 index 00000000000..f87442ad716 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JVector.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + +import java.util.Map; + +/** + */ +public class JVector extends JCompType { + + static private int level = 0; + + static private String getId(String id) { return id+getLevel(); } + + static private String getLevel() { return Integer.toString(level); } + + static private void incrLevel() { level++; } + + static private void decrLevel() { level--; } + + private JType type; + + class JavaVector extends JavaCompType { + + private JType.JavaType element; + + JavaVector(JType.JavaType t) { + super("java.util.ArrayList<"+t.getWrapperType()+">", + "Vector", "java.util.ArrayList<"+t.getWrapperType()+">", + "TypeID.RIOType.VECTOR"); + element = t; + } + + String getTypeIDObjectString() { + return "new org.apache.hadoop.record.meta.VectorTypeID(" + + element.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + element.genSetRTIFilter(cb, nestedStructMap); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId(Consts.RIO_PREFIX + "len1")+" = "+fname+ + ".size();\n"); + cb.append("int "+getId(Consts.RIO_PREFIX + "len2")+" = "+other+ + ".size();\n"); + cb.append("for(int "+getId(Consts.RIO_PREFIX + "vidx")+" = 0; "+ + getId(Consts.RIO_PREFIX + "vidx")+"<"+getId(Consts.RIO_PREFIX + "len1")+ + " && "+getId(Consts.RIO_PREFIX + "vidx")+"<"+ + getId(Consts.RIO_PREFIX + "len2")+"; "+ + getId(Consts.RIO_PREFIX + "vidx")+"++) {\n"); + cb.append(element.getType()+" "+getId(Consts.RIO_PREFIX + "e1")+ + " = "+fname+ + ".get("+getId(Consts.RIO_PREFIX + "vidx")+");\n"); + cb.append(element.getType()+" "+getId(Consts.RIO_PREFIX + "e2")+ + " = "+other+ + ".get("+getId(Consts.RIO_PREFIX + "vidx")+");\n"); + element.genCompareTo(cb, getId(Consts.RIO_PREFIX + "e1"), + getId(Consts.RIO_PREFIX + "e2")); + cb.append("if (" + Consts.RIO_PREFIX + "ret != 0) { return " + + Consts.RIO_PREFIX + "ret; }\n"); + cb.append("}\n"); + cb.append(Consts.RIO_PREFIX + "ret = ("+getId(Consts.RIO_PREFIX + "len1")+ + " - "+getId(Consts.RIO_PREFIX + "len2")+");\n"); + decrLevel(); + cb.append("}\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(getType()+" "+fname+";\n"); + } + cb.append("{\n"); + incrLevel(); + cb.append("org.apache.hadoop.record.Index "+ + getId(Consts.RIO_PREFIX + "vidx")+" = " + + Consts.RECORD_INPUT + ".startVector(\""+tag+"\");\n"); + cb.append(fname+"=new "+getType()+"();\n"); + cb.append("for (; !"+getId(Consts.RIO_PREFIX + "vidx")+".done(); " + + getId(Consts.RIO_PREFIX + "vidx")+".incr()) {\n"); + element.genReadMethod(cb, getId(Consts.RIO_PREFIX + "e"), + getId(Consts.RIO_PREFIX + "e"), true); + cb.append(fname+".add("+getId(Consts.RIO_PREFIX + "e")+");\n"); + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endVector(\""+tag+"\");\n"); + decrLevel(); + cb.append("}\n"); + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + cb.append("{\n"); + incrLevel(); + cb.append(Consts.RECORD_OUTPUT + ".startVector("+fname+",\""+tag+"\");\n"); + cb.append("int "+getId(Consts.RIO_PREFIX + "len")+" = "+fname+".size();\n"); + cb.append("for(int "+getId(Consts.RIO_PREFIX + "vidx")+" = 0; " + + getId(Consts.RIO_PREFIX + "vidx")+"<"+getId(Consts.RIO_PREFIX + "len")+ + "; "+getId(Consts.RIO_PREFIX + "vidx")+"++) {\n"); + cb.append(element.getType()+" "+getId(Consts.RIO_PREFIX + "e")+" = "+ + fname+".get("+getId(Consts.RIO_PREFIX + "vidx")+");\n"); + 
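For reference, the write loop being assembled in this genWriteMethod expands, for a hypothetical field "values" declared as vector<int>, to roughly the sketch below; the numeric suffixes on real generated identifiers come from getId() and Consts.RIO_PREFIX.

    // Rough expansion of the generated vector serialization for "values".
    void writeValues(java.util.ArrayList<Integer> values,
                     org.apache.hadoop.record.RecordOutput out)
        throws java.io.IOException {
      out.startVector(values, "values");
      int len1 = values.size();
      for (int vidx1 = 0; vidx1 < len1; vidx1++) {
        int e1 = values.get(vidx1);
        out.writeInt(e1, "e1");
      }
      out.endVector(values, "values");
    }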
element.genWriteMethod(cb, getId(Consts.RIO_PREFIX + "e"), + getId(Consts.RIO_PREFIX + "e")); + cb.append("}\n"); + cb.append(Consts.RECORD_OUTPUT + ".endVector("+fname+",\""+tag+"\");\n"); + cb.append("}\n"); + decrLevel(); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("vi")+ + " = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int "+getId("vz")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("vi")+");\n"); + cb.append(s+"+="+getId("vz")+"; "+l+"-="+getId("vz")+";\n"); + cb.append("for (int "+getId("vidx")+" = 0; "+getId("vidx")+ + " < "+getId("vi")+"; "+getId("vidx")+"++)"); + element.genSlurpBytes(cb, b, s, l); + decrLevel(); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("vi1")+ + " = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int "+getId("vi2")+ + " = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("int "+getId("vz1")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("vi1")+");\n"); + cb.append("int "+getId("vz2")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("vi2")+");\n"); + cb.append("s1+="+getId("vz1")+"; s2+="+getId("vz2")+ + "; l1-="+getId("vz1")+"; l2-="+getId("vz2")+";\n"); + cb.append("for (int "+getId("vidx")+" = 0; "+getId("vidx")+ + " < "+getId("vi1")+" && "+getId("vidx")+" < "+getId("vi2")+ + "; "+getId("vidx")+"++)"); + element.genCompareBytes(cb); + cb.append("if ("+getId("vi1")+" != "+getId("vi2")+ + ") { return ("+getId("vi1")+"<"+getId("vi2")+")?-1:0; }\n"); + decrLevel(); + cb.append("}\n"); + } + } + + class CppVector extends CppCompType { + + private JType.CppType element; + + CppVector(JType.CppType t) { + super("::std::vector< "+t.getType()+" >"); + element = t; + } + + String getTypeIDObjectString() { + return "new ::hadoop::VectorTypeID(" + + element.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb) { + element.genSetRTIFilter(cb); + } + + } + + /** Creates a new instance of JVector */ + public JVector(JType t) { + type = t; + setJavaType(new JavaVector(t.getJavaType())); + setCppType(new CppVector(t.getCppType())); + setCType(new CCompType()); + } + + String getSignature() { + return "[" + type.getSignature() + "]"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java b/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java new file mode 100644 index 00000000000..04c4bd84733 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; +import java.io.IOException; +import java.util.Iterator; + +/** + * Java Code generator front-end for Hadoop record I/O. + */ +class JavaGenerator extends CodeGenerator { + + JavaGenerator() { + } + + /** + * Generate Java code for records. This method is only a front-end to + * JRecord, since one file is generated for each record. + * + * @param name possibly full pathname to the file + * @param ilist included files (as JFile) + * @param rlist List of records defined within this file + * @param destDir output directory + */ + void genCode(String name, ArrayList ilist, + ArrayList rlist, String destDir, ArrayList options) + throws IOException { + for (Iterator iter = rlist.iterator(); iter.hasNext();) { + JRecord rec = iter.next(); + rec.genJavaCode(destDir, options); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/ant/RccTask.java b/src/java/org/apache/hadoop/record/compiler/ant/RccTask.java new file mode 100644 index 00000000000..ce1bc2cbf9d --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/ant/RccTask.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.record.compiler.ant; + +import java.io.File; +import java.util.ArrayList; +import org.apache.hadoop.record.compiler.generated.Rcc; +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.DirectoryScanner; +import org.apache.tools.ant.Project; +import org.apache.tools.ant.Task; +import org.apache.tools.ant.types.FileSet; + +/** + * Hadoop record compiler ant Task + *

This task takes the given record definition files and compiles them into
+ * java or c++
+ * files. It is then up to the user to compile the generated files.
+ *
+ * The task requires the file or the nested fileset element to be
+ * specified. Optional attributes are language (set the output
+ * language, default is "java"),
+ * destdir (name of the destination directory for generated java/c++
+ * code, default is ".") and failonerror (specifies error handling
+ * behavior. default is true).
+ *
+ * Usage:
+ *
+ * <recordcc
+ *       destdir="${basedir}/gensrc"
+ *       language="java">
+ *   <fileset include="**\/*.jr" />
+ * </recordcc>
+ *
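+ * The same compile can also be driven from Java code; a rough, illustrative
+ * equivalent using the file attribute instead of a fileset (paths are made
+ * up) is:
+ *
+ *   RccTask rcc = new RccTask();
+ *   rcc.setLanguage("java");
+ *   rcc.setDestdir(new File("gensrc"));
+ *   rcc.setFile(new File("inclrec.jr"));
+ *   rcc.execute();
+ *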
+ */ +public class RccTask extends Task { + + private String language = "java"; + private File src; + private File dest = new File("."); + private final ArrayList filesets = new ArrayList(); + private boolean failOnError = true; + + /** Creates a new instance of RccTask */ + public RccTask() { + } + + /** + * Sets the output language option + * @param language "java"/"c++" + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Sets the record definition file attribute + * @param file record definition file + */ + public void setFile(File file) { + this.src = file; + } + + /** + * Given multiple files (via fileset), set the error handling behavior + * @param flag true will throw build exception in case of failure (default) + */ + public void setFailonerror(boolean flag) { + this.failOnError = flag; + } + + /** + * Sets directory where output files will be generated + * @param dir output directory + */ + public void setDestdir(File dir) { + this.dest = dir; + } + + /** + * Adds a fileset that can consist of one or more files + * @param set Set of record definition files + */ + public void addFileset(FileSet set) { + filesets.add(set); + } + + /** + * Invoke the Hadoop record compiler on each record definition file + */ + public void execute() throws BuildException { + if (src == null && filesets.size()==0) { + throw new BuildException("There must be a file attribute or a fileset child element"); + } + if (src != null) { + doCompile(src); + } + Project myProject = getProject(); + for (int i = 0; i < filesets.size(); i++) { + FileSet fs = filesets.get(i); + DirectoryScanner ds = fs.getDirectoryScanner(myProject); + File dir = fs.getDir(myProject); + String[] srcs = ds.getIncludedFiles(); + for (int j = 0; j < srcs.length; j++) { + doCompile(new File(dir, srcs[j])); + } + } + } + + private void doCompile(File file) throws BuildException { + String[] args = new String[5]; + args[0] = "--language"; + args[1] = this.language; + args[2] = "--destdir"; + args[3] = this.dest.getPath(); + args[4] = file.getPath(); + int retVal = Rcc.driver(args); + if (retVal != 0 && failOnError) { + throw new BuildException("Hadoop record compiler returned error code "+retVal); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java b/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java new file mode 100644 index 00000000000..59d2e467623 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java @@ -0,0 +1,210 @@ +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; + +/** + * This exception is thrown when parse errors are encountered. + * You can explicitly create objects of this exception type by + * calling the method generateParseException in the generated + * parser. + * + * You can modify this class to customize your error reporting + * mechanisms so long as you retain the public fields. + */ +public class ParseException extends Exception { + + /** + * This constructor is used by the method "generateParseException" + * in the generated parser. Calling this constructor generates + * a new object of this type with the fields "currentToken", + * "expectedTokenSequences", and "tokenImage" set. The boolean + * flag "specialConstructor" is also set to true to indicate that + * this constructor was used to create this object. + * This constructor calls its super class with the empty string + * to force the "toString" method of parent class "Throwable" to + * print the error message in the form: + * ParseException: + */ + public ParseException(Token currentTokenVal, + int[][] expectedTokenSequencesVal, + String[] tokenImageVal + ) + { + super(""); + specialConstructor = true; + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /** + * The following constructors are for use by you for whatever + * purpose you can think of. Constructing the exception in this + * manner makes the exception behave in the normal way - i.e., as + * documented in the class "Throwable". The fields "errorToken", + * "expectedTokenSequences", and "tokenImage" do not contain + * relevant information. The JavaCC generated code does not use + * these constructors. + */ + + public ParseException() { + super(); + specialConstructor = false; + } + + public ParseException(String message) { + super(message); + specialConstructor = false; + } + + /** + * This variable determines which constructor was used to create + * this object and thereby affects the semantics of the + * "getMessage" method (see below). + */ + protected boolean specialConstructor; + + /** + * This is the last token that has been consumed successfully. If + * this object has been created due to a parse error, the token + * followng this token will (therefore) be the first error token. + */ + public Token currentToken; + + /** + * Each entry in this array is an array of integers. Each array + * of integers represents a sequence of tokens (by their ordinal + * values) that is expected at this point of the parse. + */ + public int[][] expectedTokenSequences; + + /** + * This is a reference to the "tokenImage" array of the generated + * parser within which the parse error occurred. This array is + * defined in the generated ...Constants interface. + */ + public String[] tokenImage; + + /** + * This method has the standard behavior when this object has been + * created using the standard constructors. Otherwise, it uses + * "currentToken" and "expectedTokenSequences" to generate a parse + * error message and returns it. If this object has been created + * due to a parse error, and you do not catch it (it gets thrown + * from the parser), then this method is called during the printing + * of the final stack trace, and hence the correct error message + * gets displayed. 
+ */ + public String getMessage() { + if (!specialConstructor) { + return super.getMessage(); + } + StringBuffer expected = new StringBuffer(); + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.length; i++) { + if (maxSize < expectedTokenSequences[i].length) { + maxSize = expectedTokenSequences[i].length; + } + for (int j = 0; j < expectedTokenSequences[i].length; j++) { + expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" "); + } + if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { + expected.append("..."); + } + expected.append(eol).append(" "); + } + String retval = "Encountered \""; + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) { + if (i != 0) retval += " "; + if (tok.kind == 0) { + retval += tokenImage[0]; + break; + } + retval += add_escapes(tok.image); + tok = tok.next; + } + retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; + retval += "." + eol; + if (expectedTokenSequences.length == 1) { + retval += "Was expecting:" + eol + " "; + } else { + retval += "Was expecting one of:" + eol + " "; + } + retval += expected.toString(); + return retval; + } + + /** + * The end of line string for this machine. + */ + protected String eol = System.getProperty("line.separator", "\n"); + + /** + * Used to convert raw characters to their escaped version + * when these raw version cannot be used as part of an ASCII + * string literal. + */ + protected String add_escapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java b/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java new file mode 100644 index 00000000000..933710a683b --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java @@ -0,0 +1,535 @@ +/* Generated By:JavaCC: Do not edit this line. Rcc.java */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; + +import org.apache.hadoop.record.compiler.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Iterator; +import java.io.File; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +public class Rcc implements RccConstants { + private static String language = "java"; + private static String destDir = "."; + private static ArrayList recFiles = new ArrayList(); + private static ArrayList cmdargs = new ArrayList(); + private static JFile curFile; + private static Hashtable recTab; + private static String curDir = "."; + private static String curFileName; + private static String curModuleName; + + public static void main(String[] args) { + System.exit(driver(args)); + } + + public static void usage() { + System.err.println("Usage: rcc --language [java|c++] ddl-files"); + } + + public static int driver(String[] args) { + for (int i=0; i(); + curFile = parser.Input(); + } catch (ParseException e) { + System.err.println(e.toString()); + return 1; + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.err.println("File " + recFiles.get(i) + + " Not found."); + return 1; + } + try { + int retCode = curFile.genCode(language, destDir, cmdargs); + if (retCode != 0) { return retCode; } + } catch (IOException e) { + System.err.println(e.toString()); + return 1; + } + } + return 0; + } + + final public JFile Input() throws ParseException { + ArrayList ilist = new ArrayList(); + ArrayList rlist = new ArrayList(); + JFile i; + ArrayList l; + label_1: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case INCLUDE_TKN: + i = Include(); + ilist.add(i); + break; + case MODULE_TKN: + l = Module(); + rlist.addAll(l); + break; + default: + jj_la1[0] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case MODULE_TKN: + case INCLUDE_TKN: + ; + break; + default: + jj_la1[1] = jj_gen; + break label_1; + } + } + jj_consume_token(0); + {if (true) return new JFile(curFileName, ilist, rlist);} + throw new Error("Missing return statement in function"); + } + + final public JFile Include() throws ParseException { + String fname; + Token t; + jj_consume_token(INCLUDE_TKN); + t = jj_consume_token(CSTRING_TKN); + JFile ret = null; + fname = t.image.replaceAll("^\"", "").replaceAll("\"$",""); + File file = new File(curDir, fname); + String tmpDir = curDir; + String tmpFile = curFileName; + curDir = file.getParent(); + curFileName = file.getName(); + try { + FileReader reader = new FileReader(file); + Rcc parser = new Rcc(reader); + try { + ret = parser.Input(); + System.out.println(fname + " Parsed Successfully"); + } catch (ParseException e) { + System.out.println(e.toString()); + System.exit(1); + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.out.println("File " + fname + + " Not found."); + System.exit(1); + } + curDir = tmpDir; + curFileName = tmpFile; + {if (true) return ret;} + throw new Error("Missing return statement in function"); + } + + final public ArrayList Module() throws ParseException { + String mName; + ArrayList rlist; + jj_consume_token(MODULE_TKN); + mName = ModuleName(); + curModuleName = mName; + jj_consume_token(LBRACE_TKN); + rlist = RecordList(); + jj_consume_token(RBRACE_TKN); + {if (true) return rlist;} + throw new Error("Missing return statement in function"); + } + + final public 
String ModuleName() throws ParseException { + String name = ""; + Token t; + t = jj_consume_token(IDENT_TKN); + name += t.image; + label_2: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DOT_TKN: + ; + break; + default: + jj_la1[2] = jj_gen; + break label_2; + } + jj_consume_token(DOT_TKN); + t = jj_consume_token(IDENT_TKN); + name += "." + t.image; + } + {if (true) return name;} + throw new Error("Missing return statement in function"); + } + + final public ArrayList RecordList() throws ParseException { + ArrayList rlist = new ArrayList(); + JRecord r; + label_3: + while (true) { + r = Record(); + rlist.add(r); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RECORD_TKN: + ; + break; + default: + jj_la1[3] = jj_gen; + break label_3; + } + } + {if (true) return rlist;} + throw new Error("Missing return statement in function"); + } + + final public JRecord Record() throws ParseException { + String rname; + ArrayList> flist = new ArrayList>(); + Token t; + JField f; + jj_consume_token(RECORD_TKN); + t = jj_consume_token(IDENT_TKN); + rname = t.image; + jj_consume_token(LBRACE_TKN); + label_4: + while (true) { + f = Field(); + flist.add(f); + jj_consume_token(SEMICOLON_TKN); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case BYTE_TKN: + case BOOLEAN_TKN: + case INT_TKN: + case LONG_TKN: + case FLOAT_TKN: + case DOUBLE_TKN: + case USTRING_TKN: + case BUFFER_TKN: + case VECTOR_TKN: + case MAP_TKN: + case IDENT_TKN: + ; + break; + default: + jj_la1[4] = jj_gen; + break label_4; + } + } + jj_consume_token(RBRACE_TKN); + String fqn = curModuleName + "." + rname; + JRecord r = new JRecord(fqn, flist); + recTab.put(fqn, r); + {if (true) return r;} + throw new Error("Missing return statement in function"); + } + + final public JField Field() throws ParseException { + JType jt; + Token t; + jt = Type(); + t = jj_consume_token(IDENT_TKN); + {if (true) return new JField(t.image, jt);} + throw new Error("Missing return statement in function"); + } + + final public JType Type() throws ParseException { + JType jt; + Token t; + String rname; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case MAP_TKN: + jt = Map(); + {if (true) return jt;} + break; + case VECTOR_TKN: + jt = Vector(); + {if (true) return jt;} + break; + case BYTE_TKN: + jj_consume_token(BYTE_TKN); + {if (true) return new JByte();} + break; + case BOOLEAN_TKN: + jj_consume_token(BOOLEAN_TKN); + {if (true) return new JBoolean();} + break; + case INT_TKN: + jj_consume_token(INT_TKN); + {if (true) return new JInt();} + break; + case LONG_TKN: + jj_consume_token(LONG_TKN); + {if (true) return new JLong();} + break; + case FLOAT_TKN: + jj_consume_token(FLOAT_TKN); + {if (true) return new JFloat();} + break; + case DOUBLE_TKN: + jj_consume_token(DOUBLE_TKN); + {if (true) return new JDouble();} + break; + case USTRING_TKN: + jj_consume_token(USTRING_TKN); + {if (true) return new JString();} + break; + case BUFFER_TKN: + jj_consume_token(BUFFER_TKN); + {if (true) return new JBuffer();} + break; + case IDENT_TKN: + rname = ModuleName(); + if (rname.indexOf('.', 0) < 0) { + rname = curModuleName + "." + rname; + } + JRecord r = recTab.get(rname); + if (r == null) { + System.out.println("Type " + rname + " not known. 
Exiting."); + System.exit(1); + } + {if (true) return r;} + break; + default: + jj_la1[5] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + throw new Error("Missing return statement in function"); + } + + final public JMap Map() throws ParseException { + JType jt1; + JType jt2; + jj_consume_token(MAP_TKN); + jj_consume_token(LT_TKN); + jt1 = Type(); + jj_consume_token(COMMA_TKN); + jt2 = Type(); + jj_consume_token(GT_TKN); + {if (true) return new JMap(jt1, jt2);} + throw new Error("Missing return statement in function"); + } + + final public JVector Vector() throws ParseException { + JType jt; + jj_consume_token(VECTOR_TKN); + jj_consume_token(LT_TKN); + jt = Type(); + jj_consume_token(GT_TKN); + {if (true) return new JVector(jt);} + throw new Error("Missing return statement in function"); + } + + public RccTokenManager token_source; + SimpleCharStream jj_input_stream; + public Token token, jj_nt; + private int jj_ntk; + private int jj_gen; + final private int[] jj_la1 = new int[6]; + static private int[] jj_la1_0; + static private int[] jj_la1_1; + static { + jj_la1_0(); + jj_la1_1(); + } + private static void jj_la1_0() { + jj_la1_0 = new int[] {0x2800, 0x2800, 0x40000000, 0x1000, 0xffc000, 0xffc000,}; + } + private static void jj_la1_1() { + jj_la1_1 = new int[] {0x0, 0x0, 0x0, 0x0, 0x1, 0x1,}; + } + + public Rcc(java.io.InputStream stream) { + this(stream, null); + } + public Rcc(java.io.InputStream stream, String encoding) { + try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source = new RccTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public void ReInit(java.io.InputStream stream) { + ReInit(stream, null); + } + public void ReInit(java.io.InputStream stream, String encoding) { + try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public Rcc(java.io.Reader stream) { + jj_input_stream = new SimpleCharStream(stream, 1, 1); + token_source = new RccTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public void ReInit(java.io.Reader stream) { + jj_input_stream.ReInit(stream, 1, 1); + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public Rcc(RccTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public void ReInit(RccTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + final private Token jj_consume_token(int kind) throws ParseException { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + if (token.kind == kind) { + jj_gen++; + return token; + } + token = oldToken; + jj_kind = kind; + throw generateParseException(); + } + + final public Token getNextToken() { + if (token.next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + 
jj_gen++; + return token; + } + + final public Token getToken(int index) { + Token t = token; + for (int i = 0; i < index; i++) { + if (t.next != null) t = t.next; + else t = t.next = token_source.getNextToken(); + } + return t; + } + + final private int jj_ntk() { + if ((jj_nt=token.next) == null) + return (jj_ntk = (token.next=token_source.getNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private java.util.Vector jj_expentries = new java.util.Vector(); + private int[] jj_expentry; + private int jj_kind = -1; + + public ParseException generateParseException() { + jj_expentries.removeAllElements(); + boolean[] la1tokens = new boolean[33]; + for (int i = 0; i < 33; i++) { + la1tokens[i] = false; + } + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 6; i++) { + if (jj_la1[i] == jj_gen) { + for (int j = 0; j < 32; j++) { + if ((jj_la1_0[i] & (1<", + "\" \"", + "\"\\t\"", + "\"\\n\"", + "\"\\r\"", + "\"//\"", + "", + "", + "\"/*\"", + "\"*/\"", + "", + "\"module\"", + "\"class\"", + "\"include\"", + "\"byte\"", + "\"boolean\"", + "\"int\"", + "\"long\"", + "\"float\"", + "\"double\"", + "\"ustring\"", + "\"buffer\"", + "\"vector\"", + "\"map\"", + "\"{\"", + "\"}\"", + "\"<\"", + "\">\"", + "\";\"", + "\",\"", + "\".\"", + "", + "", + }; + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java b/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java new file mode 100644 index 00000000000..42b04733ebf --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java @@ -0,0 +1,833 @@ +/* Generated By:JavaCC: Do not edit this line. RccTokenManager.java */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; +import org.apache.hadoop.record.compiler.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Iterator; +import java.io.File; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +public class RccTokenManager implements RccConstants +{ + public java.io.PrintStream debugStream = System.out; + public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } + private final int jjMoveStringLiteralDfa0_1() + { + return jjMoveNfa_1(0, 0); + } + private final void jjCheckNAdd(int state) + { + if (jjrounds[state] != jjround) + { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } + } + private final void jjAddStates(int start, int end) + { + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); + } + private final void jjCheckNAddTwoStates(int state1, int state2) + { + jjCheckNAdd(state1); + jjCheckNAdd(state2); + } + private final void jjCheckNAddStates(int start, int end) + { + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); + } + private final void jjCheckNAddStates(int start) + { + jjCheckNAdd(jjnextStates[start]); + jjCheckNAdd(jjnextStates[start + 1]); + } + private final int jjMoveNfa_1(int startState, int curPos) + { + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x2400L & l) != 0L) + { + if (kind > 6) + kind = 6; + } + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 1; + break; + case 1: + if (curChar == 10 && kind > 6) + kind = 6; + break; + case 2: + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 1; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + else + { + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } + } + private final int jjStopStringLiteralDfa_0(int pos, long active0) + { + switch (pos) + { + case 0: + if ((active0 & 0xfff800L) != 0L) + { + jjmatchedKind = 32; + return 4; + } + return -1; + case 1: + if ((active0 & 0xfff800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 1; + return 4; + } + return -1; + case 2: + if ((active0 & 0x7ef800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 2; + return 4; + } + if ((active0 & 0x810000L) != 0L) + return 4; + return -1; + case 3: + if ((active0 & 0x24000L) != 0L) + return 4; + if ((active0 & 0x7cb800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 3; + return 4; + } + return -1; + case 4: + if ((active0 & 0x41000L) != 0L) + return 4; + if ((active0 & 0x78a800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 4; + return 4; + } + return -1; + case 5: + if ((active0 & 0x680800L) != 0L) + return 4; + if ((active0 & 0x10a000L) != 0L) + { + 
jjmatchedKind = 32; + jjmatchedPos = 5; + return 4; + } + return -1; + default : + return -1; + } + } + private final int jjStartNfa_0(int pos, long active0) + { + return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1); + } + private final int jjStopAtPos(int pos, int kind) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; + } + private final int jjStartNfaWithStates_0(int pos, int kind, int state) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_0(state, pos + 1); + } + private final int jjMoveStringLiteralDfa0_0() + { + switch(curChar) + { + case 44: + return jjStopAtPos(0, 29); + case 46: + return jjStopAtPos(0, 30); + case 47: + return jjMoveStringLiteralDfa1_0(0x120L); + case 59: + return jjStopAtPos(0, 28); + case 60: + return jjStopAtPos(0, 26); + case 62: + return jjStopAtPos(0, 27); + case 98: + return jjMoveStringLiteralDfa1_0(0x20c000L); + case 99: + return jjMoveStringLiteralDfa1_0(0x1000L); + case 100: + return jjMoveStringLiteralDfa1_0(0x80000L); + case 102: + return jjMoveStringLiteralDfa1_0(0x40000L); + case 105: + return jjMoveStringLiteralDfa1_0(0x12000L); + case 108: + return jjMoveStringLiteralDfa1_0(0x20000L); + case 109: + return jjMoveStringLiteralDfa1_0(0x800800L); + case 117: + return jjMoveStringLiteralDfa1_0(0x100000L); + case 118: + return jjMoveStringLiteralDfa1_0(0x400000L); + case 123: + return jjStopAtPos(0, 24); + case 125: + return jjStopAtPos(0, 25); + default : + return jjMoveNfa_0(0, 0); + } + } + private final int jjMoveStringLiteralDfa1_0(long active0) + { + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(0, active0); + return 1; + } + switch(curChar) + { + case 42: + if ((active0 & 0x100L) != 0L) + return jjStopAtPos(1, 8); + break; + case 47: + if ((active0 & 0x20L) != 0L) + return jjStopAtPos(1, 5); + break; + case 97: + return jjMoveStringLiteralDfa2_0(active0, 0x800000L); + case 101: + return jjMoveStringLiteralDfa2_0(active0, 0x400000L); + case 108: + return jjMoveStringLiteralDfa2_0(active0, 0x41000L); + case 110: + return jjMoveStringLiteralDfa2_0(active0, 0x12000L); + case 111: + return jjMoveStringLiteralDfa2_0(active0, 0xa8800L); + case 115: + return jjMoveStringLiteralDfa2_0(active0, 0x100000L); + case 117: + return jjMoveStringLiteralDfa2_0(active0, 0x200000L); + case 121: + return jjMoveStringLiteralDfa2_0(active0, 0x4000L); + default : + break; + } + return jjStartNfa_0(0, active0); + } + private final int jjMoveStringLiteralDfa2_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(0, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(1, active0); + return 2; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa3_0(active0, 0x1000L); + case 99: + return jjMoveStringLiteralDfa3_0(active0, 0x402000L); + case 100: + return jjMoveStringLiteralDfa3_0(active0, 0x800L); + case 102: + return jjMoveStringLiteralDfa3_0(active0, 0x200000L); + case 110: + return jjMoveStringLiteralDfa3_0(active0, 0x20000L); + case 111: + return jjMoveStringLiteralDfa3_0(active0, 0x48000L); + case 112: + if ((active0 & 0x800000L) != 0L) + return jjStartNfaWithStates_0(2, 23, 4); + break; + case 116: + if ((active0 & 0x10000L) != 0L) + return jjStartNfaWithStates_0(2, 16, 4); + return jjMoveStringLiteralDfa3_0(active0, 0x104000L); + case 117: + return 
jjMoveStringLiteralDfa3_0(active0, 0x80000L); + default : + break; + } + return jjStartNfa_0(1, active0); + } + private final int jjMoveStringLiteralDfa3_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(1, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(2, active0); + return 3; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa4_0(active0, 0x40000L); + case 98: + return jjMoveStringLiteralDfa4_0(active0, 0x80000L); + case 101: + if ((active0 & 0x4000L) != 0L) + return jjStartNfaWithStates_0(3, 14, 4); + break; + case 102: + return jjMoveStringLiteralDfa4_0(active0, 0x200000L); + case 103: + if ((active0 & 0x20000L) != 0L) + return jjStartNfaWithStates_0(3, 17, 4); + break; + case 108: + return jjMoveStringLiteralDfa4_0(active0, 0xa000L); + case 114: + return jjMoveStringLiteralDfa4_0(active0, 0x100000L); + case 115: + return jjMoveStringLiteralDfa4_0(active0, 0x1000L); + case 116: + return jjMoveStringLiteralDfa4_0(active0, 0x400000L); + case 117: + return jjMoveStringLiteralDfa4_0(active0, 0x800L); + default : + break; + } + return jjStartNfa_0(2, active0); + } + private final int jjMoveStringLiteralDfa4_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(2, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(3, active0); + return 4; + } + switch(curChar) + { + case 101: + return jjMoveStringLiteralDfa5_0(active0, 0x208000L); + case 105: + return jjMoveStringLiteralDfa5_0(active0, 0x100000L); + case 108: + return jjMoveStringLiteralDfa5_0(active0, 0x80800L); + case 111: + return jjMoveStringLiteralDfa5_0(active0, 0x400000L); + case 115: + if ((active0 & 0x1000L) != 0L) + return jjStartNfaWithStates_0(4, 12, 4); + break; + case 116: + if ((active0 & 0x40000L) != 0L) + return jjStartNfaWithStates_0(4, 18, 4); + break; + case 117: + return jjMoveStringLiteralDfa5_0(active0, 0x2000L); + default : + break; + } + return jjStartNfa_0(3, active0); + } + private final int jjMoveStringLiteralDfa5_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(3, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(4, active0); + return 5; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa6_0(active0, 0x8000L); + case 100: + return jjMoveStringLiteralDfa6_0(active0, 0x2000L); + case 101: + if ((active0 & 0x800L) != 0L) + return jjStartNfaWithStates_0(5, 11, 4); + else if ((active0 & 0x80000L) != 0L) + return jjStartNfaWithStates_0(5, 19, 4); + break; + case 110: + return jjMoveStringLiteralDfa6_0(active0, 0x100000L); + case 114: + if ((active0 & 0x200000L) != 0L) + return jjStartNfaWithStates_0(5, 21, 4); + else if ((active0 & 0x400000L) != 0L) + return jjStartNfaWithStates_0(5, 22, 4); + break; + default : + break; + } + return jjStartNfa_0(4, active0); + } + private final int jjMoveStringLiteralDfa6_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(4, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(5, active0); + return 6; + } + switch(curChar) + { + case 101: + if ((active0 & 0x2000L) != 0L) + return jjStartNfaWithStates_0(6, 13, 4); + break; + case 103: + if ((active0 & 0x100000L) != 0L) + return jjStartNfaWithStates_0(6, 20, 4); + break; + case 110: + if ((active0 & 0x8000L) != 0L) + return 
jjStartNfaWithStates_0(6, 15, 4); + break; + default : + break; + } + return jjStartNfa_0(5, active0); + } + static final long[] jjbitVec0 = { + 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL + }; + private final int jjMoveNfa_0(int startState, int curPos) + { + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 5; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if (curChar == 34) + jjCheckNAdd(1); + break; + case 1: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(1, 2); + break; + case 2: + if (curChar == 34 && kind > 31) + kind = 31; + break; + case 4: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 32) + kind = 32; + jjstateSet[jjnewStateCnt++] = 4; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 32) + kind = 32; + jjCheckNAdd(4); + break; + case 1: + jjAddStates(0, 1); + break; + case 4: + if ((0x7fffffe87fffffeL & l) == 0L) + break; + if (kind > 32) + kind = 32; + jjCheckNAdd(4); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 1: + if ((jjbitVec0[i2] & l2) != 0L) + jjAddStates(0, 1); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } + } + private final int jjMoveStringLiteralDfa0_2() + { + switch(curChar) + { + case 42: + return jjMoveStringLiteralDfa1_2(0x200L); + default : + return 1; + } + } + private final int jjMoveStringLiteralDfa1_2(long active0) + { + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + return 1; + } + switch(curChar) + { + case 47: + if ((active0 & 0x200L) != 0L) + return jjStopAtPos(1, 9); + break; + default : + return 2; + } + return 2; + } + static final int[] jjnextStates = { + 1, 2, + }; + public static final String[] jjstrLiteralImages = { + "", null, null, null, null, null, null, null, null, null, null, + "\155\157\144\165\154\145", "\143\154\141\163\163", "\151\156\143\154\165\144\145", "\142\171\164\145", + "\142\157\157\154\145\141\156", "\151\156\164", "\154\157\156\147", "\146\154\157\141\164", + "\144\157\165\142\154\145", "\165\163\164\162\151\156\147", "\142\165\146\146\145\162", + "\166\145\143\164\157\162", "\155\141\160", "\173", "\175", "\74", "\76", "\73", "\54", "\56", null, null, }; + public static final String[] lexStateNames = { + "DEFAULT", + "WithinOneLineComment", + "WithinMultiLineComment", + }; + public static final int[] jjnewLexState = { + -1, -1, -1, -1, -1, 1, 0, -1, 2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + }; + static final long[] jjtoToken = { + 0x1fffff801L, + }; + static final long[] jjtoSkip = { + 0x37eL, + }; + static final long[] jjtoSpecial = { + 0x360L, + }; + static final long[] jjtoMore = { + 0x480L, + }; + protected SimpleCharStream 
input_stream; + private final int[] jjrounds = new int[5]; + private final int[] jjstateSet = new int[10]; + StringBuffer image; + int jjimageLen; + int lengthOfMatch; + protected char curChar; + public RccTokenManager(SimpleCharStream stream){ + if (SimpleCharStream.staticFlag) + throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); + input_stream = stream; + } + public RccTokenManager(SimpleCharStream stream, int lexState){ + this(stream); + SwitchTo(lexState); + } + public void ReInit(SimpleCharStream stream) + { + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); + } + private final void ReInitRounds() + { + int i; + jjround = 0x80000001; + for (i = 5; i-- > 0;) + jjrounds[i] = 0x80000000; + } + public void ReInit(SimpleCharStream stream, int lexState) + { + ReInit(stream); + SwitchTo(lexState); + } + public void SwitchTo(int lexState) + { + if (lexState >= 3 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); + else + curLexState = lexState; + } + + protected Token jjFillToken() + { + Token t = Token.newToken(jjmatchedKind); + t.kind = jjmatchedKind; + String im = jjstrLiteralImages[jjmatchedKind]; + t.image = (im == null) ? input_stream.GetImage() : im; + t.beginLine = input_stream.getBeginLine(); + t.beginColumn = input_stream.getBeginColumn(); + t.endLine = input_stream.getEndLine(); + t.endColumn = input_stream.getEndColumn(); + return t; + } + + int curLexState = 0; + int defaultLexState = 0; + int jjnewStateCnt; + int jjround; + int jjmatchedPos; + int jjmatchedKind; + + public Token getNextToken() + { + int kind; + Token specialToken = null; + Token matchedToken; + int curPos = 0; + + EOFLoop : + for (;;) + { + try + { + curChar = input_stream.BeginToken(); + } + catch(java.io.IOException e) + { + jjmatchedKind = 0; + matchedToken = jjFillToken(); + matchedToken.specialToken = specialToken; + return matchedToken; + } + image = null; + jjimageLen = 0; + + for (;;) + { + switch(curLexState) + { + case 0: + try { input_stream.backup(0); + while (curChar <= 32 && (0x100002600L & (1L << curChar)) != 0L) + curChar = input_stream.BeginToken(); + } + catch (java.io.IOException e1) { continue EOFLoop; } + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_0(); + break; + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_1(); + if (jjmatchedPos == 0 && jjmatchedKind > 7) + { + jjmatchedKind = 7; + } + break; + case 2: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_2(); + if (jjmatchedPos == 0 && jjmatchedKind > 10) + { + jjmatchedKind = 10; + } + break; + } + if (jjmatchedKind != 0x7fffffff) + { + if (jjmatchedPos + 1 < curPos) + input_stream.backup(curPos - jjmatchedPos - 1); + if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + matchedToken.specialToken = specialToken; + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + return matchedToken; + } + else if ((jjtoSkip[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + if ((jjtoSpecial[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + if (specialToken == null) + specialToken = matchedToken; + else + { + matchedToken.specialToken = specialToken; + specialToken = 
(specialToken.next = matchedToken); + } + SkipLexicalActions(matchedToken); + } + else + SkipLexicalActions(null); + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + continue EOFLoop; + } + jjimageLen += jjmatchedPos + 1; + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + curPos = 0; + jjmatchedKind = 0x7fffffff; + try { + curChar = input_stream.readChar(); + continue; + } + catch (java.io.IOException e1) { } + } + int error_line = input_stream.getEndLine(); + int error_column = input_stream.getEndColumn(); + String error_after = null; + boolean EOFSeen = false; + try { input_stream.readChar(); input_stream.backup(1); } + catch (java.io.IOException e1) { + EOFSeen = true; + error_after = curPos <= 1 ? "" : input_stream.GetImage(); + if (curChar == '\n' || curChar == '\r') { + error_line++; + error_column = 0; + } + else + error_column++; + } + if (!EOFSeen) { + input_stream.backup(1); + error_after = curPos <= 1 ? "" : input_stream.GetImage(); + } + throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); + } + } + } + + void SkipLexicalActions(Token matchedToken) + { + switch(jjmatchedKind) + { + default : + break; + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java b/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java new file mode 100644 index 00000000000..364d708e462 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java @@ -0,0 +1,439 @@ +/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler.generated; + +/** + * An implementation of interface CharStream, where the stream is assumed to + * contain only ASCII characters (without unicode processing). 
+ */ + +public class SimpleCharStream +{ + public static final boolean staticFlag = false; + int bufsize; + int available; + int tokenBegin; + public int bufpos = -1; + protected int bufline[]; + protected int bufcolumn[]; + + protected int column = 0; + protected int line = 1; + + protected boolean prevCharIsCR = false; + protected boolean prevCharIsLF = false; + + protected java.io.Reader inputStream; + + protected char[] buffer; + protected int maxNextCharInd = 0; + protected int inBuf = 0; + protected int tabSize = 8; + + protected void setTabSize(int i) { tabSize = i; } + protected int getTabSize(int i) { return tabSize; } + + + protected void ExpandBuff(boolean wrapAround) + { + char[] newbuffer = new char[bufsize + 2048]; + int newbufline[] = new int[bufsize + 2048]; + int newbufcolumn[] = new int[bufsize + 2048]; + + try + { + if (wrapAround) + { + System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); + System.arraycopy(buffer, 0, newbuffer, + bufsize - tokenBegin, bufpos); + buffer = newbuffer; + + System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); + System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos); + bufline = newbufline; + + System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); + System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos); + bufcolumn = newbufcolumn; + + maxNextCharInd = (bufpos += (bufsize - tokenBegin)); + } + else + { + System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); + buffer = newbuffer; + + System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); + bufline = newbufline; + + System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); + bufcolumn = newbufcolumn; + + maxNextCharInd = (bufpos -= tokenBegin); + } + } + catch (Throwable t) + { + throw new Error(t.getMessage()); + } + + + bufsize += 2048; + available = bufsize; + tokenBegin = 0; + } + + protected void FillBuff() throws java.io.IOException + { + if (maxNextCharInd == available) + { + if (available == bufsize) + { + if (tokenBegin > 2048) + { + bufpos = maxNextCharInd = 0; + available = tokenBegin; + } + else if (tokenBegin < 0) + bufpos = maxNextCharInd = 0; + else + ExpandBuff(false); + } + else if (available > tokenBegin) + available = bufsize; + else if ((tokenBegin - available) < 2048) + ExpandBuff(true); + else + available = tokenBegin; + } + + int i; + try { + if ((i = inputStream.read(buffer, maxNextCharInd, + available - maxNextCharInd)) == -1) + { + inputStream.close(); + throw new java.io.IOException(); + } + else + maxNextCharInd += i; + return; + } + catch(java.io.IOException e) { + --bufpos; + backup(0); + if (tokenBegin == -1) + tokenBegin = bufpos; + throw e; + } + } + + public char BeginToken() throws java.io.IOException + { + tokenBegin = -1; + char c = readChar(); + tokenBegin = bufpos; + + return c; + } + + protected void UpdateLineColumn(char c) + { + column++; + + if (prevCharIsLF) + { + prevCharIsLF = false; + line += (column = 1); + } + else if (prevCharIsCR) + { + prevCharIsCR = false; + if (c == '\n') + { + prevCharIsLF = true; + } + else + line += (column = 1); + } + + switch (c) + { + case '\r' : + prevCharIsCR = true; + break; + case '\n' : + prevCharIsLF = true; + break; + case '\t' : + column--; + column += (tabSize - (column % tabSize)); + break; + default : + break; + } + + bufline[bufpos] = line; + bufcolumn[bufpos] = column; + } + + public char readChar() throws java.io.IOException + { + if 
(inBuf > 0) + { + --inBuf; + + if (++bufpos == bufsize) + bufpos = 0; + + return buffer[bufpos]; + } + + if (++bufpos >= maxNextCharInd) + FillBuff(); + + char c = buffer[bufpos]; + + UpdateLineColumn(c); + return (c); + } + + public int getEndColumn() { + return bufcolumn[bufpos]; + } + + public int getEndLine() { + return bufline[bufpos]; + } + + public int getBeginColumn() { + return bufcolumn[tokenBegin]; + } + + public int getBeginLine() { + return bufline[tokenBegin]; + } + + public void backup(int amount) { + + inBuf += amount; + if ((bufpos -= amount) < 0) + bufpos += bufsize; + } + + public SimpleCharStream(java.io.Reader dstream, int startline, + int startcolumn, int buffersize) + { + inputStream = dstream; + line = startline; + column = startcolumn - 1; + + available = bufsize = buffersize; + buffer = new char[buffersize]; + bufline = new int[buffersize]; + bufcolumn = new int[buffersize]; + } + + public SimpleCharStream(java.io.Reader dstream, int startline, + int startcolumn) + { + this(dstream, startline, startcolumn, 4096); + } + + public SimpleCharStream(java.io.Reader dstream) + { + this(dstream, 1, 1, 4096); + } + public void ReInit(java.io.Reader dstream, int startline, + int startcolumn, int buffersize) + { + inputStream = dstream; + line = startline; + column = startcolumn - 1; + + if (buffer == null || buffersize != buffer.length) + { + available = bufsize = buffersize; + buffer = new char[buffersize]; + bufline = new int[buffersize]; + bufcolumn = new int[buffersize]; + } + prevCharIsLF = prevCharIsCR = false; + tokenBegin = inBuf = maxNextCharInd = 0; + bufpos = -1; + } + + public void ReInit(java.io.Reader dstream, int startline, + int startcolumn) + { + ReInit(dstream, startline, startcolumn, 4096); + } + + public void ReInit(java.io.Reader dstream) + { + ReInit(dstream, 1, 1, 4096); + } + public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + + public SimpleCharStream(java.io.InputStream dstream, int startline, + int startcolumn, int buffersize) + { + this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } + + public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, startline, startcolumn, 4096); + } + + public SimpleCharStream(java.io.InputStream dstream, int startline, + int startcolumn) + { + this(dstream, startline, startcolumn, 4096); + } + + public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, 1, 1, 4096); + } + + public SimpleCharStream(java.io.InputStream dstream) + { + this(dstream, 1, 1, 4096); + } + + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + ReInit(encoding == null ? 
new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + + public void ReInit(java.io.InputStream dstream, int startline, + int startcolumn, int buffersize) + { + ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } + + public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, 1, 1, 4096); + } + + public void ReInit(java.io.InputStream dstream) + { + ReInit(dstream, 1, 1, 4096); + } + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, startline, startcolumn, 4096); + } + public void ReInit(java.io.InputStream dstream, int startline, + int startcolumn) + { + ReInit(dstream, startline, startcolumn, 4096); + } + public String GetImage() + { + if (bufpos >= tokenBegin) + return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); + else + return new String(buffer, tokenBegin, bufsize - tokenBegin) + + new String(buffer, 0, bufpos + 1); + } + + public char[] GetSuffix(int len) + { + char[] ret = new char[len]; + + if ((bufpos + 1) >= len) + System.arraycopy(buffer, bufpos - len + 1, ret, 0, len); + else + { + System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0, + len - bufpos - 1); + System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1); + } + + return ret; + } + + public void Done() + { + buffer = null; + bufline = null; + bufcolumn = null; + } + + /** + * Method to adjust line and column numbers for the start of a token. + */ + public void adjustBeginLineColumn(int newLine, int newCol) + { + int start = tokenBegin; + int len; + + if (bufpos >= tokenBegin) + { + len = bufpos - tokenBegin + inBuf + 1; + } + else + { + len = bufsize - tokenBegin + bufpos + 1 + inBuf; + } + + int i = 0, j = 0, k = 0; + int nextColDiff = 0, columnDiff = 0; + + while (i < len && + bufline[j = start % bufsize] == bufline[k = ++start % bufsize]) + { + bufline[j] = newLine; + nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j]; + bufcolumn[j] = newCol + columnDiff; + columnDiff = nextColDiff; + i++; + } + + if (i < len) + { + bufline[j] = newLine++; + bufcolumn[j] = newCol + columnDiff; + + while (i++ < len) + { + if (bufline[j = start % bufsize] != bufline[++start % bufsize]) + bufline[j] = newLine++; + else + bufline[j] = newLine; + } + } + + line = bufline[j]; + column = bufcolumn[j]; + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/Token.java b/src/java/org/apache/hadoop/record/compiler/generated/Token.java new file mode 100644 index 00000000000..29f36ab1e07 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/Token.java @@ -0,0 +1,99 @@ +/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler.generated; + +/** + * Describes the input token stream. + */ + +public class Token { + + /** + * An integer that describes the kind of this token. This numbering + * system is determined by JavaCCParser, and a table of these numbers is + * stored in the file ...Constants.java. + */ + public int kind; + + /** + * beginLine and beginColumn describe the position of the first character + * of this token; endLine and endColumn describe the position of the + * last character of this token. + */ + public int beginLine, beginColumn, endLine, endColumn; + + /** + * The string image of the token. + */ + public String image; + + /** + * A reference to the next regular (non-special) token from the input + * stream. If this is the last token from the input stream, or if the + * token manager has not read tokens beyond this one, this field is + * set to null. This is true only if this token is also a regular + * token. Otherwise, see below for a description of the contents of + * this field. + */ + public Token next; + + /** + * This field is used to access special tokens that occur prior to this + * token, but after the immediately preceding regular (non-special) token. + * If there are no such special tokens, this field is set to null. + * When there are more than one such special token, this field refers + * to the last of these special tokens, which in turn refers to the next + * previous special token through its specialToken field, and so on + * until the first special token (whose specialToken field is null). + * The next fields of special tokens refer to other special tokens that + * immediately follow it (without an intervening regular token). If there + * is no such token, this field is null. + */ + public Token specialToken; + + /** + * Returns the image. + */ + public String toString() + { + return image; + } + + /** + * Returns a new Token object, by default. However, if you want, you + * can create and return subclass objects based on the value of ofKind. + * Simply add the cases to the switch for all those special cases. + * For example, if you have a subclass of Token called IDToken that + * you want to create if ofKind is ID, simlpy add something like : + * + * case MyParserConstants.ID : return new IDToken(); + * + * to the following switch statement. Then you can cast matchedToken + * variable to the appropriate type and use it in your lexical actions. + */ + public static final Token newToken(int ofKind) + { + switch(ofKind) + { + default : return new Token(); + } + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java b/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java new file mode 100644 index 00000000000..14f3ae34805 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java @@ -0,0 +1,151 @@ +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler.generated; + +public class TokenMgrError extends Error +{ + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ + + /** + * Lexical error occured. + */ + static final int LEXICAL_ERROR = 0; + + /** + * An attempt wass made to create a second instance of a static token manager. + */ + static final int STATIC_LEXER_ERROR = 1; + + /** + * Tried to change to an invalid lexical state. + */ + static final int INVALID_LEXICAL_STATE = 2; + + /** + * Detected (and bailed out of) an infinite loop in the token manager. + */ + static final int LOOP_DETECTED = 3; + + /** + * Indicates the reason why the exception is thrown. It will have + * one of the above 4 values. + */ + int errorCode; + + /** + * Replaces unprintable characters by their espaced (or unicode escaped) + * equivalents in the given string + */ + protected static final String addEscapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + + /** + * Returns a detailed message for the Error when it is thrown by the + * token manager to indicate a lexical error. + * Parameters : + * EOFSeen : indicates if EOF caused the lexicl error + * curLexState : lexical state in which this error occured + * errorLine : line number when the error occured + * errorColumn : column number when the error occured + * errorAfter : prefix that was seen before this error occured + * curchar : the offending character + * Note: You can customize the lexical error message by modifying this method. + */ + protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { + return("Lexical error at line " + + errorLine + ", column " + + errorColumn + ". Encountered: " + + (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + + "after : \"" + addEscapes(errorAfter) + "\""); + } + + /** + * You can also modify the body of this method to customize your error messages. 
+ * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + * of end-users concern, so you can return something like : + * + * "Internal Error : Please file a bug report .... " + * + * from this method for such cases in the release version of your parser. + */ + public String getMessage() { + return super.getMessage(); + } + + /* + * Constructors of various flavors follow. + */ + + public TokenMgrError() { + } + + public TokenMgrError(String message, int reason) { + super(message); + errorCode = reason; + } + + public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { + this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/package.html b/src/java/org/apache/hadoop/record/compiler/generated/package.html new file mode 100644 index 00000000000..2fd0f68967d --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/package.html @@ -0,0 +1,29 @@ + + + + + + + Hadoop Record Compiler: Parser + + + This package contains code generated by JavaCC from the + Hadoop record syntax file rcc.jj. For details about the + record file syntax please @see org.apache.hadoop.record. + + diff --git a/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj b/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj new file mode 100644 index 00000000000..4eeae3e47db --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj @@ -0,0 +1,384 @@ +options { +STATIC=false; +} + +PARSER_BEGIN(Rcc) +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; + +import org.apache.hadoop.record.compiler.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Iterator; +import java.io.File; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +public class Rcc { + private static String language = "java"; + private static String destDir = "."; + private static ArrayList recFiles = new ArrayList(); + private static ArrayList cmdargs = new ArrayList(); + private static JFile curFile; + private static Hashtable recTab; + private static String curDir = "."; + private static String curFileName; + private static String curModuleName; + + public static void main(String[] args) { + System.exit(driver(args)); + } + + public static void usage() { + System.err.println("Usage: rcc --language [java|c++] ddl-files"); + } + + public static int driver(String[] args) { + for (int i=0; i(); + curFile = parser.Input(); + } catch (ParseException e) { + System.err.println(e.toString()); + return 1; + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.err.println("File " + (String) recFiles.get(i) + + " Not found."); + return 1; + } + try { + int retCode = curFile.genCode(language, destDir, cmdargs); + if (retCode != 0) { return retCode; } + } catch (IOException e) { + System.err.println(e.toString()); + return 1; + } + } + return 0; + } +} + +PARSER_END(Rcc) + +SKIP : +{ + " " +| "\t" +| "\n" +| "\r" +} + +SPECIAL_TOKEN : +{ + "//" : WithinOneLineComment +} + + SPECIAL_TOKEN : +{ + <("\n" | "\r" | "\r\n" )> : DEFAULT +} + + MORE : +{ + <~[]> +} + +SPECIAL_TOKEN : +{ + "/*" : WithinMultiLineComment +} + + SPECIAL_TOKEN : +{ + "*/" : DEFAULT +} + + MORE : +{ + <~[]> +} + +TOKEN : +{ + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| "> +| +| +| +| +| +} + +JFile Input() : +{ + ArrayList ilist = new ArrayList(); + ArrayList rlist = new ArrayList(); + JFile i; + ArrayList l; +} +{ + ( + i = Include() + { ilist.add(i); } + | l = Module() + { rlist.addAll(l); } + )+ + + { return new JFile(curFileName, ilist, rlist); } +} + +JFile Include() : +{ + String fname; + Token t; +} +{ + + t = + { + JFile ret = null; + fname = t.image.replaceAll("^\"", "").replaceAll("\"$",""); + File file = new File(curDir, fname); + String tmpDir = curDir; + String tmpFile = curFileName; + curDir = file.getParent(); + curFileName = file.getName(); + try { + FileReader reader = new FileReader(file); + Rcc parser = new Rcc(reader); + try { + ret = parser.Input(); + System.out.println(fname + " Parsed Successfully"); + } catch (ParseException e) { + System.out.println(e.toString()); + System.exit(1); + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.out.println("File " + fname + + " Not found."); + System.exit(1); + } + curDir = tmpDir; + curFileName = tmpFile; + return ret; + } +} + +ArrayList Module() : +{ + String mName; + ArrayList rlist; +} +{ + + mName = ModuleName() + { curModuleName = mName; } + + rlist = RecordList() + + { return rlist; } +} + +String ModuleName() : +{ + String name = ""; + Token t; +} +{ + t = + { name += t.image; } + ( + + t = + { name += "." 
+ t.image; } + )* + { return name; } +} + +ArrayList RecordList() : +{ + ArrayList rlist = new ArrayList(); + JRecord r; +} +{ + ( + r = Record() + { rlist.add(r); } + )+ + { return rlist; } +} + +JRecord Record() : +{ + String rname; + ArrayList> flist = new ArrayList>(); + Token t; + JField f; +} +{ + + t = + { rname = t.image; } + + ( + f = Field() + { flist.add(f); } + + )+ + + { + String fqn = curModuleName + "." + rname; + JRecord r = new JRecord(fqn, flist); + recTab.put(fqn, r); + return r; + } +} + +JField Field() : +{ + JType jt; + Token t; +} +{ + jt = Type() + t = + { return new JField(t.image, jt); } +} + +JType Type() : +{ + JType jt; + Token t; + String rname; +} +{ + jt = Map() + { return jt; } +| jt = Vector() + { return jt; } +| + { return new JByte(); } +| + { return new JBoolean(); } +| + { return new JInt(); } +| + { return new JLong(); } +| + { return new JFloat(); } +| + { return new JDouble(); } +| + { return new JString(); } +| + { return new JBuffer(); } +| rname = ModuleName() + { + if (rname.indexOf('.', 0) < 0) { + rname = curModuleName + "." + rname; + } + JRecord r = recTab.get(rname); + if (r == null) { + System.out.println("Type " + rname + " not known. Exiting."); + System.exit(1); + } + return r; + } +} + +JMap Map() : +{ + JType jt1; + JType jt2; +} +{ + + + jt1 = Type() + + jt2 = Type() + + { return new JMap(jt1, jt2); } +} + +JVector Vector() : +{ + JType jt; +} +{ + + + jt = Type() + + { return new JVector(jt); } +} diff --git a/src/java/org/apache/hadoop/record/compiler/package.html b/src/java/org/apache/hadoop/record/compiler/package.html new file mode 100644 index 00000000000..23cac15ffb4 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/package.html @@ -0,0 +1,31 @@ + + + + + + + Hadoop Record Compiler + + + This package contains classes needed for code generation + from the hadoop record compiler. CppGenerator and JavaGenerator + are the main entry points from the parser. There are classes + corrsponding to every primitive type and compound type + included in Hadoop record I/O syntax. + + diff --git a/src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java b/src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java new file mode 100644 index 00000000000..82d4c8affc4 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; + +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents a type information for a field, which is made up of its + * ID (name) and its type (a TypeID object). 
+ */ +public class FieldTypeInfo +{ + + private String fieldID; + private TypeID typeID; + + /** + * Construct a FieldTypeInfo with the given field name and type + */ + FieldTypeInfo(String fieldID, TypeID typeID) { + this.fieldID = fieldID; + this.typeID = typeID; + } + + /** + * get the field's TypeID object + */ + public TypeID getTypeID() { + return typeID; + } + + /** + * get the field's id (name) + */ + public String getFieldID() { + return fieldID; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeString(fieldID, tag); + typeID.write(rout, tag); + } + + /** + * Two FieldTypeInfos are equal if each of their fields matches + */ + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof FieldTypeInfo)) + return false; + FieldTypeInfo fti = (FieldTypeInfo) o; + // first check if fieldID matches + if (!this.fieldID.equals(fti.fieldID)) { + return false; + } + // now see if typeID matches + return (this.typeID.equals(fti.typeID)); + } + + /** + * We use a basic hashcode implementation, since this class will likely not + * be used as a hashmap key + */ + public int hashCode() { + return 37*17+typeID.hashCode() + 37*17+fieldID.hashCode(); + } + + + public boolean equals(FieldTypeInfo ti) { + // first check if fieldID matches + if (!this.fieldID.equals(ti.fieldID)) { + return false; + } + // now see if typeID matches + return (this.typeID.equals(ti.typeID)); + } + +} + diff --git a/src/java/org/apache/hadoop/record/meta/MapTypeID.java b/src/java/org/apache/hadoop/record/meta/MapTypeID.java new file mode 100644 index 00000000000..2180d94adc1 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/MapTypeID.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents typeID for a Map + */ +public class MapTypeID extends TypeID { + + private TypeID typeIDKey; + private TypeID typeIDValue; + + public MapTypeID(TypeID typeIDKey, TypeID typeIDValue) { + super(RIOType.MAP); + this.typeIDKey = typeIDKey; + this.typeIDValue = typeIDValue; + } + + /** + * get the TypeID of the map's key element + */ + public TypeID getKeyTypeID() { + return this.typeIDKey; + } + + /** + * get the TypeID of the map's value element + */ + public TypeID getValueTypeID() { + return this.typeIDValue; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeByte(typeVal, tag); + typeIDKey.write(rout, tag); + typeIDValue.write(rout, tag); + } + + /** + * Two map typeIDs are equal if their constituent elements have the + * same type + */ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + + MapTypeID mti = (MapTypeID) o; + + return this.typeIDKey.equals(mti.typeIDKey) && + this.typeIDValue.equals(mti.typeIDValue); + } + + /** + * We use a basic hashcode implementation, since this class will likely not + * be used as a hashmap key + */ + public int hashCode() { + return 37*17+typeIDKey.hashCode() + 37*17+typeIDValue.hashCode(); + } + +} diff --git a/src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java b/src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java new file mode 100644 index 00000000000..2e24d7861c7 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.record.RecordInput; +import org.apache.hadoop.record.RecordOutput; + + +/** + * A record's Type Information object which can read/write itself. + * + * Type information for a record comprises metadata about the record, + * as well as a collection of type information for each field in the record. + */ +public class RecordTypeInfo extends org.apache.hadoop.record.Record +{ + + private String name; + // A RecordTypeInfo is really just a wrapper around StructTypeID + StructTypeID sTid; + // A RecordTypeInfo object is just a collection of TypeInfo objects for each of its fields. + //private ArrayList typeInfos = new ArrayList(); + // we keep a hashmap of struct/record names and their type information, as we need it to + // set filters when reading nested structs. This map is used during deserialization. + //private Map structRTIs = new HashMap(); + + /** + * Create an empty RecordTypeInfo object. 
+ */ + public RecordTypeInfo() { + sTid = new StructTypeID(); + } + + /** + * Create a RecordTypeInfo object representing a record with the given name + * @param name Name of the record + */ + public RecordTypeInfo(String name) { + this.name = name; + sTid = new StructTypeID(); + } + + /* + * private constructor + */ + private RecordTypeInfo(String name, StructTypeID stid) { + this.sTid = stid; + this.name = name; + } + + /** + * return the name of the record + */ + public String getName() { + return name; + } + + /** + * set the name of the record + */ + public void setName(String name) { + this.name = name; + } + + /** + * Add a field. + * @param fieldName Name of the field + * @param tid Type ID of the field + */ + public void addField(String fieldName, TypeID tid) { + sTid.getFieldTypeInfos().add(new FieldTypeInfo(fieldName, tid)); + } + + private void addAll(Collection tis) { + sTid.getFieldTypeInfos().addAll(tis); + } + + /** + * Return a collection of field type infos + */ + public Collection getFieldTypeInfos() { + return sTid.getFieldTypeInfos(); + } + + /** + * Return the type info of a nested record. We only consider nesting + * to one level. + * @param name Name of the nested record + */ + public RecordTypeInfo getNestedStructTypeInfo(String name) { + StructTypeID stid = sTid.findStruct(name); + if (null == stid) return null; + return new RecordTypeInfo(name, stid); + } + + /** + * Serialize the type information for a record + */ + public void serialize(RecordOutput rout, String tag) throws IOException { + // write out any header, version info, here + rout.startRecord(this, tag); + rout.writeString(name, tag); + sTid.writeRest(rout, tag); + rout.endRecord(this, tag); + } + + /** + * Deserialize the type information for a record + */ + public void deserialize(RecordInput rin, String tag) throws IOException { + // read in any header, version info + rin.startRecord(tag); + // name + this.name = rin.readString(tag); + sTid.read(rin, tag); + rin.endRecord(tag); + } + + /** + * This class doesn't implement Comparable as it's not meant to be used + * for anything besides de/serializing. + * So we always throw an exception. + * Not implemented. Always returns 0 if another RecordTypeInfo is passed in. + */ + public int compareTo (final Object peer_) throws ClassCastException { + if (!(peer_ instanceof RecordTypeInfo)) { + throw new ClassCastException("Comparing different types of records."); + } + throw new UnsupportedOperationException("compareTo() is not supported"); + } +} + diff --git a/src/java/org/apache/hadoop/record/meta/StructTypeID.java b/src/java/org/apache/hadoop/record/meta/StructTypeID.java new file mode 100644 index 00000000000..e18ed27a3ac --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/StructTypeID.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.record.RecordInput; +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents typeID for a struct + */ +public class StructTypeID extends TypeID { + private ArrayList typeInfos = new ArrayList(); + + StructTypeID() { + super(RIOType.STRUCT); + } + + /** + * Create a StructTypeID based on the RecordTypeInfo of some record + */ + public StructTypeID(RecordTypeInfo rti) { + super(RIOType.STRUCT); + typeInfos.addAll(rti.getFieldTypeInfos()); + } + + void add (FieldTypeInfo ti) { + typeInfos.add(ti); + } + + public Collection getFieldTypeInfos() { + return typeInfos; + } + + /* + * return the StructTypeiD, if any, of the given field + */ + StructTypeID findStruct(String name) { + // walk through the list, searching. Not the most efficient way, but this + // in intended to be used rarely, so we keep it simple. + // As an optimization, we can keep a hashmap of record name to its RTI, for later. + for (FieldTypeInfo ti : typeInfos) { + if ((0 == ti.getFieldID().compareTo(name)) && (ti.getTypeID().getTypeVal() == RIOType.STRUCT)) { + return (StructTypeID) ti.getTypeID(); + } + } + return null; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeByte(typeVal, tag); + writeRest(rout, tag); + } + + /* + * Writes rest of the struct (excluding type value). + * As an optimization, this method is directly called by RTI + * for the top level record so that we don't write out the byte + * indicating that this is a struct (since top level records are + * always structs). + */ + void writeRest(RecordOutput rout, String tag) throws IOException { + rout.writeInt(typeInfos.size(), tag); + for (FieldTypeInfo ti : typeInfos) { + ti.write(rout, tag); + } + } + + /* + * deserialize ourselves. Called by RTI. + */ + void read(RecordInput rin, String tag) throws IOException { + // number of elements + int numElems = rin.readInt(tag); + for (int i=0; i it = stID.getFieldTypeInfos().iterator(); + while (it.hasNext()) { + FieldTypeInfo tInfo = it.next(); + skip(rin, tag, tInfo.getTypeID()); + } + rin.endRecord(tag); + break; + case TypeID.RIOType.VECTOR: + org.apache.hadoop.record.Index vidx1 = rin.startVector(tag); + VectorTypeID vtID = (VectorTypeID) typeID; + for (; !vidx1.done(); vidx1.incr()) { + skip(rin, tag, vtID.getElementTypeID()); + } + rin.endVector(tag); + break; + default: + // shouldn't be here + throw new IOException("Unknown typeID when skipping bytes"); + } + } +} diff --git a/src/java/org/apache/hadoop/record/meta/VectorTypeID.java b/src/java/org/apache/hadoop/record/meta/VectorTypeID.java new file mode 100644 index 00000000000..e4a2b3f0bd7 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/VectorTypeID.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; + +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents typeID for vector. + */ +public class VectorTypeID extends TypeID { + private TypeID typeIDElement; + + public VectorTypeID(TypeID typeIDElement) { + super(RIOType.VECTOR); + this.typeIDElement = typeIDElement; + } + + public TypeID getElementTypeID() { + return this.typeIDElement; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeByte(typeVal, tag); + typeIDElement.write(rout, tag); + } + + /** + * Two vector typeIDs are equal if their constituent elements have the + * same type + */ + public boolean equals(Object o) { + if (!super.equals (o)) + return false; + + VectorTypeID vti = (VectorTypeID) o; + return this.typeIDElement.equals(vti.typeIDElement); + } + + /** + * We use a basic hashcode implementation, since this class will likely not + * be used as a hashmap key + */ + public int hashCode() { + return 37*17+typeIDElement.hashCode(); + } + +} diff --git a/src/java/org/apache/hadoop/record/package.html b/src/java/org/apache/hadoop/record/package.html new file mode 100644 index 00000000000..d736f4a38f7 --- /dev/null +++ b/src/java/org/apache/hadoop/record/package.html @@ -0,0 +1,800 @@ + + + + + + + Hadoop Record I/O + + + Hadoop record I/O contains classes and a record description language + translator for simplifying serialization and deserialization of records in a + language-neutral manner. + +

Introduction

+ + Software systems of any significant complexity require mechanisms for data +interchange with the outside world. These interchanges typically involve the +marshaling and unmarshaling of logical units of data to and from data streams +(files, network connections, memory buffers etc.). Applications usually have +some code for serializing and deserializing the data types that they manipulate +embedded in them. The work of serialization has several features that make +automatic code generation for it worthwhile. Given a particular output encoding +(binary, XML, etc.), serialization of primitive types and simple compositions +of primitives (structs, vectors etc.) is a very mechanical task. Manually +written serialization code can be susceptible to bugs especially when records +have a large number of fields or a record definition changes between software +versions. Lastly, it can be very useful for applications written in different +programming languages to be able to share and interchange data. This can be +made a lot easier by describing the data records manipulated by these +applications in a language agnostic manner and using the descriptions to derive +implementations of serialization in multiple target languages. + +This document describes Hadoop Record I/O, a mechanism that is aimed +at +
    +
  • enabling the specification of simple serializable data types (records) +
  • enabling the generation of code in multiple target languages for +marshaling and unmarshaling such types +
  • providing target language specific support that will enable application +programmers to incorporate generated code into their applications +
+ +The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR, +ASN.1, PADS and ICE. While these systems all include a DDL that enables +the specification of most record types, they differ widely in what else they +focus on. The focus in Hadoop Record I/O is on data marshaling and +multi-lingual support. We take a translator-based approach to serialization. +Hadoop users have to describe their data in a simple data description +language. The Hadoop DDL translator rcc generates code that users +can invoke in order to read/write their data from/to simple stream +abstractions. Next we list explicitly some of the goals and non-goals of +Hadoop Record I/O. + + +

Goals

+ +
    +
  • Support for commonly used primitive types. Hadoop should include as +primitives commonly used builtin types from programming languages we intend to +support. + +
  • Support for common data compositions (including recursive compositions). +Hadoop should support widely used composite types such as structs and +vectors. + +
  • Code generation in multiple target languages. Hadoop should be capable of +generating serialization code in multiple target languages and should be +easily extensible to new target languages. The initial target languages are +C++ and Java. + +
  • Support for generated target languages. Hadoop should include support +in the form of headers, libraries, and packages for supported target languages +that enable easy inclusion and use of generated code in applications. +
  • Support for multiple output encodings. Candidates include +packed binary, comma-separated text, XML etc. + +
  • Support for specifying record types in a backwards/forwards compatible +manner. This will probably be in the form of support for optional fields in +records. This version of the document does not include a description of the +planned mechanism; we intend to include it in the next iteration. +
+ +

Non-Goals

+ +
    +
  • Serializing existing arbitrary C++ classes. +
  • Serializing complex data structures such as trees, linked lists etc. +
  • Built-in indexing schemes, compression, or check-sums. +
  • Dynamic construction of objects from an XML schema. +
+ +The remainder of this document describes the features of Hadoop record I/O +in more detail. Section 2 describes the data types supported by the system. +Section 3 lays out the DDL syntax with some examples of simple records. +Section 4 describes the process of code generation with rcc. Section 5 +describes target language mappings and support for Hadoop types. We include a +fairly complete description of C++ mappings with intent to include Java and +others in upcoming iterations of this document. The last section talks about +supported output encodings. + + +

Data Types and Streams

+ +This section describes the primitive and composite types supported by Hadoop. +We aim to support a set of types that can be used to simply and efficiently +express a wide range of record types in different programming languages. + +

Primitive Types

+ +For the most part, the primitive types of Hadoop map directly to primitive +types in high level programming languages. Special cases are the +ustring (a Unicode string) and buffer types, which we believe +find wide use and which are usually implemented in library code and not +available as language built-ins. Hadoop also supplies these via library code +when a target language built-in is not present and there is no widely +adopted "standard" implementation. The complete list of primitive types is: + +
    +
  • byte: An 8-bit unsigned integer. +
  • boolean: A boolean value. +
  • int: A 32-bit signed integer. +
  • long: A 64-bit signed integer. +
  • float: A single precision floating point number as described by + IEEE-754. +
  • double: A double precision floating point number as described by + IEEE-754. +
  • ustring: A string consisting of Unicode characters. +
  • buffer: An arbitrary sequence of bytes. +
+ + +

Composite Types

+Hadoop supports a small set of composite types that enable the description +of simple aggregate types and containers. A composite type is serialized +by sequentially serializing its constituent elements. The supported +composite types are: +
    + +
  • record: An aggregate type like a C-struct. This is a list of +typed fields that are together considered a single unit of data. A record +is serialized by sequentially serializing its constituent fields. In addition +to serialization, a record has comparison operations (equality and less-than) +implemented for it; these are defined as memberwise comparisons. +
  • vector: A sequence of entries of the same data type, primitive +or composite. + +
  • map: An associative container mapping instances of a key type to +instances of a value type. The key and value types may themselves be primitive +or composite types. + +
+ +

Streams

+ +Hadoop generates code for serializing and deserializing record types to +abstract streams. For each target language Hadoop defines very simple input +and output stream interfaces. Application writers can usually develop +concrete implementations of these by putting a one-method wrapper around +an existing stream implementation. + +
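+For illustration, here is a minimal Java sketch of such a one-method wrapper.
+The InStream interface shown is an assumption made for this sketch (mirroring
+the C++ stream interface described later in this document), not the library's
+actual Java API:
+
+import java.io.IOException;
+import java.io.InputStream;
+
+// Hypothetical one-method stream abstraction used only for this sketch.
+interface InStream {
+  // Read up to len bytes into buf; returns the number of bytes read, or -1 at end of stream.
+  int read(byte[] buf, int off, int len) throws IOException;
+}
+
+// The "one-method wrapper" around an existing stream implementation.
+class InputStreamAdapter implements InStream {
+  private final InputStream in;
+
+  InputStreamAdapter(InputStream in) {
+    this.in = in;
+  }
+
+  public int read(byte[] buf, int off, int len) throws IOException {
+    return in.read(buf, off, len); // delegate directly to the wrapped stream
+  }
+}
+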

DDL Syntax and Examples

+ +We now describe the syntax of the Hadoop data description language. This is +followed by a few examples of DDL usage. + +

Hadoop DDL Syntax

+ +

+recfile = *include module *record
+include = "include" path
+path = (relative-path / absolute-path)
+module = "module" module-name
+module-name = name *("." name)
+record := "class" name "{" 1*(field) "}"
+field := type name ";"
+name :=  ALPHA (ALPHA / DIGIT / "_" )*
+type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" /
+          "long" / "float" / "double" /
+          "ustring" / "buffer")
+ctype := ("vector" "<" type ">") /
+         ("map" "<" type "," type ">") / name
+
+ +A DDL file describes one or more record types. It begins with zero or +more include declarations, a single mandatory module declaration +followed by zero or more class declarations. The semantics of each of +these declarations are described below: + +
    + +
  • include: An include declaration specifies a DDL file to be +referenced when generating code for types in the current DDL file. Record types +in the current compilation unit may refer to types in all included files. +File inclusion is recursive. An include does not trigger code +generation for the referenced file. + +
  • module: Every Hadoop DDL file must have a single module +declaration that follows the list of includes and precedes all record +declarations. A module declaration identifies a scope within which +the names of all types in the current file are visible. Module names are +mapped to C++ namespaces, Java packages etc. in generated code. + +
  • class: Record types are specified through class +declarations. A class declaration is like a Java class declaration. +It specifies a named record type and a list of fields that constitute records +of the type. Usage is illustrated in the following examples. +
+ +

Examples

+ +
    +
  • A simple DDL file links.jr with just one record declaration. +
    
    +module links {
    +    class Link {
    +        ustring URL;
    +        boolean isRelative;
    +        ustring anchorText;
    +    };
    +}
    +
    + +
  • A DDL file outlinks.jr which includes another +
    
    +include "links.jr"
    +
    +module outlinks {
    +    class OutLinks {
    +        ustring baseURL;
    +        vector<links.Link> outLinks;
    +    };
    +}
    +
    +
+ +

Code Generation

+ +The Hadoop translator is written in Java. Invocation is done by executing a +wrapper shell script named rcc. It takes a mandatory list of +record description files and an optional target language argument, +--language or -l (the default is Java). Thus a typical invocation would look like: +

+$ rcc -l C++ <filename> ...
+
+ + +

Target Language Mappings and Support

+ +For all target languages, the unit of code generation is a record type. +For each record type, Hadoop generates code for serialization and +deserialization, record comparison and access to record members. + +

C++

+ +Support for including Hadoop-generated C++ code in applications comes in the +form of a header file, recordio.hh, which needs to be included in source +that uses Hadoop types, and a library, librecordio.a, which applications need +to link against. The header declares the hadoop C++ namespace, which defines +appropriate types for the various primitives, declares the basic interfaces for +records and streams, and enumerates the supported serialization encodings. +Declarations of these interfaces and a description of their semantics follow: +

+namespace hadoop {
+
+  enum RecFormat { kBinary, kXML, kCSV };
+
+  class InStream {
+  public:
+    virtual ssize_t read(void *buf, size_t n) = 0;
+  };
+
+  class OutStream {
+  public:
+    virtual ssize_t write(const void *buf, size_t n) = 0;
+  };
+
+  class IOError : public runtime_error {
+  public:
+    explicit IOError(const std::string& msg);
+  };
+
+  class IArchive;
+  class OArchive;
+
+  class RecordReader {
+  public:
+    RecordReader(InStream& in, RecFormat fmt);
+    virtual ~RecordReader(void);
+
+    virtual void read(Record& rec);
+  };
+
+  class RecordWriter {
+  public:
+    RecordWriter(OutStream& out, RecFormat fmt);
+    virtual ~RecordWriter(void);
+
+    virtual void write(Record& rec);
+  };
+
+
+  class Record {
+  public:
+    virtual std::string type(void) const = 0;
+    virtual std::string signature(void) const = 0;
+  protected:
+    virtual bool validate(void) const = 0;
+
+    virtual void
+    serialize(OArchive& oa, const std::string& tag) const = 0;
+
+    virtual void
+    deserialize(IArchive& ia, const std::string& tag) = 0;
+  };
+}
+
+ +
    + +
  • RecFormat: An enumeration of the serialization encodings supported +by this implementation of Hadoop. + +
  • InStream: A simple abstraction for an input stream. This has a +single public read method that reads n bytes from the stream into +the buffer buf. Has the same semantics as a blocking read system +call. Returns the number of bytes read or -1 if an error occurs. + +
  • OutStream: A simple abstraction for an output stream. This has a +single write method that writes n bytes to the stream from the +buffer buf. Has the same semantics as a blocking write system +call. Returns the number of bytes written or -1 if an error occurs. + +
  • RecordReader: A RecordReader reads records one at a time from +an underlying stream in a specified record format. The reader is instantiated +with a stream and a serialization format. It has a read method that +takes an instance of a record and deserializes the record from the stream. + +
  • RecordWriter: A RecordWriter writes records one at a +time to an underlying stream in a specified record format. The writer is +instantiated with a stream and a serialization format. It has a +write method that takes an instance of a record and serializes the +record to the stream. + +
  • Record: The base class for all generated record types. This has two +public methods type and signature that return the typename and the +type signature of the record. + +
+ +Two files are generated for each record file (note: not for each record). If a +record file is named "name.jr", the generated files are +"name.jr.cc" and "name.jr.hh", containing serialization +implementations and record type declarations respectively. + +For each record in the DDL file, the generated header file will contain a +class definition corresponding to the record type; method definitions for the +generated type will be present in the '.cc' file. The generated class will +inherit from the abstract class hadoop::Record. The DDL file's +module declaration determines the namespace the record belongs to. +Each '.'-delimited token in the module declaration results in the +creation of a namespace. For instance, the declaration module docs.links +results in the creation of a docs namespace and a nested +docs::links namespace. In the preceding examples, the Link class +is placed in the links namespace. The header file corresponding to +the links.jr file will contain: +

+namespace links {
+  class Link : public hadoop::Record {
+    // ....
+  };
+};
+
+ +Each field within the record will cause the generation of a private member +declaration of the appropriate type in the class declaration, and one or more +accessor methods. The generated class will implement the serialize and +deserialize methods defined in hadoop::Record. It will also +implement the inspection methods type and signature from +hadoop::Record. A default constructor and virtual destructor will also +be generated. Serialization code will read/write records into streams that +implement the hadoop::InStream and the hadoop::OutStream interfaces. + +For each member of a record, an accessor method is generated that returns +either the member or a reference to the member. For members that are returned +by value, a setter method is also generated. This is true for primitive +data members of the types byte, int, long, boolean, float and +double. For example, for an int field called MyField the following +code is generated: +

+...
+private:
+  int32_t mMyField;
+  ...
+public:
+  int32_t getMyField(void) const {
+    return mMyField;
+  };
+
+  void setMyField(int32_t m) {
+    mMyField = m;
+  };
+  ...
+
+ +For a ustring, buffer, or composite field, the generated code +contains only accessors that return a reference to the field. A const +and a non-const accessor are generated. For example: +

+...
+private:
+  std::string mMyBuf;
+  ...
+public:
+
+  std::string& getMyBuf() {
+    return mMyBuf;
+  };
+
+  const std::string& getMyBuf() const {
+    return mMyBuf;
+  };
+  ...
+
+ +

Examples

+ +Suppose the inclrec.jr file contains: +

+module inclrec {
+    class RI {
+        int      I32;
+        double   D;
+        ustring  S;
+    };
+}
+
+ +and the testrec.jr file contains: + +

+include "inclrec.jr"
+module testrec {
+    class R {
+        vector<float> VF;
+        RI            Rec;
+        buffer        Buf;
+    };
+}
+
+ +Then the invocation of rcc such as: +

+$ rcc -l c++ inclrec.jr testrec.jr
+
+will result in the generation of four files: +inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}. + +The inclrec.jr.hh file will contain: +

+#ifndef _INCLREC_JR_HH_
+#define _INCLREC_JR_HH_
+
+#include "recordio.hh"
+
+namespace inclrec {
+  
+  class RI : public hadoop::Record {
+
+  private:
+
+    int32_t      I32;
+    double       D;
+    std::string  S;
+
+  public:
+
+    RI(void);
+    virtual ~RI(void);
+
+    virtual bool operator==(const RI& peer) const;
+    virtual bool operator<(const RI& peer) const;
+
+    virtual int32_t getI32(void) const { return I32; }
+    virtual void setI32(int32_t v) { I32 = v; }
+
+    virtual double getD(void) const { return D; }
+    virtual void setD(double v) { D = v; }
+
+    virtual std::string& getS(void) { return S; }
+    virtual const std::string& getS(void) const { return S; }
+
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+
+  protected:
+
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
+  };
+} // end namespace inclrec
+
+#endif /* _INCLREC_JR_HH_ */
+
+
+ +The testrec.jr.hh file will contain: + + +

+
+#ifndef _TESTREC_JR_HH_
+#define _TESTREC_JR_HH_
+
+#include "inclrec.jr.hh"
+
+namespace testrec {
+  class R : public hadoop::Record {
+
+  private:
+
+    std::vector<float> VF;
+    inclrec::RI        Rec;
+    std::string        Buf;
+
+  public:
+
+    R(void);
+    virtual ~R(void);
+
+    virtual bool operator==(const R& peer) const;
+    virtual bool operator<(const R& peer) const;
+
+    virtual std::vector<float>& getVF(void);
+    virtual const std::vector<float>& getVF(void) const;
+
+    virtual std::string& getBuf(void);
+    virtual const std::string& getBuf(void) const;
+
+    virtual inclrec::RI& getRec(void);
+    virtual const inclrec::RI& getRec(void) const;
+    
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
+    
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+  };
+}; // end namespace testrec
+#endif /* _TESTREC_JR_HH_ */
+
+
+ +

Java

+ +Code generation for Java is similar to that for C++. A Java class is generated +for each record type with private members corresponding to the fields. Getters +and setters for fields are also generated. Some differences arise in the +way comparison is expressed and in the mapping of modules to packages and +classes to files. For equality testing, an equals method is generated +for each record type. As per Java requirements a hashCode method is also +generated. For comparison a compareTo method is generated for each +record type. This has the semantics as defined by the Java Comparable +interface, that is, the method returns a negative integer, zero, or a positive +integer as the invoked object is less than, equal to, or greater than the +comparison parameter. + +A .java file is generated per record type as opposed to per DDL +file as in C++. The module declaration translates to a Java +package declaration. The module name maps to an identical Java package +name. In addition to this mapping, the DDL compiler creates the appropriate +directory hierarchy for the package and places the generated .java +files in the correct directories. + +
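+As a rough illustration, the class generated for the Link record from the
+earlier DDL example might have approximately the following shape. This is a
+hand-written sketch based on the description above, not actual rcc output;
+method names and details of the generated code will differ:
+
+package links;
+
+public class Link implements Comparable {
+  // one private member per DDL field (ustring maps to java.lang.String)
+  private String URL;
+  private boolean isRelative;
+  private String anchorText;
+
+  public String getURL() { return URL; }
+  public void setURL(String v) { URL = v; }
+  public boolean getIsRelative() { return isRelative; }
+  public void setIsRelative(boolean v) { isRelative = v; }
+  public String getAnchorText() { return anchorText; }
+  public void setAnchorText(String v) { anchorText = v; }
+
+  public boolean equals(Object o) {
+    if (!(o instanceof Link)) return false;
+    Link p = (Link) o;
+    return URL.equals(p.URL) && isRelative == p.isRelative
+        && anchorText.equals(p.anchorText);
+  }
+
+  public int hashCode() {
+    int h = URL.hashCode();
+    h = 37 * h + (isRelative ? 1 : 0);
+    return 37 * h + anchorText.hashCode();
+  }
+
+  // Comparable semantics: negative, zero or positive as this record is
+  // less than, equal to or greater than the peer (memberwise comparison).
+  public int compareTo(Object o) {
+    Link p = (Link) o;
+    int c = URL.compareTo(p.URL);
+    if (c != 0) return c;
+    c = Boolean.valueOf(isRelative).compareTo(Boolean.valueOf(p.isRelative));
+    if (c != 0) return c;
+    return anchorText.compareTo(p.anchorText);
+  }
+
+  // serialize/deserialize methods against the record I/O streams are also
+  // generated; they are omitted from this sketch.
+}
+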

Mapping Summary

+ +

+DDL Type        C++ Type            Java Type 
+
+boolean         bool                boolean
+byte            int8_t              byte
+int             int32_t             int
+long            int64_t             long
+float           float               float
+double          double              double
+ustring         std::string         java.lang.String
+buffer          std::string         org.apache.hadoop.record.Buffer
+class type      class type          class type
+vector<type>    std::vector<type>   java.util.ArrayList<type>
+map<type,type>  std::map<type,type> java.util.TreeMap<type,type>
+
+ +

Data encodings

+ +This section describes the format of the data encodings supported by Hadoop. +Currently, three data encodings are supported, namely binary, CSV and XML. + +

Binary Serialization Format

+ +The binary data encoding format is fairly dense. Serialization of composite +types is simply defined as a concatenation of serializations of the constituent +elements (lengths are included in vectors and maps). + +Composite types are serialized as follows: +
    +
  • class: Sequence of serialized members. +
  • vector: The number of elements serialized as an int. Followed by a +sequence of serialized elements. +
  • map: The number of key value pairs serialized as an int. Followed +by a sequence of serialized (key,value) pairs. +
+ +Serialization of primitives is more interesting, with a zero compression +optimization for integral types and normalization to UTF-8 for strings. +Primitive types are serialized as follows: + +
    +
  • byte: Represented by 1 byte, as is. +
  • boolean: Represented by 1-byte (0 or 1) +
  • int/long: Integers and longs are serialized zero-compressed. +Represented as 1 byte if -120 <= value < 128. Otherwise, serialized as a +sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents +the number of trailing bytes, N, as the negative number (-120-N). For example, +the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'. +This doesn't help much for 4-byte integers but does a reasonably good job with +longs without bit twiddling. A Java sketch of this encoding is given after this list.
  • float/double: Serialized in IEEE 754 single and double precision +format in network byte order. This is the format used by Java. +
  • ustring: Serialized as 4-byte zero compressed length followed by +data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native +language representation. +
  • buffer: Serialized as a 4-byte zero compressed length followed by the +raw bytes in the buffer. +
+ + +
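+To make the zero-compression scheme concrete, here is a small Java sketch of
+the integer encoding as described above. This is an illustration written for
+this document, not the library's own serialization code, and it only covers
+the cases spelled out in the text:
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+class ZeroCompressSketch {
+  static void writeLong(DataOutputStream out, long v) throws IOException {
+    if (v >= -120 && v < 128) {
+      out.writeByte((int) v);             // small values fit in a single byte
+      return;
+    }
+    if (v < 0) {                          // the text above only defines the
+      throw new IllegalArgumentException( // multi-byte form for non-negative values
+          "multi-byte encoding of negative values is not covered by this sketch");
+    }
+    int n = 0;                            // number of significant trailing bytes
+    for (long t = v; t != 0; t >>>= 8) {
+      n++;
+    }
+    out.writeByte(-120 - n);              // header byte encodes N as -(120 + N)
+    for (int shift = (n - 1) * 8; shift >= 0; shift -= 8) {
+      out.writeByte((int) (v >>> shift)); // payload, most significant byte first
+    }
+  }
+
+  public static void main(String[] args) throws IOException {
+    ByteArrayOutputStream buf = new ByteArrayOutputStream();
+    writeLong(new DataOutputStream(buf), 1024);
+    for (byte b : buf.toByteArray()) {
+      System.out.printf("%02x ", b & 0xff); // prints "86 04 00", as in the text
+    }
+    System.out.println();
+  }
+}
+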

CSV Serialization Format

+ +The CSV serialization format has a lot more structure than the "standard" +Excel CSV format, but we believe the additional structure is useful because + +
    +
  • it makes parsing a lot easier without detracting too much from legibility +
  • the delimiters around composites make it obvious when one is reading a +sequence of Hadoop records +
+ +Serialization formats for the various types are detailed in the grammar that +follows. The notable feature of the formats is the use of delimiters to +indicate certain field types. A worked example is given after the grammar. +
    +
  • A string field begins with a single quote ('). +
  • A buffer field begins with a sharp (#). +
  • A class, vector or map begins with 's{', 'v{' or 'm{' respectively and +ends with '}'. +
+ +The CSV format can be described by the following grammar: + +

+record = primitive / struct / vector / map
+primitive = boolean / int / long / float / double / ustring / buffer
+
+boolean = "T" / "F"
+int = ["-"] 1*DIGIT
+long = ";" ["-"] 1*DIGIT
+float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+
+ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+struct = "s{" record *("," record) "}"
+vector = "v{" [record *("," record)] "}"
+map = "m{" [*(record "," record)] "}"
+
+ +
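+As an illustration (constructed from the grammar above rather than taken from
+actual serializer output), a struct containing the int 5, the long 1024, the
+ustring "has,comma" and a vector of two floats would be written as:
+
+s{5,;1024,'has%2ccomma,v{0.1,-0.89}}
+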

XML Serialization Format

+ +The XML serialization format is the same as that used by Apache XML-RPC +(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original +XML-RPC format and adds some additional data types. Not all record I/O types are +directly expressible in this format, and access to the DDL is required in +order to convert them to valid types. All types, primitive or composite, are +represented by <value> elements. The particular XML-RPC type is +indicated by a nested element in the <value> element. The encoding for +records is always UTF-8. Primitive types are serialized as follows: +
    +
  • byte: XML tag <ex:i1>. Values: 1-byte unsigned +integers represented in US-ASCII +
  • boolean: XML tag <boolean>. Values: "0" or "1" +
  • int: XML tags <i4> or <int>. Values: 4-byte +signed integers represented in US-ASCII. +
  • long: XML tag <ex:i8>. Values: 8-byte signed integers +represented in US-ASCII. +
  • float: XML tag <ex:float>. Values: Single precision +floating point numbers represented in US-ASCII. +
  • double: XML tag <double>. Values: Double precision +floating point numbers represented in US-ASCII. +
  • ustring: XML tag <string>. Values: String values +represented as UTF-8. XML does not permit all Unicode characters in literal +data. In particular, NULLs and control chars are not allowed. Additionally, +XML processors are required to replace carriage returns with line feeds and to +replace CRLF sequences with line feeds. Programming languages that we work +with do not impose these restrictions on string types. To work around these +restrictions, disallowed characters and CRs are percent escaped in strings. +The '%' character is also percent escaped.
  • buffer: XML tag <string>. Values: Arbitrary binary +data. Represented as hexBinary, each byte is replaced by its 2-byte +hexadecimal representation.
+ +Composite types are serialized as follows: + +
    +
  • class: XML tag <struct>. A struct is a sequence of +<member> elements. Each <member> element has a <name> +element and a <value> element. The <name> is a string that must +match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented +by a <value> element. + +
  • vector: XML tag <array>. An <array> contains a +single <data> element. The <data> element is a sequence of +<value> elements, each of which represents an element of the vector. +
  • map: XML tag <array>. Same as vector. + +
+ +For example: + +

+class {
+  int           MY_INT;            // value 5
+  vector<float>  MY_VEC;           // values 0.1, -0.89, 2.45e4
+  buffer        MY_BUF;            // value '\00\n\tabc%'
+}
+
+ +is serialized as + +

+<value>
+  <struct>
+    <member>
+      <name>MY_INT</name>
+      <value><i4>5</i4></value>
+    </member>
+    <member>
+      <name>MY_VEC</name>
+      <value>
+        <array>
+          <data>
+            <value><ex:float>0.1</ex:float></value>
+            <value><ex:float>-0.89</ex:float></value>
+            <value><ex:float>2.45e4</ex:float></value>
+          </data>
+        </array>
+      </value>
+    </member>
+    <member>
+      <name>MY_BUF</name>
+      <value><string>%00\n\tabc%25</string></value>
+    </member>
+  </struct>
+</value> 
+
+ + + diff --git a/src/java/org/apache/hadoop/security/AccessControlException.java b/src/java/org/apache/hadoop/security/AccessControlException.java new file mode 100644 index 00000000000..d04c52948c8 --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessControlException.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +/** + * An exception class for access control related issues. + */ +public class AccessControlException + extends org.apache.hadoop.fs.permission.AccessControlException { + + //Required by {@link java.io.Serializable}. + private static final long serialVersionUID = 1L; + + /** + * Default constructor is needed for unwrapping from + * {@link org.apache.hadoop.ipc.RemoteException}. + */ + public AccessControlException() { + super("Permission denied."); + } + + /** + * Constructs an {@link AccessControlException} + * with the specified detail message. + * @param s the detail message. + */ + public AccessControlException(String s) {super(s);} + + /** + * Constructs a new exception with the specified cause and a detail + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public AccessControlException(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/apache/hadoop/security/AccessKey.java b/src/java/org/apache/hadoop/security/AccessKey.java new file mode 100644 index 00000000000..81b6383381e --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessKey.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import javax.crypto.Mac; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; + +/** + * Key used for generating and verifying access tokens + */ +public class AccessKey implements Writable { + private long keyID; + private Text key; + private long expiryDate; + private Mac mac; + + public AccessKey() { + this(0L, new Text(), 0L); + } + + public AccessKey(long keyID, Text key, long expiryDate) { + this.keyID = keyID; + this.key = key; + this.expiryDate = expiryDate; + } + + public long getKeyID() { + return keyID; + } + + public Text getKey() { + return key; + } + + public long getExpiryDate() { + return expiryDate; + } + + public Mac getMac() { + return mac; + } + + public void setMac(Mac mac) { + this.mac = mac; + } + + static boolean isEqual(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj instanceof AccessKey) { + AccessKey that = (AccessKey) obj; + return this.keyID == that.keyID && isEqual(this.key, that.key) + && this.expiryDate == that.expiryDate; + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() { + return key == null ? 0 : key.hashCode(); + } + + // /////////////////////////////////////////////// + // Writable + // /////////////////////////////////////////////// + /** + */ + public void write(DataOutput out) throws IOException { + WritableUtils.writeVLong(out, keyID); + key.write(out); + WritableUtils.writeVLong(out, expiryDate); + } + + /** + */ + public void readFields(DataInput in) throws IOException { + keyID = WritableUtils.readVLong(in); + key.readFields(in); + expiryDate = WritableUtils.readVLong(in); + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/AccessToken.java b/src/java/org/apache/hadoop/security/AccessToken.java new file mode 100644 index 00000000000..5a5d9a72f46 --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessToken.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +public class AccessToken implements Writable { + public static final AccessToken DUMMY_TOKEN = new AccessToken(); + private Text tokenID; + private Text tokenAuthenticator; + + public AccessToken() { + this(new Text(), new Text()); + } + + public AccessToken(Text tokenID, Text tokenAuthenticator) { + this.tokenID = tokenID; + this.tokenAuthenticator = tokenAuthenticator; + } + + public Text getTokenID() { + return tokenID; + } + + public Text getTokenAuthenticator() { + return tokenAuthenticator; + } + + static boolean isEqual(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj instanceof AccessToken) { + AccessToken that = (AccessToken) obj; + return isEqual(this.tokenID, that.tokenID) + && isEqual(this.tokenAuthenticator, that.tokenAuthenticator); + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() { + return tokenAuthenticator == null ? 0 : tokenAuthenticator.hashCode(); + } + + // /////////////////////////////////////////////// + // Writable + // /////////////////////////////////////////////// + /** + */ + public void write(DataOutput out) throws IOException { + tokenID.write(out); + tokenAuthenticator.write(out); + } + + /** + */ + public void readFields(DataInput in) throws IOException { + tokenID.readFields(in); + tokenAuthenticator.readFields(in); + } + +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/AccessTokenHandler.java b/src/java/org/apache/hadoop/security/AccessTokenHandler.java new file mode 100644 index 00000000000..8ede2bb3104 --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessTokenHandler.java @@ -0,0 +1,289 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.security.GeneralSecurityException; +import java.security.SecureRandom; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import javax.crypto.KeyGenerator; +import javax.crypto.Mac; +import javax.crypto.spec.SecretKeySpec; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; + +/** + * AccessTokenHandler can be instantiated in 2 modes, master mode and slave + * mode. Master can generate new access keys and export access keys to slaves, + * while slaves can only import and use access keys received from master. Both + * master and slave can generate and verify access tokens. Typically, master + * mode is used by NN and slave mode is used by DN. + */ +public class AccessTokenHandler { + private static final Log LOG = LogFactory.getLog(AccessTokenHandler.class); + public static final String STRING_ENABLE_ACCESS_TOKEN = "dfs.access.token.enable"; + public static final String STRING_ACCESS_KEY_UPDATE_INTERVAL = "dfs.access.key.update.interval"; + public static final String STRING_ACCESS_TOKEN_LIFETIME = "dfs.access.token.lifetime"; + + private final boolean isMaster; + /* + * keyUpdateInterval is the interval that NN updates its access keys. It + * should be set long enough so that all live DN's and Balancer should have + * sync'ed their access keys with NN at least once during each interval. + */ + private final long keyUpdateInterval; + private final long tokenLifetime; + private long serialNo = new SecureRandom().nextLong(); + private KeyGenerator keyGen; + private AccessKey currentKey; + private AccessKey nextKey; + private Map allKeys; + + public static enum AccessMode { + READ, WRITE, COPY, REPLACE + }; + + /** + * Constructor + * + * @param isMaster + * @param keyUpdateInterval + * @param tokenLifetime + * @throws IOException + */ + public AccessTokenHandler(boolean isMaster, long keyUpdateInterval, + long tokenLifetime) throws IOException { + this.isMaster = isMaster; + this.keyUpdateInterval = keyUpdateInterval; + this.tokenLifetime = tokenLifetime; + this.allKeys = new HashMap(); + if (isMaster) { + try { + generateKeys(); + initMac(currentKey); + } catch (GeneralSecurityException e) { + throw (IOException) new IOException( + "Failed to create AccessTokenHandler").initCause(e); + } + } + } + + /** Initialize access keys */ + private synchronized void generateKeys() throws NoSuchAlgorithmException { + keyGen = KeyGenerator.getInstance("HmacSHA1"); + /* + * Need to set estimated expiry dates for currentKey and nextKey so that if + * NN crashes, DN can still expire those keys. NN will stop using the newly + * generated currentKey after the first keyUpdateInterval, however it may + * still be used by DN and Balancer to generate new tokens before they get a + * chance to sync their keys with NN. Since we require keyUpdInterval to be + * long enough so that all live DN's and Balancer will sync their keys with + * NN at least once during the period, the estimated expiry date for + * currentKey is set to now() + 2 * keyUpdateInterval + tokenLifetime. + * Similarly, the estimated expiry date for nextKey is one keyUpdateInterval + * more. 
+ */ + serialNo++; + currentKey = new AccessKey(serialNo, new Text(keyGen.generateKey() + .getEncoded()), System.currentTimeMillis() + 2 * keyUpdateInterval + + tokenLifetime); + serialNo++; + nextKey = new AccessKey(serialNo, new Text(keyGen.generateKey() + .getEncoded()), System.currentTimeMillis() + 3 * keyUpdateInterval + + tokenLifetime); + allKeys.put(currentKey.getKeyID(), currentKey); + allKeys.put(nextKey.getKeyID(), nextKey); + } + + /** Initialize Mac function */ + private synchronized void initMac(AccessKey key) throws IOException { + try { + Mac mac = Mac.getInstance("HmacSHA1"); + mac.init(new SecretKeySpec(key.getKey().getBytes(), "HmacSHA1")); + key.setMac(mac); + } catch (GeneralSecurityException e) { + throw (IOException) new IOException( + "Failed to initialize Mac for access key, keyID=" + key.getKeyID()) + .initCause(e); + } + } + + /** Export access keys, only to be used in master mode */ + public synchronized ExportedAccessKeys exportKeys() { + if (!isMaster) + return null; + if (LOG.isDebugEnabled()) + LOG.debug("Exporting access keys"); + return new ExportedAccessKeys(true, keyUpdateInterval, tokenLifetime, + currentKey, allKeys.values().toArray(new AccessKey[0])); + } + + private synchronized void removeExpiredKeys() { + long now = System.currentTimeMillis(); + for (Iterator> it = allKeys.entrySet() + .iterator(); it.hasNext();) { + Map.Entry e = it.next(); + if (e.getValue().getExpiryDate() < now) { + it.remove(); + } + } + } + + /** + * Set access keys, only to be used in slave mode + */ + public synchronized void setKeys(ExportedAccessKeys exportedKeys) + throws IOException { + if (isMaster || exportedKeys == null) + return; + LOG.info("Setting access keys"); + removeExpiredKeys(); + this.currentKey = exportedKeys.getCurrentKey(); + initMac(currentKey); + AccessKey[] receivedKeys = exportedKeys.getAllKeys(); + for (int i = 0; i < receivedKeys.length; i++) { + if (receivedKeys[i] == null) + continue; + this.allKeys.put(receivedKeys[i].getKeyID(), receivedKeys[i]); + } + } + + /** + * Update access keys, only to be used in master mode + */ + public synchronized void updateKeys() throws IOException { + if (!isMaster) + return; + LOG.info("Updating access keys"); + removeExpiredKeys(); + // set final expiry date of retiring currentKey + allKeys.put(currentKey.getKeyID(), new AccessKey(currentKey.getKeyID(), + currentKey.getKey(), System.currentTimeMillis() + keyUpdateInterval + + tokenLifetime)); + // update the estimated expiry date of new currentKey + currentKey = new AccessKey(nextKey.getKeyID(), nextKey.getKey(), System + .currentTimeMillis() + + 2 * keyUpdateInterval + tokenLifetime); + initMac(currentKey); + allKeys.put(currentKey.getKeyID(), currentKey); + // generate a new nextKey + serialNo++; + nextKey = new AccessKey(serialNo, new Text(keyGen.generateKey() + .getEncoded()), System.currentTimeMillis() + 3 * keyUpdateInterval + + tokenLifetime); + allKeys.put(nextKey.getKeyID(), nextKey); + } + + /** Check if token is well formed */ + private synchronized Boolean verifyToken(long keyID, AccessToken token) + throws IOException { + AccessKey key = allKeys.get(keyID); + if (key == null) { + LOG.warn("Access key for keyID=" + keyID + " doesn't exist."); + return false; + } + if (key.getMac() == null) { + initMac(key); + } + Text tokenID = token.getTokenID(); + Text authenticator = new Text(key.getMac().doFinal(tokenID.getBytes())); + return authenticator.equals(token.getTokenAuthenticator()); + } + + /** Generate an access token for current user */ + public 
AccessToken generateToken(long blockID, EnumSet modes) + throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUGI(); + String userID = (ugi == null ? null : ugi.getUserName()); + return generateToken(userID, blockID, modes); + } + + /** Generate an access token for a specified user */ + public synchronized AccessToken generateToken(String userID, long blockID, + EnumSet modes) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Generating access token for user=" + userID + ", blockID=" + + blockID + ", access modes=" + modes + ", keyID=" + + currentKey.getKeyID()); + } + if (modes == null || modes.isEmpty()) + throw new IOException("access modes can't be null or empty"); + ByteArrayOutputStream buf = new ByteArrayOutputStream(4096); + DataOutputStream out = new DataOutputStream(buf); + WritableUtils.writeVLong(out, System.currentTimeMillis() + tokenLifetime); + WritableUtils.writeVLong(out, currentKey.getKeyID()); + WritableUtils.writeString(out, userID); + WritableUtils.writeVLong(out, blockID); + WritableUtils.writeVInt(out, modes.size()); + for (AccessMode aMode : modes) { + WritableUtils.writeEnum(out, aMode); + } + Text tokenID = new Text(buf.toByteArray()); + return new AccessToken(tokenID, new Text(currentKey.getMac().doFinal( + tokenID.getBytes()))); + } + + /** Check if access should be allowed. userID is not checked if null */ + public Boolean checkAccess(AccessToken token, String userID, long blockID, + AccessMode mode) throws IOException { + long oExpiry = 0; + long oKeyID = 0; + String oUserID = null; + long oBlockID = 0; + EnumSet oModes = EnumSet.noneOf(AccessMode.class); + + try { + ByteArrayInputStream buf = new ByteArrayInputStream(token.getTokenID() + .getBytes()); + DataInputStream in = new DataInputStream(buf); + oExpiry = WritableUtils.readVLong(in); + oKeyID = WritableUtils.readVLong(in); + oUserID = WritableUtils.readString(in); + oBlockID = WritableUtils.readVLong(in); + int length = WritableUtils.readVInt(in); + for (int i = 0; i < length; ++i) { + oModes.add(WritableUtils.readEnum(in, AccessMode.class)); + } + } catch (IOException e) { + throw (IOException) new IOException( + "Unable to parse access token for user=" + userID + ", blockID=" + + blockID + ", access mode=" + mode).initCause(e); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Verifying access token for user=" + userID + ", blockID=" + + blockID + ", access mode=" + mode + ", keyID=" + oKeyID); + } + return (userID == null || userID.equals(oUserID)) && oBlockID == blockID + && System.currentTimeMillis() < oExpiry && oModes.contains(mode) + && verifyToken(oKeyID, token); + } + +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/ExportedAccessKeys.java b/src/java/org/apache/hadoop/security/ExportedAccessKeys.java new file mode 100644 index 00000000000..e5ab2934b4b --- /dev/null +++ b/src/java/org/apache/hadoop/security/ExportedAccessKeys.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableFactories; +import org.apache.hadoop.io.WritableFactory; + +/** + * Object for passing access keys + */ +public class ExportedAccessKeys implements Writable { + public static final ExportedAccessKeys DUMMY_KEYS = new ExportedAccessKeys(); + private boolean isAccessTokenEnabled; + private long keyUpdateInterval; + private long tokenLifetime; + private AccessKey currentKey; + private AccessKey[] allKeys; + + public ExportedAccessKeys() { + this(false, 0, 0, new AccessKey(), new AccessKey[0]); + } + + ExportedAccessKeys(boolean isAccessTokenEnabled, long keyUpdateInterval, + long tokenLifetime, AccessKey currentKey, AccessKey[] allKeys) { + this.isAccessTokenEnabled = isAccessTokenEnabled; + this.keyUpdateInterval = keyUpdateInterval; + this.tokenLifetime = tokenLifetime; + this.currentKey = currentKey; + this.allKeys = allKeys; + } + + public boolean isAccessTokenEnabled() { + return isAccessTokenEnabled; + } + + public long getKeyUpdateInterval() { + return keyUpdateInterval; + } + + public long getTokenLifetime() { + return tokenLifetime; + } + + public AccessKey getCurrentKey() { + return currentKey; + } + + public AccessKey[] getAllKeys() { + return allKeys; + } + + static boolean isEqual(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj instanceof ExportedAccessKeys) { + ExportedAccessKeys that = (ExportedAccessKeys) obj; + return this.isAccessTokenEnabled == that.isAccessTokenEnabled + && this.keyUpdateInterval == that.keyUpdateInterval + && this.tokenLifetime == that.tokenLifetime + && isEqual(this.currentKey, that.currentKey) + && Arrays.equals(this.allKeys, that.allKeys); + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() { + return currentKey == null ? 
0 : currentKey.hashCode(); + } + + // /////////////////////////////////////////////// + // Writable + // /////////////////////////////////////////////// + static { // register a ctor + WritableFactories.setFactory(ExportedAccessKeys.class, + new WritableFactory() { + public Writable newInstance() { + return new ExportedAccessKeys(); + } + }); + } + + /** + */ + public void write(DataOutput out) throws IOException { + out.writeBoolean(isAccessTokenEnabled); + out.writeLong(keyUpdateInterval); + out.writeLong(tokenLifetime); + currentKey.write(out); + out.writeInt(allKeys.length); + for (int i = 0; i < allKeys.length; i++) { + allKeys[i].write(out); + } + } + + /** + */ + public void readFields(DataInput in) throws IOException { + isAccessTokenEnabled = in.readBoolean(); + keyUpdateInterval = in.readLong(); + tokenLifetime = in.readLong(); + currentKey.readFields(in); + this.allKeys = new AccessKey[in.readInt()]; + for (int i = 0; i < allKeys.length; i++) { + allKeys[i] = new AccessKey(); + allKeys[i].readFields(in); + } + } + +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/Group.java b/src/java/org/apache/hadoop/security/Group.java new file mode 100644 index 00000000000..2bb8caad8f7 --- /dev/null +++ b/src/java/org/apache/hadoop/security/Group.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.security.Principal; + +/** + * A group to which a user belongs to. + */ +public class Group implements Principal { + final String group; + + /** + * Create a new Group with the given groupname. + * @param group group name + */ + public Group(String group) { + this.group = group; + } + + @Override + public String getName() { + return group; + } + + @Override + public String toString() { + return group; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((group == null) ? 0 : group.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Group other = (Group) obj; + if (group == null) { + if (other.group != null) + return false; + } else if (!group.equals(other.group)) + return false; + return true; + } +} diff --git a/src/java/org/apache/hadoop/security/InvalidAccessTokenException.java b/src/java/org/apache/hadoop/security/InvalidAccessTokenException.java new file mode 100644 index 00000000000..eabce15ea3b --- /dev/null +++ b/src/java/org/apache/hadoop/security/InvalidAccessTokenException.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security; + +import java.io.IOException; + +/** + * Access token verification failed. + */ +public class InvalidAccessTokenException extends IOException { + private static final long serialVersionUID = 168L; + + public InvalidAccessTokenException() { + super(); + } + + public InvalidAccessTokenException(String msg) { + super(msg); + } +} diff --git a/src/java/org/apache/hadoop/security/PermissionChecker.java b/src/java/org/apache/hadoop/security/PermissionChecker.java new file mode 100644 index 00000000000..ea8246f5132 --- /dev/null +++ b/src/java/org/apache/hadoop/security/PermissionChecker.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.io.IOException; +import java.util.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; + +/** Perform permission checking. */ +public class PermissionChecker { + static final Log LOG = LogFactory.getLog(UserGroupInformation.class); + + public final String user; + protected final Set groups = new HashSet(); + public final boolean isSuper; + + /** + * Checks if the caller has the required permission. + * @param owner username of the owner + * @param supergroup supergroup that the owner belongs to + */ + public PermissionChecker(String owner, String supergroup + ) throws AccessControlException{ + UserGroupInformation ugi = UserGroupInformation.getCurrentUGI(); + if (LOG.isDebugEnabled()) { + LOG.debug("ugi=" + ugi); + } + + if (ugi != null) { + user = ugi.getUserName(); + groups.addAll(Arrays.asList(ugi.getGroupNames())); + isSuper = user.equals(owner) || groups.contains(supergroup); + } + else { + throw new AccessControlException("ugi = null"); + } + } + + /** + * Check if the callers group contains the required values. 
+ * @param group group to check + */ + public boolean containsGroup(String group) {return groups.contains(group);} + + /** + * Verify if the caller has the required permission. This will result into + * an exception if the caller is not allowed to access the resource. + * @param owner owner of the system + * @param supergroup supergroup of the system + */ + public static void checkSuperuserPrivilege(UserGroupInformation owner, + String supergroup) + throws AccessControlException { + PermissionChecker checker = + new PermissionChecker(owner.getUserName(), supergroup); + if (!checker.isSuper) { + throw new AccessControlException("Access denied for user " + + checker.user + ". Superuser privilege is required"); + } + } +} diff --git a/src/java/org/apache/hadoop/security/SecurityUtil.java b/src/java/org/apache/hadoop/security/SecurityUtil.java new file mode 100644 index 00000000000..94b68254c71 --- /dev/null +++ b/src/java/org/apache/hadoop/security/SecurityUtil.java @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.security.Policy; +import java.security.Principal; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeSet; + +import javax.security.auth.Subject; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.authorize.ConfiguredPolicy; +import org.apache.hadoop.security.authorize.PolicyProvider; + +public class SecurityUtil { + + private static final Log LOG = LogFactory.getLog(SecurityUtil.class); + + static { + // Set an empty default policy + setPolicy(new ConfiguredPolicy(new Configuration(), + PolicyProvider.DEFAULT_POLICY_PROVIDER)); + } + + /** + * Set the global security policy for Hadoop. + * + * @param policy {@link Policy} used for authorization. + */ + public static void setPolicy(Policy policy) { + if (LOG.isDebugEnabled()) { + LOG.debug("Setting Hadoop security policy"); + } + Policy.setPolicy(policy); + } + + /** + * Get the current global security policy for Hadoop. + * @return the current {@link Policy} + */ + public static Policy getPolicy() { + return Policy.getPolicy(); + } + + /** + * Get the {@link Subject} for the user identified by ugi. 
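// Illustrative usage sketch (not part of the patched sources): checking the
// calling user's privileges with PermissionChecker. The owner name "hdfs",
// supergroup "supergroup" and group "operators" are hypothetical placeholders.
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.PermissionChecker;
import org.apache.hadoop.security.UserGroupInformation;

class PermissionCheckDemo {
  static void inspectCaller() throws AccessControlException {
    // Resolves the calling user via UserGroupInformation.getCurrentUGI().
    PermissionChecker checker = new PermissionChecker("hdfs", "supergroup");
    System.out.println("user=" + checker.user
        + " superuser=" + checker.isSuper
        + " operator=" + checker.containsGroup("operators"));
  }

  // 'serviceOwner' would typically be the UGI the daemon was started under;
  // the call throws AccessControlException if the current caller is neither
  // that owner nor a member of the supergroup.
  static void requireSuperuser(UserGroupInformation serviceOwner)
      throws AccessControlException {
    PermissionChecker.checkSuperuserPrivilege(serviceOwner, "supergroup");
  }
}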
+ * @param ugi user + * @return the {@link Subject} for the user identified by ugi + */ + public static Subject getSubject(UserGroupInformation ugi) { + if (ugi == null) { + return null; + } + + Set principals = // Number of principals = username + #groups + new HashSet(ugi.getGroupNames().length+1); + User userPrincipal = new User(ugi.getUserName()); + principals.add(userPrincipal); + for (String group : ugi.getGroupNames()) { + Group groupPrincipal = new Group(group); + principals.add(groupPrincipal); + } + principals.add(ugi); + Subject user = + new Subject(false, principals, new HashSet(), new HashSet()); + + return user; + } + + /** + * Class representing a configured access control list. + */ + public static class AccessControlList { + + // Indicates an ACL string that represents access to all users + public static final String WILDCARD_ACL_VALUE = "*"; + + // Set of users who are granted access. + private Set users; + // Set of groups which are granted access + private Set groups; + // Whether all users are granted access. + private boolean allAllowed; + + /** + * Construct a new ACL from a String representation of the same. + * + * The String is a a comma separated list of users and groups. + * The user list comes first and is separated by a space followed + * by the group list. For e.g. "user1,user2 group1,group2" + * + * @param aclString String representation of the ACL + */ + public AccessControlList(String aclString) { + users = new TreeSet(); + groups = new TreeSet(); + if (aclString.contains(WILDCARD_ACL_VALUE) && + aclString.trim().equals(WILDCARD_ACL_VALUE)) { + allAllowed = true; + } else { + String[] userGroupStrings = aclString.split(" ", 2); + + if (userGroupStrings.length >= 1) { + String[] usersStr = userGroupStrings[0].split(","); + if (usersStr.length >= 1) { + addToSet(users, usersStr); + } + } + + if (userGroupStrings.length == 2) { + String[] groupsStr = userGroupStrings[1].split(","); + if (groupsStr.length >= 1) { + addToSet(groups, groupsStr); + } + } + } + } + + public boolean allAllowed() { + return allAllowed; + } + + public Set getUsers() { + return users; + } + + public Set getGroups() { + return groups; + } + + private static final void addToSet(Set set, String[] strings) { + for (String s : strings) { + s = s.trim(); + if (s.length() > 0) { + set.add(s); + } + } + } + } +} diff --git a/src/java/org/apache/hadoop/security/UnixUserGroupInformation.java b/src/java/org/apache/hadoop/security/UnixUserGroupInformation.java new file mode 100644 index 00000000000..62cbb659869 --- /dev/null +++ b/src/java/org/apache/hadoop/security/UnixUserGroupInformation.java @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
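// Illustrative usage sketch (not part of the patched sources): parsing the
// "users groups" ACL syntax accepted by SecurityUtil.AccessControlList.
// The user and group names are made-up examples.
import org.apache.hadoop.security.SecurityUtil.AccessControlList;

class AclDemo {
  public static void main(String[] args) {
    AccessControlList acl = new AccessControlList("alice,bob admins,operators");
    System.out.println(acl.getUsers());    // [alice, bob]
    System.out.println(acl.getGroups());   // [admins, operators]
    System.out.println(acl.allAllowed());  // false

    // The wildcard "*" grants access to everyone.
    AccessControlList open = new AccessControlList("*");
    System.out.println(open.allAllowed()); // true
  }
}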
+ */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.StringTokenizer; +import java.util.TreeSet; + +import javax.security.auth.login.LoginException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; + +/** An implementation of UserGroupInformation in the Unix system */ +public class UnixUserGroupInformation extends UserGroupInformation { + public static final String DEFAULT_USERNAME = "DrWho"; + public static final String DEFAULT_GROUP = "Tardis"; + + final static public String UGI_PROPERTY_NAME = "hadoop.job.ugi"; + final static private HashMap user2UGIMap = + new HashMap(); + + /** Create an immutable {@link UnixUserGroupInformation} object. */ + public static UnixUserGroupInformation createImmutable(String[] ugi) { + return new UnixUserGroupInformation(ugi) { + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + private String userName; + private String[] groupNames; + + /** Default constructor + */ + public UnixUserGroupInformation() { + } + + /** Constructor with parameters user name and its group names. + * The first entry in the groups list is the default group. + * + * @param userName a user's name + * @param groupNames groups list, first of which is the default group + * @exception IllegalArgumentException if any argument is null + */ + public UnixUserGroupInformation(String userName, String[] groupNames) { + setUserGroupNames(userName, groupNames); + } + + /** Constructor with parameter user/group names + * + * @param ugi an array containing user/group names, the first + * element of which is the user name, the second of + * which is the default group name. + * @exception IllegalArgumentException if the array size is less than 2 + * or any element is null. + */ + public UnixUserGroupInformation(String[] ugi) { + if (ugi==null || ugi.length < 2) { + throw new IllegalArgumentException( "Parameter does contain at least "+ + "one user name and one group name"); + } + String[] groupNames = new String[ugi.length-1]; + System.arraycopy(ugi, 1, groupNames, 0, groupNames.length); + setUserGroupNames(ugi[0], groupNames); + } + + /* Set this object's user name and group names + * + * @param userName a user's name + * @param groupNames groups list, the first of which is the default group + * @exception IllegalArgumentException if any argument is null + */ + private void setUserGroupNames(String userName, String[] groupNames) { + if (userName==null || userName.length()==0 || + groupNames== null || groupNames.length==0) { + throw new IllegalArgumentException( + "Parameters should not be null or an empty string/array"); + } + for (int i=0; iugi as a comma separated string in + * conf as a property attr + * + * The String starts with the user name followed by the default group names, + * and other group names. + * + * @param conf configuration + * @param attr property name + * @param ugi a UnixUserGroupInformation + */ + public static void saveToConf(Configuration conf, String attr, + UnixUserGroupInformation ugi ) { + conf.set(attr, ugi.toString()); + } + + /** Read a UGI from the given conf + * + * The object is expected to store with the property name attr + * as a comma separated string that starts + * with the user name followed by group names. 
+ * If the property name is not defined, return null. + * It's assumed that there is only one UGI per user. If this user already + * has a UGI in the ugi map, return the ugi in the map. + * Otherwise, construct a UGI from the configuration, store it in the + * ugi map and return it. + * + * @param conf configuration + * @param attr property name + * @return a UnixUGI + * @throws LoginException if the stored string is ill-formatted. + */ + public static UnixUserGroupInformation readFromConf( + Configuration conf, String attr) throws LoginException { + String[] ugi = conf.getStrings(attr); + if(ugi == null) { + return null; + } + UnixUserGroupInformation currentUGI = null; + if (ugi.length>0 ){ + currentUGI = user2UGIMap.get(ugi[0]); + } + if (currentUGI == null) { + try { + currentUGI = new UnixUserGroupInformation(ugi); + user2UGIMap.put(currentUGI.getUserName(), currentUGI); + } catch (IllegalArgumentException e) { + throw new LoginException("Login failed: "+e.getMessage()); + } + } + + return currentUGI; + } + + /** + * Get current user's name and the names of all its groups from Unix. + * It's assumed that there is only one UGI per user. If this user already + * has a UGI in the ugi map, return the ugi in the map. + * Otherwise get the current user's information from Unix, store it + * in the map, and return it. + * + * If the current user's UNIX username or groups are configured in such a way + * to throw an Exception, for example if the user uses LDAP, then this method + * will use a the {@link #DEFAULT_USERNAME} and {@link #DEFAULT_GROUP} + * constants. + */ + public static UnixUserGroupInformation login() throws LoginException { + try { + String userName; + + // if an exception occurs, then uses the + // default user + try { + userName = getUnixUserName(); + } catch (Exception e) { + userName = DEFAULT_USERNAME; + } + + // check if this user already has a UGI object in the ugi map + UnixUserGroupInformation ugi = user2UGIMap.get(userName); + if (ugi != null) { + return ugi; + } + + /* get groups list from UNIX. + * It's assumed that the first group is the default group. + */ + String[] groupNames; + + // if an exception occurs, then uses the + // default group + try { + groupNames = getUnixGroups(); + } catch (Exception e) { + groupNames = new String[1]; + groupNames[0] = DEFAULT_GROUP; + } + + // construct a Unix UGI + ugi = new UnixUserGroupInformation(userName, groupNames); + user2UGIMap.put(ugi.getUserName(), ugi); + return ugi; + } catch (Exception e) { + throw new LoginException("Login failed: "+e.getMessage()); + } + } + + /** Equivalent to login(conf, false). */ + public static UnixUserGroupInformation login(Configuration conf) + throws LoginException { + return login(conf, false); + } + + /** Get a user's name & its group names from the given configuration; + * If it is not defined in the configuration, get the current user's + * information from Unix. + * If the user has a UGI in the ugi map, return the one in + * the UGI map. + * + * @param conf either a job configuration or client's configuration + * @param save saving it to conf? 
+ * @return UnixUserGroupInformation a user/group information + * @exception LoginException if not able to get the user/group information + */ + public static UnixUserGroupInformation login(Configuration conf, boolean save + ) throws LoginException { + UnixUserGroupInformation ugi = readFromConf(conf, UGI_PROPERTY_NAME); + if (ugi == null) { + ugi = login(); + LOG.debug("Unix Login: " + ugi); + if (save) { + saveToConf(conf, UGI_PROPERTY_NAME, ugi); + } + } + return ugi; + } + + /* Return a string representation of a string array. + * Two strings are separated by a blank. + */ + private static String toString(String[] strArray) { + if (strArray==null || strArray.length==0) { + return ""; + } + StringBuilder buf = new StringBuilder(strArray[0]); + for (int i=1; i0 && !groupNames[0].equals(otherUGI.groupNames[0])) { + return false; + } + // check all group names, ignoring the order + return new TreeSet(Arrays.asList(groupNames)).equals( + new TreeSet(Arrays.asList(otherUGI.groupNames))); + } + + /** Returns a hash code for this UGI. + * The hash code for a UGI is the hash code of its user name string. + * + * @return a hash code value for this UGI. + */ + public int hashCode() { + return getUserName().hashCode(); + } + + /** Convert this object to a string + * + * @return a comma separated string containing the user name and group names + */ + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(userName); + for (String groupName : groupNames) { + buf.append(','); + buf.append(groupName); + } + return buf.toString(); + } + + @Override + public String getName() { + return toString(); + } +} diff --git a/src/java/org/apache/hadoop/security/User.java b/src/java/org/apache/hadoop/security/User.java new file mode 100644 index 00000000000..dd62debcf8d --- /dev/null +++ b/src/java/org/apache/hadoop/security/User.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.security.Principal; + +/** + * The username of a user. + */ +public class User implements Principal { + final String user; + + /** + * Create a new User with the given username. + * @param user user name + */ + public User(String user) { + this.user = user; + } + + @Override + public String getName() { + return user; + } + + @Override + public String toString() { + return user; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((user == null) ? 
0 : user.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + User other = (User) obj; + if (user == null) { + if (other.user != null) + return false; + } else if (!user.equals(other.user)) + return false; + return true; + } +} diff --git a/src/java/org/apache/hadoop/security/UserGroupInformation.java b/src/java/org/apache/hadoop/security/UserGroupInformation.java new file mode 100644 index 00000000000..ada9dcf2958 --- /dev/null +++ b/src/java/org/apache/hadoop/security/UserGroupInformation.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.io.IOException; +import java.security.AccessController; +import java.security.Principal; +import java.util.Set; + +import javax.security.auth.Subject; +import javax.security.auth.login.LoginException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; + +/** A {@link Writable} abstract class for storing user and groups information. + */ +public abstract class UserGroupInformation implements Writable, Principal { + public static final Log LOG = LogFactory.getLog(UserGroupInformation.class); + private static UserGroupInformation LOGIN_UGI = null; + + private static final ThreadLocal currentUser = + new ThreadLocal(); + + /** @return the {@link UserGroupInformation} for the current thread */ + public static UserGroupInformation getCurrentUGI() { + Subject user = getCurrentUser(); + + if (user == null) { + user = currentUser.get(); + if (user == null) { + return null; + } + } + + Set ugiPrincipals = + user.getPrincipals(UserGroupInformation.class); + + UserGroupInformation ugi = null; + if (ugiPrincipals != null && ugiPrincipals.size() == 1) { + ugi = ugiPrincipals.iterator().next(); + if (ugi == null) { + throw new RuntimeException("Cannot find _current user_ UGI in the Subject!"); + } + } else { + throw new RuntimeException("Cannot resolve current user from subject, " + + "which had " + ugiPrincipals.size() + + " UGI principals!"); + } + return ugi; + } + + /** + * Set the {@link UserGroupInformation} for the current thread + * @deprecated Use {@link #setCurrentUser(UserGroupInformation)} + */ + @Deprecated + public static void setCurrentUGI(UserGroupInformation ugi) { + setCurrentUser(ugi); + } + + /** + * Return the current user Subject. 
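// Illustrative usage sketch (not part of the patched sources): obtaining the
// Unix login identity, binding it to the current thread, and shipping an
// identity through a Configuration. setCurrentUser() is normally invoked by
// the framework; it is called directly here only for illustration, and the
// user/group names below are hypothetical.
import javax.security.auth.login.LoginException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;

class UgiDemo {
  public static void main(String[] args) throws LoginException {
    Configuration conf = new Configuration();

    // Resolve the caller's Unix identity and cache it under hadoop.job.ugi.
    UnixUserGroupInformation ugi = UnixUserGroupInformation.login(conf, true);

    // Make it visible to code that calls UserGroupInformation.getCurrentUGI().
    UserGroupInformation.setCurrentUser(ugi);
    System.out.println("current user: "
        + UserGroupInformation.getCurrentUGI().getUserName());

    // A client could also ship an explicit identity in the configuration.
    UnixUserGroupInformation other =
        new UnixUserGroupInformation("alice", new String[] {"users"});
    UnixUserGroupInformation.saveToConf(conf,
        UnixUserGroupInformation.UGI_PROPERTY_NAME, other);
  }
}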
+ * @return the current user Subject + */ + static Subject getCurrentUser() { + return Subject.getSubject(AccessController.getContext()); + } + + /** + * Set the {@link UserGroupInformation} for the current thread + * WARNING - This method should be used only in test cases and other exceptional + * cases! + * @param ugi {@link UserGroupInformation} for the current thread + */ + public static void setCurrentUser(UserGroupInformation ugi) { + Subject user = SecurityUtil.getSubject(ugi); + currentUser.set(user); + } + + /** Get username + * + * @return the user's name + */ + public abstract String getUserName(); + + /** Get the name of the groups that the user belong to + * + * @return an array of group names + */ + public abstract String[] getGroupNames(); + + /** Login and return a UserGroupInformation object. */ + public static UserGroupInformation login(Configuration conf + ) throws LoginException { + if (LOGIN_UGI == null) { + LOGIN_UGI = UnixUserGroupInformation.login(conf); + } + return LOGIN_UGI; + } + + /** Read a {@link UserGroupInformation} from conf */ + public static UserGroupInformation readFrom(Configuration conf + ) throws IOException { + try { + return UnixUserGroupInformation.readFromConf(conf, + UnixUserGroupInformation.UGI_PROPERTY_NAME); + } catch (LoginException e) { + throw (IOException)new IOException().initCause(e); + } + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/AuthorizationException.java b/src/java/org/apache/hadoop/security/authorize/AuthorizationException.java new file mode 100644 index 00000000000..c001a2dd6f5 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/AuthorizationException.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.io.PrintStream; +import java.io.PrintWriter; + +import org.apache.hadoop.security.AccessControlException; + +/** + * An exception class for authorization-related issues. + * + * This class does not provide the stack trace for security purposes. + */ +public class AuthorizationException extends AccessControlException { + private static final long serialVersionUID = 1L; + + public AuthorizationException() { + super(); + } + + public AuthorizationException(String message) { + super(message); + } + + /** + * Constructs a new exception with the specified cause and a detail + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) 
+ */ + public AuthorizationException(Throwable cause) { + super(cause); + } + + private static StackTraceElement[] stackTrace = new StackTraceElement[0]; + @Override + public StackTraceElement[] getStackTrace() { + // Do not provide the stack-trace + return stackTrace; + } + + @Override + public void printStackTrace() { + // Do not provide the stack-trace + } + + @Override + public void printStackTrace(PrintStream s) { + // Do not provide the stack-trace + } + + @Override + public void printStackTrace(PrintWriter s) { + // Do not provide the stack-trace + } + +} diff --git a/src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java b/src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java new file mode 100644 index 00000000000..6b90829aa85 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.Permission; +import java.security.PermissionCollection; +import java.security.Policy; +import java.security.Principal; +import java.security.ProtectionDomain; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.Group; +import org.apache.hadoop.security.User; +import org.apache.hadoop.security.SecurityUtil.AccessControlList; + +/** + * A {@link Configuration} based security {@link Policy} for Hadoop. + * + * {@link ConfiguredPolicy} works in conjunction with a {@link PolicyProvider} + * for providing service-level authorization for Hadoop. 
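// Illustrative usage sketch (not part of the patched sources): installing a
// ConfiguredPolicy built from a PolicyProvider as the process-wide security
// Policy, and re-reading the policy file after an administrator edits it.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.authorize.ConfiguredPolicy;
import org.apache.hadoop.security.authorize.PolicyProvider;

class PolicySetup {
  static void install(Configuration conf, PolicyProvider provider) {
    // Reads hadoop-policy.xml (or the file named by -Dhadoop.policy.file).
    ConfiguredPolicy policy = new ConfiguredPolicy(conf, provider);
    SecurityUtil.setPolicy(policy);
  }

  static void reload() {
    // Re-parse the policy file so ACL changes take effect.
    java.security.Policy current = SecurityUtil.getPolicy();
    current.refresh();
  }
}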
+ */ +public class ConfiguredPolicy extends Policy implements Configurable { + public static final String HADOOP_POLICY_FILE = "hadoop-policy.xml"; + private static final Log LOG = LogFactory.getLog(ConfiguredPolicy.class); + + private Configuration conf; + private PolicyProvider policyProvider; + private volatile Map> permissions; + private volatile Set allowedPermissions; + + public ConfiguredPolicy(Configuration conf, PolicyProvider policyProvider) { + this.conf = conf; + this.policyProvider = policyProvider; + refresh(); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + refresh(); + } + + @Override + public boolean implies(ProtectionDomain domain, Permission permission) { + // Only make checks for domains having principals + if(domain.getPrincipals().length == 0) { + return true; + } + + return super.implies(domain, permission); + } + + @Override + public PermissionCollection getPermissions(ProtectionDomain domain) { + PermissionCollection permissionCollection = super.getPermissions(domain); + for (Principal principal : domain.getPrincipals()) { + Set principalPermissions = permissions.get(principal); + if (principalPermissions != null) { + for (Permission permission : principalPermissions) { + permissionCollection.add(permission); + } + } + + for (Permission permission : allowedPermissions) { + permissionCollection.add(permission); + } + } + return permissionCollection; + } + + @Override + public void refresh() { + // Get the system property 'hadoop.policy.file' + String policyFile = + System.getProperty("hadoop.policy.file", HADOOP_POLICY_FILE); + + // Make a copy of the original config, and load the policy file + Configuration policyConf = new Configuration(conf); + policyConf.addResource(policyFile); + + Map> newPermissions = + new HashMap>(); + Set newAllowPermissions = new HashSet(); + + // Parse the config file + Service[] services = policyProvider.getServices(); + if (services != null) { + for (Service service : services) { + AccessControlList acl = + new AccessControlList( + policyConf.get(service.getServiceKey(), + AccessControlList.WILDCARD_ACL_VALUE) + ); + + if (acl.allAllowed()) { + newAllowPermissions.add(service.getPermission()); + if (LOG.isDebugEnabled()) { + LOG.debug("Policy - " + service.getPermission() + " * "); + } + } else { + for (String user : acl.getUsers()) { + addPermission(newPermissions, new User(user), service.getPermission()); + } + + for (String group : acl.getGroups()) { + addPermission(newPermissions, new Group(group), service.getPermission()); + } + } + } + } + + // Flip to the newly parsed permissions + allowedPermissions = newAllowPermissions; + permissions = newPermissions; + } + + private void addPermission(Map> permissions, + Principal principal, Permission permission) { + Set principalPermissions = permissions.get(principal); + if (principalPermissions == null) { + principalPermissions = new HashSet(); + permissions.put(principal, principalPermissions); + } + principalPermissions.add(permission); + if (LOG.isDebugEnabled()) { + LOG.debug("Policy - Adding " + permission + " to " + principal); + } + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java b/src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java new file mode 100644 index 00000000000..7099f0ee2c3 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java @@ -0,0 +1,74 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.Permission; + +import org.apache.hadoop.ipc.VersionedProtocol; + +/** + * {@link Permission} to initiate a connection to a given service. + */ +public class ConnectionPermission extends Permission { + + private static final long serialVersionUID = 1L; + private final Class protocol; + + /** + * {@link ConnectionPermission} for a given service. + * @param protocol service to be accessed + */ + public ConnectionPermission(Class protocol) { + super(protocol.getName()); + this.protocol = protocol; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof ConnectionPermission) { + return protocol == ((ConnectionPermission)obj).protocol; + } + return false; + } + + @Override + public String getActions() { + return "ALLOW"; + } + + @Override + public int hashCode() { + return protocol.hashCode(); + } + + @Override + public boolean implies(Permission permission) { + if (permission instanceof ConnectionPermission) { + ConnectionPermission that = (ConnectionPermission)permission; + if (that.protocol.equals(VersionedProtocol.class)) { + return true; + } + return this.protocol.equals(that.protocol); + } + return false; + } + + public String toString() { + return "ConnectionPermission(" + protocol.getName() + ")"; + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/PolicyProvider.java b/src/java/org/apache/hadoop/security/authorize/PolicyProvider.java new file mode 100644 index 00000000000..fb75b7f5818 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/PolicyProvider.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.Policy; + +/** + * {@link PolicyProvider} provides the {@link Service} definitions to the + * security {@link Policy} in effect for Hadoop. 
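// Illustrative usage sketch (not part of the patched sources): a minimal
// PolicyProvider exposing one service. ClientProtocol and the ACL key
// "security.client.protocol.acl" are hypothetical stand-ins for a real
// protocol interface and its hadoop-policy.xml entry.
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.security.authorize.Service;

class ExamplePolicyProvider extends PolicyProvider {
  // Hypothetical protocol interface protected by this policy.
  interface ClientProtocol extends org.apache.hadoop.ipc.VersionedProtocol {}

  @Override
  public Service[] getServices() {
    return new Service[] {
      new Service("security.client.protocol.acl", ClientProtocol.class)
    };
  }
}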
+ * + */ +public abstract class PolicyProvider { + + /** + * Configuration key for the {@link PolicyProvider} implementation. + */ + public static final String POLICY_PROVIDER_CONFIG = + "hadoop.security.authorization.policyprovider"; + + /** + * A default {@link PolicyProvider} without any defined services. + */ + public static final PolicyProvider DEFAULT_POLICY_PROVIDER = + new PolicyProvider() { + public Service[] getServices() { + return null; + } + }; + + /** + * Get the {@link Service} definitions from the {@link PolicyProvider}. + * @return the {@link Service} definitions + */ + public abstract Service[] getServices(); +} diff --git a/src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java b/src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java new file mode 100644 index 00000000000..7f9b530a37c --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.io.IOException; + +import org.apache.hadoop.ipc.VersionedProtocol; + +/** + * Protocol which is used to refresh the authorization policy in use currently. + */ +public interface RefreshAuthorizationPolicyProtocol extends VersionedProtocol { + + /** + * Version 1: Initial version + */ + public static final long versionID = 1L; + + /** + * Refresh the service-level authorization policy in-effect. + * @throws IOException + */ + void refreshServiceAcl() throws IOException; +} diff --git a/src/java/org/apache/hadoop/security/authorize/Service.java b/src/java/org/apache/hadoop/security/authorize/Service.java new file mode 100644 index 00000000000..cd0f8ed9799 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/Service.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.security.authorize; + +import java.security.Permission; + +/** + * An abstract definition of service as related to + * Service Level Authorization for Hadoop. + * + * Each service defines it's configuration key and also the necessary + * {@link Permission} required to access the service. + */ +public class Service { + private String key; + private Permission permission; + + public Service(String key, Class protocol) { + this.key = key; + this.permission = new ConnectionPermission(protocol); + } + + /** + * Get the configuration key for the service. + * @return the configuration key for the service + */ + public String getServiceKey() { + return key; + } + + /** + * Get the {@link Permission} required to access the service. + * @return the {@link Permission} required to access the service + */ + public Permission getPermission() { + return permission; + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java b/src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java new file mode 100644 index 00000000000..3573467afaa --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.AccessControlException; +import java.security.AccessController; +import java.security.Permission; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import javax.security.auth.Subject; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * An authorization manager which handles service-level authorization + * for incoming service requests. + */ +public class ServiceAuthorizationManager { + + private static final Log LOG = + LogFactory.getLog(ServiceAuthorizationManager.class); + + /** + * Configuration key for controlling service-level authorization for Hadoop. + */ + public static final String SERVICE_AUTHORIZATION_CONFIG = + "hadoop.security.authorization"; + + private static Map, Permission> protocolToPermissionMap = + Collections.synchronizedMap(new HashMap, Permission>()); + + /** + * Authorize the user to access the protocol being used. 
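// Illustrative usage sketch (not part of the patched sources): how a server
// might perform the service-level check before dispatching a call. The
// protocol class and configuration handling are hypothetical.
import javax.security.auth.Subject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;

class AuthorizeCall {
  static void check(Configuration conf, UserGroupInformation caller,
                    Class<?> protocol) throws AuthorizationException {
    // Only enforce the check when service-level authorization is enabled.
    if (!conf.getBoolean(
        ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
      return;
    }
    Subject subject = SecurityUtil.getSubject(caller);
    ServiceAuthorizationManager.authorize(subject, protocol);
  }
}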
+ * + * @param user user accessing the service + * @param protocol service being accessed + * @throws AuthorizationException on authorization failure + */ + public static void authorize(Subject user, Class protocol) + throws AuthorizationException { + Permission permission = protocolToPermissionMap.get(protocol); + if (permission == null) { + permission = new ConnectionPermission(protocol); + protocolToPermissionMap.put(protocol, permission); + } + + checkPermission(user, permission); + } + + /** + * Check if the given {@link Subject} has all of necessary {@link Permission} + * set. + * + * @param user Subject to be authorized + * @param permissions Permission set + * @throws AuthorizationException if the authorization failed + */ + private static void checkPermission(final Subject user, + final Permission... permissions) + throws AuthorizationException { + try{ + Subject.doAs(user, + new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + try { + for(Permission permission : permissions) { + AccessController.checkPermission(permission); + } + } catch (AccessControlException ace) { + LOG.info("Authorization failed for " + + UserGroupInformation.getCurrentUGI(), ace); + throw new AuthorizationException(ace); + } + return null; + } + } + ); + } catch (PrivilegedActionException e) { + throw new AuthorizationException(e.getException()); + } + } + +} diff --git a/src/java/org/apache/hadoop/util/CyclicIteration.java b/src/java/org/apache/hadoop/util/CyclicIteration.java new file mode 100644 index 00000000000..4bfd96a3296 --- /dev/null +++ b/src/java/org/apache/hadoop/util/CyclicIteration.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.util.Iterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.NoSuchElementException; + +/** Provide an cyclic {@link Iterator} for a {@link NavigableMap}. + * The {@link Iterator} navigates the entries of the map + * according to the map's ordering. + * If the {@link Iterator} hits the last entry of the map, + * it will then continue from the first entry. + */ +public class CyclicIteration implements Iterable> { + private final NavigableMap navigablemap; + private final NavigableMap tailmap; + + /** Construct an {@link Iterable} object, + * so that an {@link Iterator} can be created + * for iterating the given {@link NavigableMap}. + * The iteration begins from the starting key exclusively. 
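// Illustrative usage sketch (not part of the patched sources): cycling over a
// TreeMap starting just after a given key. The map contents are made-up.
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.util.CyclicIteration;

class CyclicDemo {
  public static void main(String[] args) {
    TreeMap<Integer, String> nodes = new TreeMap<Integer, String>();
    nodes.put(1, "a");
    nodes.put(3, "b");
    nodes.put(7, "c");

    // Iteration starts after key 3, so the entries are visited as 7, 1, 3.
    for (Map.Entry<Integer, String> e
        : new CyclicIteration<Integer, String>(nodes, 3)) {
      System.out.println(e.getKey() + " -> " + e.getValue());
    }
  }
}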
+ */ + public CyclicIteration(NavigableMap navigablemap, K startingkey) { + if (navigablemap == null || navigablemap.isEmpty()) { + this.navigablemap = null; + this.tailmap = null; + } + else { + this.navigablemap = navigablemap; + this.tailmap = navigablemap.tailMap(startingkey, false); + } + } + + /** {@inheritDoc} */ + public Iterator> iterator() { + return new CyclicIterator(); + } + + /** An {@link Iterator} for {@link CyclicIteration}. */ + private class CyclicIterator implements Iterator> { + private boolean hasnext; + private Iterator> i; + /** The first entry to begin. */ + private final Map.Entry first; + /** The next entry. */ + private Map.Entry next; + + private CyclicIterator() { + hasnext = navigablemap != null; + if (hasnext) { + i = tailmap.entrySet().iterator(); + first = nextEntry(); + next = first; + } + else { + i = null; + first = null; + next = null; + } + } + + private Map.Entry nextEntry() { + if (!i.hasNext()) { + i = navigablemap.entrySet().iterator(); + } + return i.next(); + } + + /** {@inheritDoc} */ + public boolean hasNext() { + return hasnext; + } + + /** {@inheritDoc} */ + public Map.Entry next() { + if (!hasnext) { + throw new NoSuchElementException(); + } + + final Map.Entry curr = next; + next = nextEntry(); + hasnext = !next.equals(first); + return curr; + } + + /** Not supported */ + public void remove() { + throw new UnsupportedOperationException("Not supported"); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/Daemon.java b/src/java/org/apache/hadoop/util/Daemon.java new file mode 100644 index 00000000000..71928af80d0 --- /dev/null +++ b/src/java/org/apache/hadoop/util/Daemon.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +/** A thread that has called {@link Thread#setDaemon(boolean) } with true.*/ +public class Daemon extends Thread { + + { + setDaemon(true); // always a daemon + } + + Runnable runnable = null; + /** Construct a daemon thread. */ + public Daemon() { + super(); + } + + /** Construct a daemon thread. */ + public Daemon(Runnable runnable) { + super(runnable); + this.runnable = runnable; + this.setName(((Object)runnable).toString()); + } + + /** Construct a daemon thread to be part of a specified thread group. 
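// Illustrative usage sketch (not part of the patched sources): running a
// background task as a Daemon so it does not keep the JVM alive at shutdown.
import org.apache.hadoop.util.Daemon;

class DaemonDemo {
  public static void main(String[] args) {
    Daemon heartbeat = new Daemon(new Runnable() {
      public void run() {
        // Periodic work would go here; the thread name is runnable.toString().
        System.out.println("heartbeat tick");
      }
    });
    heartbeat.start();
  }
}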
*/ + public Daemon(ThreadGroup group, Runnable runnable) { + super(group, runnable); + this.runnable = runnable; + this.setName(((Object)runnable).toString()); + } + + public Runnable getRunnable() { + return runnable; + } +} diff --git a/src/java/org/apache/hadoop/util/DataChecksum.java b/src/java/org/apache/hadoop/util/DataChecksum.java new file mode 100644 index 00000000000..9aa339025b3 --- /dev/null +++ b/src/java/org/apache/hadoop/util/DataChecksum.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.zip.Checksum; +import java.util.zip.CRC32; + +import java.io.*; + +/** + * This class provides inteface and utilities for processing checksums for + * DFS data transfers. + */ + +public class DataChecksum implements Checksum { + + // Misc constants + public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len + + // checksum types + public static final int CHECKSUM_NULL = 0; + public static final int CHECKSUM_CRC32 = 1; + + private static final int CHECKSUM_NULL_SIZE = 0; + private static final int CHECKSUM_CRC32_SIZE = 4; + + + public static DataChecksum newDataChecksum( int type, int bytesPerChecksum ) { + if ( bytesPerChecksum <= 0 ) { + return null; + } + + switch ( type ) { + case CHECKSUM_NULL : + return new DataChecksum( CHECKSUM_NULL, new ChecksumNull(), + CHECKSUM_NULL_SIZE, bytesPerChecksum ); + case CHECKSUM_CRC32 : + return new DataChecksum( CHECKSUM_CRC32, new CRC32(), + CHECKSUM_CRC32_SIZE, bytesPerChecksum ); + default: + return null; + } + } + + /** + * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. + * @return DataChecksum of the type in the array or null in case of an error. + */ + public static DataChecksum newDataChecksum( byte bytes[], int offset ) { + if ( offset < 0 || bytes.length < offset + HEADER_LEN ) { + return null; + } + + // like readInt(): + int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | + ( (bytes[offset+2] & 0xff) << 16 ) | + ( (bytes[offset+3] & 0xff) << 8 ) | + ( (bytes[offset+4] & 0xff) ); + return newDataChecksum( bytes[0], bytesPerChecksum ); + } + + /** + * This constructucts a DataChecksum by reading HEADER_LEN bytes from + * input stream in + */ + public static DataChecksum newDataChecksum( DataInputStream in ) + throws IOException { + int type = in.readByte(); + int bpc = in.readInt(); + DataChecksum summer = newDataChecksum( type, bpc ); + if ( summer == null ) { + throw new IOException( "Could not create DataChecksum of type " + + type + " with bytesPerChecksum " + bpc ); + } + return summer; + } + + /** + * Writes the checksum header to the output stream out. 
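// Illustrative usage sketch (not part of the patched sources): computing and
// verifying a CRC32 chunk checksum with DataChecksum. The input data and the
// 512-byte chunk size are arbitrary example values.
import java.io.IOException;
import org.apache.hadoop.util.DataChecksum;

class ChecksumDemo {
  public static void main(String[] args) throws IOException {
    byte[] data = "hello, hadoop".getBytes();

    DataChecksum sum = DataChecksum.newDataChecksum(
        DataChecksum.CHECKSUM_CRC32, 512 /* bytesPerChecksum */);
    sum.update(data, 0, data.length);

    // Serialize the 4-byte CRC, then verify it against the same data.
    byte[] crc = new byte[sum.getChecksumSize()];
    sum.writeValue(crc, 0, true);   // also resets the checksum

    sum.update(data, 0, data.length);
    System.out.println("matches: " + sum.compare(crc, 0));  // true
  }
}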
+ */ + public void writeHeader( DataOutputStream out ) + throws IOException { + out.writeByte( type ); + out.writeInt( bytesPerChecksum ); + } + + public byte[] getHeader() { + byte[] header = new byte[DataChecksum.HEADER_LEN]; + header[0] = (byte) (type & 0xff); + // Writing in buffer just like DataOutput.WriteInt() + header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); + header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); + header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); + header[1+3] = (byte) (bytesPerChecksum & 0xff); + return header; + } + + /** + * Writes the current checksum to the stream. + * If reset is true, then resets the checksum. + * @return number of bytes written. Will be equal to getChecksumSize(); + */ + public int writeValue( DataOutputStream out, boolean reset ) + throws IOException { + if ( size <= 0 ) { + return 0; + } + + if ( type == CHECKSUM_CRC32 ) { + out.writeInt( (int) summer.getValue() ); + } else { + throw new IOException( "Unknown Checksum " + type ); + } + + if ( reset ) { + reset(); + } + + return size; + } + + /** + * Writes the current checksum to a buffer. + * If reset is true, then resets the checksum. + * @return number of bytes written. Will be equal to getChecksumSize(); + */ + public int writeValue( byte[] buf, int offset, boolean reset ) + throws IOException { + if ( size <= 0 ) { + return 0; + } + + if ( type == CHECKSUM_CRC32 ) { + int checksum = (int) summer.getValue(); + buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); + buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); + buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); + buf[offset+3] = (byte) (checksum & 0xff); + } else { + throw new IOException( "Unknown Checksum " + type ); + } + + if ( reset ) { + reset(); + } + + return size; + } + + /** + * Compares the checksum located at buf[offset] with the current checksum. + * @return true if the checksum matches and false otherwise. + */ + public boolean compare( byte buf[], int offset ) { + if ( size > 0 && type == CHECKSUM_CRC32 ) { + int checksum = ( (buf[offset+0] & 0xff) << 24 ) | + ( (buf[offset+1] & 0xff) << 16 ) | + ( (buf[offset+2] & 0xff) << 8 ) | + ( (buf[offset+3] & 0xff) ); + return checksum == (int) summer.getValue(); + } + return size == 0; + } + + private final int type; + private final int size; + private final Checksum summer; + private final int bytesPerChecksum; + private int inSum = 0; + + private DataChecksum( int checksumType, Checksum checksum, + int sumSize, int chunkSize ) { + type = checksumType; + summer = checksum; + size = sumSize; + bytesPerChecksum = chunkSize; + } + + // Accessors + public int getChecksumType() { + return type; + } + public int getChecksumSize() { + return size; + } + public int getBytesPerChecksum() { + return bytesPerChecksum; + } + public int getNumBytesInSum() { + return inSum; + } + + public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; + static public int getChecksumHeaderSize() { + return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int + } + //Checksum Interface. Just a wrapper around member summer. 
+ public long getValue() { + return summer.getValue(); + } + public void reset() { + summer.reset(); + inSum = 0; + } + public void update( byte[] b, int off, int len ) { + if ( len > 0 ) { + summer.update( b, off, len ); + inSum += len; + } + } + public void update( int b ) { + summer.update( b ); + inSum += 1; + } + + /** + * This just provides a dummy implimentation for Checksum class + * This is used when there is no checksum available or required for + * data + */ + static class ChecksumNull implements Checksum { + + public ChecksumNull() {} + + //Dummy interface + public long getValue() { return 0; } + public void reset() {} + public void update(byte[] b, int off, int len) {} + public void update(int b) {} + }; +} diff --git a/src/java/org/apache/hadoop/util/DiskChecker.java b/src/java/org/apache/hadoop/util/DiskChecker.java new file mode 100644 index 00000000000..4c471dbce83 --- /dev/null +++ b/src/java/org/apache/hadoop/util/DiskChecker.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.File; +import java.io.IOException; + +/** + * Class that provides utility functions for checking disk problem + */ + +public class DiskChecker { + + public static class DiskErrorException extends IOException { + public DiskErrorException(String msg) { + super(msg); + } + } + + public static class DiskOutOfSpaceException extends IOException { + public DiskOutOfSpaceException(String msg) { + super(msg); + } + } + + /** + * The semantics of mkdirsWithExistsCheck method is different from the mkdirs + * method provided in the Sun's java.io.File class in the following way: + * While creating the non-existent parent directories, this method checks for + * the existence of those directories if the mkdir fails at any point (since + * that directory might have just been created by some other process). + * If both mkdir() and the exists() check fails for any seemingly + * non-existent directory, then we signal an error; Sun's mkdir would signal + * an error (return false) if a directory it is attempting to create already + * exists or the mkdir fails. 
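// Illustrative usage sketch (not part of the patched sources): validating a
// local directory before using it for storage. The path is a made-up example.
import java.io.File;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

class DiskCheckDemo {
  public static void main(String[] args) {
    try {
      // Creates the directory (and parents) if needed, then checks that it is
      // a readable, writable directory.
      DiskChecker.checkDir(new File("/tmp/hadoop-demo/data"));
      System.out.println("directory is usable");
    } catch (DiskErrorException e) {
      System.err.println("unusable directory: " + e.getMessage());
    }
  }
}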
+ * @param dir + * @return true on success, false on failure + */ + public static boolean mkdirsWithExistsCheck(File dir) { + if (dir.mkdir() || dir.exists()) { + return true; + } + File canonDir = null; + try { + canonDir = dir.getCanonicalFile(); + } catch (IOException e) { + return false; + } + String parent = canonDir.getParent(); + return (parent != null) && + (mkdirsWithExistsCheck(new File(parent)) && + (canonDir.mkdir() || canonDir.exists())); + } + + public static void checkDir(File dir) throws DiskErrorException { + if (!mkdirsWithExistsCheck(dir)) + throw new DiskErrorException("can not create directory: " + + dir.toString()); + + if (!dir.isDirectory()) + throw new DiskErrorException("not a directory: " + + dir.toString()); + + if (!dir.canRead()) + throw new DiskErrorException("directory is not readable: " + + dir.toString()); + + if (!dir.canWrite()) + throw new DiskErrorException("directory is not writable: " + + dir.toString()); + } + +} diff --git a/src/java/org/apache/hadoop/util/GenericOptionsParser.java b/src/java/org/apache/hadoop/util/GenericOptionsParser.java new file mode 100644 index 00000000000..28323787551 --- /dev/null +++ b/src/java/org/apache/hadoop/util/GenericOptionsParser.java @@ -0,0 +1,408 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.net.URL; +import java.net.URLClassLoader; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * GenericOptionsParser is a utility to parse command line + * arguments generic to the Hadoop framework. + * + * GenericOptionsParser recognizes several standarad command + * line arguments, enabling applications to easily specify a namenode, a + * jobtracker, additional configuration resources etc. + * + *
+ * Generic Options
+ *
+ * The supported generic options are:
+ *
+ *     -conf <configuration file>     specify a configuration file
+ *     -D <property=value>            use value for given property
+ *     -fs <local|namenode:port>      specify a namenode
+ *     -jt <local|jobtracker:port>    specify a job tracker
+ *     -files <comma separated list of files>    specify comma separated
+ *                            files to be copied to the map reduce cluster
+ *     -libjars <comma separated list of jars>   specify comma separated
+ *                            jar files to include in the classpath.
+ *     -archives <comma separated list of archives>    specify comma
+ *             separated archives to be unarchived on the compute machines.
+ *
+ * The general command line syntax is:
+ *
+ * bin/hadoop command [genericOptions] [commandOptions]
+ *
+ * Generic command line arguments might modify
+ * Configuration objects, given to constructors.
+ *
+ * The functionality is implemented using Commons CLI.
+ *
+ * Examples:
+ *
+ * $ bin/hadoop dfs -fs darwin:8020 -ls /data
+ * list /data directory in dfs with namenode darwin:8020
+ *
+ * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
+ * list /data directory in dfs with namenode darwin:8020
+ *
+ * $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
+ * list /data directory in dfs with conf specified in hadoop-site.xml
+ *
+ * $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
+ * submit a job to job tracker darwin:50020
+ *
+ * $ bin/hadoop job -jt darwin:50020 -submit job.xml
+ * submit a job to job tracker darwin:50020
+ *
+ * $ bin/hadoop job -jt local -submit job.xml
+ * submit a job to local runner
+ *
+ * $ bin/hadoop jar -libjars testlib.jar
+ * -archives test.tgz -files file.txt inputjar args
+ * job submission with libjars, files and archives
+ * + * @see Tool + * @see ToolRunner + */ +public class GenericOptionsParser { + + private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class); + private Configuration conf; + private CommandLine commandLine; + + /** + * Create an options parser with the given options to parse the args. + * @param opts the options + * @param args the command line arguments + */ + public GenericOptionsParser(Options opts, String[] args) { + this(new Configuration(), new Options(), args); + } + + /** + * Create an options parser to parse the args. + * @param args the command line arguments + */ + public GenericOptionsParser(String[] args) { + this(new Configuration(), new Options(), args); + } + + /** + * Create a GenericOptionsParser to parse only the generic Hadoop + * arguments. + * + * The array of string arguments other than the generic arguments can be + * obtained by {@link #getRemainingArgs()}. + * + * @param conf the Configuration to modify. + * @param args command-line arguments. + */ + public GenericOptionsParser(Configuration conf, String[] args) { + this(conf, new Options(), args); + } + + /** + * Create a GenericOptionsParser to parse given options as well + * as generic Hadoop options. + * + * The resulting CommandLine object can be obtained by + * {@link #getCommandLine()}. + * + * @param conf the configuration to modify + * @param options options built by the caller + * @param args User-specified arguments + */ + public GenericOptionsParser(Configuration conf, Options options, String[] args) { + parseGeneralOptions(options, conf, args); + this.conf = conf; + } + + /** + * Returns an array of Strings containing only application-specific arguments. + * + * @return array of Strings containing the un-parsed arguments + * or empty array if commandLine was not defined. + */ + public String[] getRemainingArgs() { + return (commandLine == null) ? new String[]{} : commandLine.getArgs(); + } + + /** + * Get the modified configuration + * @return the configuration that has the modified parameters. + */ + public Configuration getConfiguration() { + return conf; + } + + /** + * Returns the commons-cli CommandLine object + * to process the parsed arguments. + * + * Note: If the object is created with + * {@link #GenericOptionsParser(Configuration, String[])}, then returned + * object will only contain parsed generic options. + * + * @return CommandLine representing list of arguments + * parsed against Options descriptor. 
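+ *
+ * A minimal usage sketch (illustrative only; the "verbose" option and the
+ * args array are made up by way of example):
+ *
+ *   Options opts = new Options();
+ *   opts.addOption("verbose", false, "print more output");
+ *   GenericOptionsParser parser =
+ *       new GenericOptionsParser(new Configuration(), opts, args);
+ *   CommandLine line = parser.getCommandLine();
+ *   boolean verbose = line.hasOption("verbose");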
+ */ + public CommandLine getCommandLine() { + return commandLine; + } + + /** + * Specify properties of each generic option + */ + @SuppressWarnings("static-access") + private static Options buildGeneralOptions(Options opts) { + Option fs = OptionBuilder.withArgName("local|namenode:port") + .hasArg() + .withDescription("specify a namenode") + .create("fs"); + Option jt = OptionBuilder.withArgName("local|jobtracker:port") + .hasArg() + .withDescription("specify a job tracker") + .create("jt"); + Option oconf = OptionBuilder.withArgName("configuration file") + .hasArg() + .withDescription("specify an application configuration file") + .create("conf"); + Option property = OptionBuilder.withArgName("property=value") + .hasArgs() + .withArgPattern("=", 1) + .withDescription("use value for given property") + .create('D'); + Option libjars = OptionBuilder.withArgName("paths") + .hasArg() + .withDescription("comma separated jar files to include in the classpath.") + .create("libjars"); + Option files = OptionBuilder.withArgName("paths") + .hasArg() + .withDescription("comma separated files to be copied to the " + + "map reduce cluster") + .create("files"); + Option archives = OptionBuilder.withArgName("paths") + .hasArg() + .withDescription("comma separated archives to be unarchived" + + " on the compute machines.") + .create("archives"); + + opts.addOption(fs); + opts.addOption(jt); + opts.addOption(oconf); + opts.addOption(property); + opts.addOption(libjars); + opts.addOption(files); + opts.addOption(archives); + + return opts; + } + + /** + * Modify configuration according user-specified generic options + * @param conf Configuration to be modified + * @param line User-specified generic options + */ + private void processGeneralOptions(Configuration conf, + CommandLine line) { + if (line.hasOption("fs")) { + FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); + } + + if (line.hasOption("jt")) { + conf.set("mapred.job.tracker", line.getOptionValue("jt")); + } + if (line.hasOption("conf")) { + String[] values = line.getOptionValues("conf"); + for(String value : values) { + conf.addResource(new Path(value)); + } + } + try { + if (line.hasOption("libjars")) { + conf.set("tmpjars", + validateFiles(line.getOptionValue("libjars"), conf)); + //setting libjars in client classpath + URL[] libjars = getLibJars(conf); + if(libjars!=null && libjars.length>0) { + conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); + Thread.currentThread().setContextClassLoader( + new URLClassLoader(libjars, + Thread.currentThread().getContextClassLoader())); + } + } + if (line.hasOption("files")) { + conf.set("tmpfiles", + validateFiles(line.getOptionValue("files"), conf)); + } + if (line.hasOption("archives")) { + conf.set("tmparchives", + validateFiles(line.getOptionValue("archives"), conf)); + } + } catch (IOException ioe) { + System.err.println(StringUtils.stringifyException(ioe)); + } + if (line.hasOption('D')) { + String[] property = line.getOptionValues('D'); + for(int i=0; i specify an application configuration file"); + out.println("-D use value for given property"); + out.println("-fs specify a namenode"); + out.println("-jt specify a job tracker"); + out.println("-files " + + "specify comma separated files to be copied to the map reduce cluster"); + out.println("-libjars " + + "specify comma separated jar files to include in the classpath."); + out.println("-archives " + + "specify comma separated archives to be unarchived" + + " on the compute machines.\n"); + out.println("The general 
command line syntax is"); + out.println("bin/hadoop command [genericOptions] [commandOptions]\n"); + } + +} diff --git a/src/java/org/apache/hadoop/util/GenericsUtil.java b/src/java/org/apache/hadoop/util/GenericsUtil.java new file mode 100644 index 00000000000..ca4ea7ebb44 --- /dev/null +++ b/src/java/org/apache/hadoop/util/GenericsUtil.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.lang.reflect.Array; +import java.util.List; + +/** + * Contains utility methods for dealing with Java Generics. + */ +public class GenericsUtil { + + /** + * Returns the Class object (of type Class<T>) of the + * argument of type T. + * @param The type of the argument + * @param t the object to get it class + * @return Class<T> + */ + public static Class getClass(T t) { + @SuppressWarnings("unchecked") + Class clazz = (Class)t.getClass(); + return clazz; + } + + /** + * Converts the given List<T> to a an array of + * T[]. + * @param c the Class object of the items in the list + * @param list the list to convert + */ + public static T[] toArray(Class c, List list) + { + @SuppressWarnings("unchecked") + T[] ta= (T[])Array.newInstance(c, list.size()); + + for (int i= 0; iList<T> to a an array of + * T[]. + * @param list the list to convert + * @throws ArrayIndexOutOfBoundsException if the list is empty. + * Use {@link #toArray(Class, List)} if the list may be empty. + */ + public static T[] toArray(List list) { + return toArray(getClass(list.get(0)), list); + } + +} diff --git a/src/java/org/apache/hadoop/util/HeapSort.java b/src/java/org/apache/hadoop/util/HeapSort.java new file mode 100644 index 00000000000..068a72e8778 --- /dev/null +++ b/src/java/org/apache/hadoop/util/HeapSort.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * An implementation of the core algorithm of HeapSort. 
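+ *
+ * A minimal usage sketch (illustrative only), sorting an int[] in place:
+ *
+ *   final int[] a = { 3, 1, 2 };
+ *   new HeapSort().sort(new IndexedSortable() {
+ *     public int compare(int i, int j) { return a[i] - a[j]; }
+ *     public void swap(int i, int j) { int t = a[i]; a[i] = a[j]; a[j] = t; }
+ *   }, 0, a.length);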
+ */ +public final class HeapSort implements IndexedSorter { + + public HeapSort() { } + + private static void downHeap(final IndexedSortable s, final int b, + int i, final int N) { + for (int idx = i << 1; idx < N; idx = i << 1) { + if (idx + 1 < N && s.compare(b + idx, b + idx + 1) < 0) { + if (s.compare(b + i, b + idx + 1) < 0) { + s.swap(b + i, b + idx + 1); + } else return; + i = idx + 1; + } else if (s.compare(b + i, b + idx) < 0) { + s.swap(b + i, b + idx); + i = idx; + } else return; + } + } + + /** + * Sort the given range of items using heap sort. + * {@inheritDoc} + */ + public void sort(IndexedSortable s, int p, int r) { + sort(s, p, r, null); + } + + /** + * {@inheritDoc} + */ + public void sort(final IndexedSortable s, final int p, final int r, + final Progressable rep) { + final int N = r - p; + // build heap w/ reverse comparator, then write in-place from end + final int t = Integer.highestOneBit(N); + for (int i = t; i > 1; i >>>= 1) { + for (int j = i >>> 1; j < i; ++j) { + downHeap(s, p-1, j, N + 1); + } + if (null != rep) { + rep.progress(); + } + } + for (int i = r - 1; i > p; --i) { + s.swap(p, i); + downHeap(s, p - 1, 1, i - p + 1); + } + } +} diff --git a/src/java/org/apache/hadoop/util/HostsFileReader.java b/src/java/org/apache/hadoop/util/HostsFileReader.java new file mode 100644 index 00000000000..89cdb39b0c8 --- /dev/null +++ b/src/java/org/apache/hadoop/util/HostsFileReader.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.*; +import java.util.Set; +import java.util.HashSet; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.Log; + +// Keeps track of which datanodes/tasktrackers are allowed to connect to the +// namenode/jobtracker. 
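+// A minimal usage sketch (file names and hostName are illustrative):
+//   HostsFileReader hosts = new HostsFileReader("conf/hosts.include",
+//                                               "conf/hosts.exclude");
+//   boolean included = hosts.getHosts().contains(hostName);
+//   boolean excluded = hosts.getExcludedHosts().contains(hostName);
+//   hosts.refresh();   // re-read both files after they change on disk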
+public class HostsFileReader { + private Set includes; + private Set excludes; + private String includesFile; + private String excludesFile; + + private static final Log LOG = LogFactory.getLog(HostsFileReader.class); + + public HostsFileReader(String inFile, + String exFile) throws IOException { + includes = new HashSet(); + excludes = new HashSet(); + includesFile = inFile; + excludesFile = exFile; + refresh(); + } + + private void readFileToSet(String filename, Set set) throws IOException { + File file = new File(filename); + if (!file.exists()) { + return; + } + FileInputStream fis = new FileInputStream(file); + BufferedReader reader = null; + try { + reader = new BufferedReader(new InputStreamReader(fis)); + String line; + while ((line = reader.readLine()) != null) { + String[] nodes = line.split("[ \t\n\f\r]+"); + if (nodes != null) { + for (int i = 0; i < nodes.length; i++) { + if (!nodes[i].equals("")) { + set.add(nodes[i]); // might need to add canonical name + } + } + } + } + } finally { + if (reader != null) { + reader.close(); + } + fis.close(); + } + } + + public synchronized void refresh() throws IOException { + LOG.info("Refreshing hosts (include/exclude) list"); + if (!includesFile.equals("")) { + Set newIncludes = new HashSet(); + readFileToSet(includesFile, newIncludes); + // switch the new hosts that are to be included + includes = newIncludes; + } + if (!excludesFile.equals("")) { + Set newExcludes = new HashSet(); + readFileToSet(excludesFile, newExcludes); + // switch the excluded hosts + excludes = newExcludes; + } + } + + public synchronized Set getHosts() { + return includes; + } + + public synchronized Set getExcludedHosts() { + return excludes; + } + + public synchronized void setIncludesFile(String includesFile) { + LOG.info("Setting the includes file to " + includesFile); + this.includesFile = includesFile; + } + + public synchronized void setExcludesFile(String excludesFile) { + LOG.info("Setting the excludes file to " + excludesFile); + this.excludesFile = excludesFile; + } + + public synchronized void updateFileNames(String includesFile, + String excludesFile) + throws IOException { + setIncludesFile(includesFile); + setExcludesFile(excludesFile); + } +} diff --git a/src/java/org/apache/hadoop/util/IndexedSortable.java b/src/java/org/apache/hadoop/util/IndexedSortable.java new file mode 100644 index 00000000000..fdd758c519e --- /dev/null +++ b/src/java/org/apache/hadoop/util/IndexedSortable.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * Interface for collections capable of being sorted by {@link IndexedSorter} + * algorithms. 
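+ *
+ * Implementations typically index into one or more parallel arrays, so that
+ * compare() reads the keys and swap() moves whole records together. A sketch
+ * (illustrative only):
+ *
+ *   class PairSortable implements IndexedSortable {
+ *     private final int[] keys; private final long[] values;
+ *     PairSortable(int[] k, long[] v) { keys = k; values = v; }
+ *     public int compare(int i, int j) { return keys[i] - keys[j]; }
+ *     public void swap(int i, int j) {
+ *       int k = keys[i]; keys[i] = keys[j]; keys[j] = k;
+ *       long v = values[i]; values[i] = values[j]; values[j] = v;
+ *     }
+ *   }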
+ */ +public interface IndexedSortable { + + /** + * Compare items at the given addresses consistent with the semantics of + * {@link java.util.Comparator#compare(Object, Object)}. + */ + int compare(int i, int j); + + /** + * Swap items at the given addresses. + */ + void swap(int i, int j); +} diff --git a/src/java/org/apache/hadoop/util/IndexedSorter.java b/src/java/org/apache/hadoop/util/IndexedSorter.java new file mode 100644 index 00000000000..77c725fb2b7 --- /dev/null +++ b/src/java/org/apache/hadoop/util/IndexedSorter.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * Interface for sort algorithms accepting {@link IndexedSortable} items. + * + * A sort algorithm implementing this interface may only + * {@link IndexedSortable#compare} and {@link IndexedSortable#swap} items + * for a range of indices to effect a sort across that range. + */ +public interface IndexedSorter { + + /** + * Sort the items accessed through the given IndexedSortable over the given + * range of logical indices. From the perspective of the sort algorithm, + * each index between l (inclusive) and r (exclusive) is an addressable + * entry. + * @see IndexedSortable#compare + * @see IndexedSortable#swap + */ + void sort(IndexedSortable s, int l, int r); + + /** + * Same as {@link #sort(IndexedSortable,int,int)}, but indicate progress + * periodically. + * @see #sort(IndexedSortable,int,int) + */ + void sort(IndexedSortable s, int l, int r, Progressable rep); + +} diff --git a/src/java/org/apache/hadoop/util/LineReader.java b/src/java/org/apache/hadoop/util/LineReader.java new file mode 100644 index 00000000000..b5c6e6843dd --- /dev/null +++ b/src/java/org/apache/hadoop/util/LineReader.java @@ -0,0 +1,190 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; + +/** + * A class that provides a line reader from an input stream. + */ +public class LineReader { + private static final int DEFAULT_BUFFER_SIZE = 64 * 1024; + private int bufferSize = DEFAULT_BUFFER_SIZE; + private InputStream in; + private byte[] buffer; + // the number of bytes of real data in the buffer + private int bufferLength = 0; + // the current position in the buffer + private int bufferPosn = 0; + + private static final byte CR = '\r'; + private static final byte LF = '\n'; + + /** + * Create a line reader that reads from the given stream using the + * default buffer-size (64k). + * @param in The input stream + * @throws IOException + */ + public LineReader(InputStream in) { + this(in, DEFAULT_BUFFER_SIZE); + } + + /** + * Create a line reader that reads from the given stream using the + * given buffer-size. + * @param in The input stream + * @param bufferSize Size of the read buffer + * @throws IOException + */ + public LineReader(InputStream in, int bufferSize) { + this.in = in; + this.bufferSize = bufferSize; + this.buffer = new byte[this.bufferSize]; + } + + /** + * Create a line reader that reads from the given stream using the + * io.file.buffer.size specified in the given + * Configuration. + * @param in input stream + * @param conf configuration + * @throws IOException + */ + public LineReader(InputStream in, Configuration conf) throws IOException { + this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE)); + } + + /** + * Close the underlying stream. + * @throws IOException + */ + public void close() throws IOException { + in.close(); + } + + /** + * Read one line from the InputStream into the given Text. A line + * can be terminated by one of the following: '\n' (LF) , '\r' (CR), + * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated + * line. + * + * @param str the object to store the given line (without newline) + * @param maxLineLength the maximum number of bytes to store into str; + * the rest of the line is silently discarded. + * @param maxBytesToConsume the maximum number of bytes to consume + * in this call. This is only a hint, because if the line cross + * this threshold, we allow it to happen. It can overshoot + * potentially by as much as one buffer length. + * + * @return the number of bytes read including the (longest) newline + * found. + * + * @throws IOException if the underlying stream throws + */ + public int readLine(Text str, int maxLineLength, + int maxBytesToConsume) throws IOException { + /* We're reading data from in, but the head of the stream may be + * already buffered in buffer, so we have several cases: + * 1. No newline characters are in the buffer, so we need to copy + * everything and read another buffer from the stream. + * 2. An unambiguously terminated line is in buffer, so we just + * copy to str. + * 3. Ambiguously terminated line is in buffer, i.e. buffer ends + * in CR. In this case we copy everything up to CR to str, but + * we also need to see what follows CR: if it's LF, then we + * need consume LF as well, so next call to readLine will read + * from after that. + * We use a flag prevCharCR to signal if previous character was CR + * and, if it happens to be at the end of the buffer, delay + * consuming it until we have a chance to look at the char that + * follows. 
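+ * Example (illustrative): if one buffer ends with "abc\r" and the next
+ * read starts with "\nxyz", we append "abc" to str, count "\r\n" as a
+ * single two-byte newline, and the next call to readLine starts at 'x'.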
+ */ + str.clear(); + int txtLength = 0; //tracks str.getLength(), as an optimization + int newlineLength = 0; //length of terminating newline + boolean prevCharCR = false; //true of prev char was CR + long bytesConsumed = 0; + do { + int startPosn = bufferPosn; //starting from where we left off the last time + if (bufferPosn >= bufferLength) { + startPosn = bufferPosn = 0; + if (prevCharCR) + ++bytesConsumed; //account for CR from previous read + bufferLength = in.read(buffer); + if (bufferLength <= 0) + break; // EOF + } + for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline + if (buffer[bufferPosn] == LF) { + newlineLength = (prevCharCR) ? 2 : 1; + ++bufferPosn; // at next invocation proceed from following byte + break; + } + if (prevCharCR) { //CR + notLF, we are at notLF + newlineLength = 1; + break; + } + prevCharCR = (buffer[bufferPosn] == CR); + } + int readLength = bufferPosn - startPosn; + if (prevCharCR && newlineLength == 0) + --readLength; //CR at the end of the buffer + bytesConsumed += readLength; + int appendLength = readLength - newlineLength; + if (appendLength > maxLineLength - txtLength) { + appendLength = maxLineLength - txtLength; + } + if (appendLength > 0) { + str.append(buffer, startPosn, appendLength); + txtLength += appendLength; + } + } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); + + if (bytesConsumed > (long)Integer.MAX_VALUE) + throw new IOException("Too many bytes before newline: " + bytesConsumed); + return (int)bytesConsumed; + } + + /** + * Read from the InputStream into the given Text. + * @param str the object to store the given line + * @param maxLineLength the maximum number of bytes to store into str. + * @return the number of bytes read including the newline + * @throws IOException if the underlying stream throws + */ + public int readLine(Text str, int maxLineLength) throws IOException { + return readLine(str, maxLineLength, Integer.MAX_VALUE); +} + + /** + * Read from the InputStream into the given Text. + * @param str the object to store the given line + * @return the number of bytes read including the newline + * @throws IOException if the underlying stream throws + */ + public int readLine(Text str) throws IOException { + return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE); + } + +} diff --git a/src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java b/src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java new file mode 100644 index 00000000000..3870a4715a3 --- /dev/null +++ b/src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Plugin to calculate virtual and physical memories on Linux systems. + */ +public class LinuxMemoryCalculatorPlugin extends MemoryCalculatorPlugin { + private static final Log LOG = + LogFactory.getLog(LinuxMemoryCalculatorPlugin.class); + + /** + * proc's meminfo virtual file has keys-values in the format + * "key:[ \t]*value[ \t]kB". + */ + private static final String PROCFS_MEMFILE = "/proc/meminfo"; + private static final Pattern PROCFS_MEMFILE_FORMAT = + Pattern.compile("^([a-zA-Z]*):[ \t]*([0-9]*)[ \t]kB"); + + // We just need the values for the keys MemTotal and SwapTotal + private static final String MEMTOTAL_STRING = "MemTotal"; + private static final String SWAPTOTAL_STRING = "SwapTotal"; + + private long ramSize = 0; + private long swapSize = 0; + + boolean readMemInfoFile = false; + + private void readProcMemInfoFile() { + + if (readMemInfoFile) { + return; + } + + // Read "/proc/memInfo" file + BufferedReader in = null; + FileReader fReader = null; + try { + fReader = new FileReader(PROCFS_MEMFILE); + in = new BufferedReader(fReader); + } catch (FileNotFoundException f) { + // shouldn't happen.... + return; + } + + Matcher mat = null; + + try { + String str = in.readLine(); + while (str != null) { + mat = PROCFS_MEMFILE_FORMAT.matcher(str); + if (mat.find()) { + if (mat.group(1).equals(MEMTOTAL_STRING)) { + ramSize = Long.parseLong(mat.group(2)); + } else if (mat.group(1).equals(SWAPTOTAL_STRING)) { + swapSize = Long.parseLong(mat.group(2)); + } + } + str = in.readLine(); + } + } catch (IOException io) { + LOG.warn("Error reading the stream " + io); + } finally { + // Close the streams + try { + fReader.close(); + try { + in.close(); + } catch (IOException i) { + LOG.warn("Error closing the stream " + in); + } + } catch (IOException i) { + LOG.warn("Error closing the stream " + fReader); + } + } + + readMemInfoFile = true; + } + + /** {@inheritDoc} */ + @Override + public long getPhysicalMemorySize() { + readProcMemInfoFile(); + return ramSize * 1024; + } + + /** {@inheritDoc} */ + @Override + public long getVirtualMemorySize() { + readProcMemInfoFile(); + return (ramSize + swapSize) * 1024; + } + + /** + * Test the {@link LinuxMemoryCalculatorPlugin} + * + * @param args + */ + public static void main(String[] args) { + LinuxMemoryCalculatorPlugin plugin = new LinuxMemoryCalculatorPlugin(); + System.out.println("Physical memory Size(bytes) : " + + plugin.getPhysicalMemorySize()); + System.out.println("Total Virtual memory Size(bytes) : " + + plugin.getVirtualMemorySize()); + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java b/src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java new file mode 100644 index 00000000000..a767b663d46 --- /dev/null +++ b/src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; + +/** + * Plugin to calculate virtual and physical memories on the system. + * + */ +public abstract class MemoryCalculatorPlugin extends Configured { + + /** + * Obtain the total size of the virtual memory present in the system. + * + * @return virtual memory size in bytes. + */ + public abstract long getVirtualMemorySize(); + + /** + * Obtain the total size of the physical memory present in the system. + * + * @return physical memory size bytes. + */ + public abstract long getPhysicalMemorySize(); + + /** + * Get the MemoryCalculatorPlugin from the class name and configure it. If + * class name is null, this method will try and return a memory calculator + * plugin available for this system. + * + * @param clazz class-name + * @param conf configure the plugin with this. + * @return MemoryCalculatorPlugin + */ + public static MemoryCalculatorPlugin getMemoryCalculatorPlugin( + Class clazz, Configuration conf) { + + if (clazz != null) { + return ReflectionUtils.newInstance(clazz, conf); + } + + // No class given, try a os specific class + try { + String osName = System.getProperty("os.name"); + if (osName.startsWith("Linux")) { + return new LinuxMemoryCalculatorPlugin(); + } + } catch (SecurityException se) { + // Failed to get Operating System name. + return null; + } + + // Not supported on this system. + return null; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/MergeSort.java b/src/java/org/apache/hadoop/util/MergeSort.java new file mode 100644 index 00000000000..3c104924563 --- /dev/null +++ b/src/java/org/apache/hadoop/util/MergeSort.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.Comparator; +import org.apache.hadoop.io.IntWritable; + +/** An implementation of the core algorithm of MergeSort. 
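+ *
+ * A minimal usage sketch (illustrative only); src and dest must start out as
+ * copies of the same data, and the sorted result ends up in dest:
+ *
+ *   int[] src  = { 3, 1, 2 };
+ *   int[] dest = src.clone();
+ *   Comparator<IntWritable> cmp = new Comparator<IntWritable>() {
+ *     public int compare(IntWritable a, IntWritable b) { return a.get() - b.get(); }
+ *   };
+ *   new MergeSort(cmp).mergeSort(src, dest, 0, src.length);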
*/ +public class MergeSort { + //Reusable IntWritables + IntWritable I = new IntWritable(0); + IntWritable J = new IntWritable(0); + + //the comparator that the algo should use + private Comparator comparator; + + public MergeSort(Comparator comparator) { + this.comparator = comparator; + } + + public void mergeSort(int src[], int dest[], int low, int high) { + int length = high - low; + + // Insertion sort on smallest arrays + if (length < 7) { + for (int i=low; i low; j--) { + I.set(dest[j-1]); + J.set(dest[j]); + if (comparator.compare(I, J)>0) + swap(dest, j, j-1); + } + } + return; + } + + // Recursively sort halves of dest into src + int mid = (low + high) >>> 1; + mergeSort(dest, src, low, mid); + mergeSort(dest, src, mid, high); + + I.set(src[mid-1]); + J.set(src[mid]); + // If list is already sorted, just copy from src to dest. This is an + // optimization that results in faster sorts for nearly ordered lists. + if (comparator.compare(I, J) <= 0) { + System.arraycopy(src, low, dest, low, length); + return; + } + + // Merge sorted halves (now in src) into dest + for (int i = low, p = low, q = mid; i < high; i++) { + if (q < high && p < mid) { + I.set(src[p]); + J.set(src[q]); + } + if (q>=high || ptrue if native-hadoop is loaded, + * else false + */ + public static boolean isNativeCodeLoaded() { + return nativeCodeLoaded; + } + + /** + * Return if native hadoop libraries, if present, can be used for this job. + * @param conf configuration + * + * @return true if native hadoop libraries, if present, can be + * used for this job; false otherwise. + */ + public boolean getLoadNativeLibraries(Configuration conf) { + return conf.getBoolean("hadoop.native.lib", true); + } + + /** + * Set if native hadoop libraries, if present, can be used for this job. + * + * @param conf configuration + * @param loadNativeLibraries can native hadoop libraries be loaded + */ + public void setLoadNativeLibraries(Configuration conf, + boolean loadNativeLibraries) { + conf.setBoolean("hadoop.native.lib", loadNativeLibraries); + } + +} diff --git a/src/java/org/apache/hadoop/util/PlatformName.java b/src/java/org/apache/hadoop/util/PlatformName.java new file mode 100644 index 00000000000..7c173f8515c --- /dev/null +++ b/src/java/org/apache/hadoop/util/PlatformName.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +/** + * A helper class for getting build-info of the java-vm. + * + */ +public class PlatformName { + /** + * The complete platform 'name' to identify the platform as + * per the java-vm. 
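+ * For example, a 32-bit Sun JVM on Linux/x86 would typically report
+ * "Linux-i386-32".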
+ */ + private static final String platformName = System.getProperty("os.name") + "-" + + System.getProperty("os.arch") + "-" + + System.getProperty("sun.arch.data.model"); + + /** + * Get the complete platform as per the java-vm. + * @return returns the complete platform as per the java-vm. + */ + public static String getPlatformName() { + return platformName; + } + + public static void main(String[] args) { + System.out.println(platformName); + } +} diff --git a/src/java/org/apache/hadoop/util/PrintJarMainClass.java b/src/java/org/apache/hadoop/util/PrintJarMainClass.java new file mode 100644 index 00000000000..d693ebb8525 --- /dev/null +++ b/src/java/org/apache/hadoop/util/PrintJarMainClass.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.jar.*; + +/** + * A micro-application that prints the main class name out of a jar file. + */ +public class PrintJarMainClass { + + /** + * @param args + */ + public static void main(String[] args) { + try { + JarFile jar_file = new JarFile(args[0]); + if (jar_file != null) { + Manifest manifest = jar_file.getManifest(); + if (manifest != null) { + String value = manifest.getMainAttributes().getValue("Main-Class"); + if (value != null) { + System.out.println(value.replaceAll("/", ".")); + return; + } + } + } + } catch (Throwable e) { + // ignore it + } + System.out.println("UNKNOWN"); + System.exit(1); + } + +} diff --git a/src/java/org/apache/hadoop/util/PriorityQueue.java b/src/java/org/apache/hadoop/util/PriorityQueue.java new file mode 100644 index 00000000000..12324103e0d --- /dev/null +++ b/src/java/org/apache/hadoop/util/PriorityQueue.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + + +/** A PriorityQueue maintains a partial ordering of its elements such that the + least element can always be found in constant time. Put()'s and pop()'s + require log(size) time. 
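+ *
+ * A minimal subclass sketch (illustrative only):
+ *
+ *   PriorityQueue<Long> pq = new PriorityQueue<Long>() {
+ *     { initialize(16); }                      // room for 16 elements
+ *     protected boolean lessThan(Object a, Object b) {
+ *       return (Long) a < (Long) b;
+ *     }
+ *   };
+ *   pq.put(3L); pq.put(1L);                    // pq.top() now returns 1L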
*/ +public abstract class PriorityQueue { + private T[] heap; + private int size; + private int maxSize; + + /** Determines the ordering of objects in this priority queue. Subclasses + must define this one method. */ + protected abstract boolean lessThan(Object a, Object b); + + /** Subclass constructors must call this. */ + @SuppressWarnings("unchecked") + protected final void initialize(int maxSize) { + size = 0; + int heapSize = maxSize + 1; + heap = (T[]) new Object[heapSize]; + this.maxSize = maxSize; + } + + /** + * Adds an Object to a PriorityQueue in log(size) time. + * If one tries to add more objects than maxSize from initialize + * a RuntimeException (ArrayIndexOutOfBound) is thrown. + */ + public final void put(T element) { + size++; + heap[size] = element; + upHeap(); + } + + /** + * Adds element to the PriorityQueue in log(size) time if either + * the PriorityQueue is not full, or not lessThan(element, top()). + * @param element + * @return true if element is added, false otherwise. + */ + public boolean insert(T element){ + if (size < maxSize){ + put(element); + return true; + } + else if (size > 0 && !lessThan(element, top())){ + heap[1] = element; + adjustTop(); + return true; + } + else + return false; + } + + /** Returns the least element of the PriorityQueue in constant time. */ + public final T top() { + if (size > 0) + return heap[1]; + else + return null; + } + + /** Removes and returns the least element of the PriorityQueue in log(size) + time. */ + public final T pop() { + if (size > 0) { + T result = heap[1]; // save first value + heap[1] = heap[size]; // move last to first + heap[size] = null; // permit GC of objects + size--; + downHeap(); // adjust heap + return result; + } else + return null; + } + + /** Should be called when the Object at top changes values. Still log(n) + * worst case, but it's at least twice as fast to
+   *  { pq.top().change(); pq.adjustTop(); }
+   * instead of
+   *  { o = pq.pop(); o.change(); pq.push(o); }
+ */ + public final void adjustTop() { + downHeap(); + } + + + /** Returns the number of elements currently stored in the PriorityQueue. */ + public final int size() { + return size; + } + + /** Removes all entries from the PriorityQueue. */ + public final void clear() { + for (int i = 0; i <= size; i++) + heap[i] = null; + size = 0; + } + + private final void upHeap() { + int i = size; + T node = heap[i]; // save bottom node + int j = i >>> 1; + while (j > 0 && lessThan(node, heap[j])) { + heap[i] = heap[j]; // shift parents down + i = j; + j = j >>> 1; + } + heap[i] = node; // install saved node + } + + private final void downHeap() { + int i = 1; + T node = heap[i]; // save top node + int j = i << 1; // find smaller child + int k = j + 1; + if (k <= size && lessThan(heap[k], heap[j])) { + j = k; + } + while (j <= size && lessThan(heap[j], node)) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= size && lessThan(heap[k], heap[j])) { + j = k; + } + } + heap[i] = node; // install saved node + } +} diff --git a/src/java/org/apache/hadoop/util/ProcessTree.java b/src/java/org/apache/hadoop/util/ProcessTree.java new file mode 100644 index 00000000000..62b5c058ee8 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ProcessTree.java @@ -0,0 +1,239 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.util.Shell.ExitCodeException; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; + +/** + * Process tree related operations + */ +public class ProcessTree { + + private static final Log LOG = LogFactory.getLog(ProcessTree.class); + + public static final long DEFAULT_SLEEPTIME_BEFORE_SIGKILL = 5000L; + + public static final boolean isSetsidAvailable = isSetsidSupported(); + private static boolean isSetsidSupported() { + ShellCommandExecutor shexec = null; + boolean setsidSupported = true; + try { + String[] args = {"setsid", "bash", "-c", "echo $$"}; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("setsid is not available on this machine. 
So not using it."); + setsidSupported = false; + } finally { // handle the exit code + LOG.info("setsid exited with exit code " + shexec.getExitCode()); + return setsidSupported; + } + } + + /** + * Kills the process(OR process group) by sending the signal SIGKILL + * in the current thread + * @param pid Process id(OR process group id) of to-be-deleted-process + * @param isProcessGroup Is pid a process group id of to-be-deleted-processes + * @param sleepTimeBeforeSigKill wait time before sending SIGKILL after + * sending SIGTERM + */ + private static void sigKillInCurrentThread(String pid, boolean isProcessGroup, + long sleepTimeBeforeSigKill) { + // Kill the subprocesses of root process(even if the root process is not + // alive) if process group is to be killed. + if (isProcessGroup || ProcessTree.isAlive(pid)) { + try { + // Sleep for some time before sending SIGKILL + Thread.sleep(sleepTimeBeforeSigKill); + } catch (InterruptedException i) { + LOG.warn("Thread sleep is interrupted."); + } + + ShellCommandExecutor shexec = null; + + try { + String pid_pgrpid; + if(isProcessGroup) {//kill the whole process group + pid_pgrpid = "-" + pid; + } + else {//kill single process + pid_pgrpid = pid; + } + + String[] args = { "kill", "-9", pid_pgrpid }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + ioe); + } finally { + if(isProcessGroup) { + LOG.info("Killing process group" + pid + " with SIGKILL. Exit code " + + shexec.getExitCode()); + } + else { + LOG.info("Killing process " + pid + " with SIGKILL. Exit code " + + shexec.getExitCode()); + } + } + } + } + + /** Kills the process(OR process group) by sending the signal SIGKILL + * @param pid Process id(OR process group id) of to-be-deleted-process + * @param isProcessGroup Is pid a process group id of to-be-deleted-processes + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + private static void sigKill(String pid, boolean isProcessGroup, + long sleeptimeBeforeSigkill, boolean inBackground) { + + if(inBackground) { // use a separate thread for killing + SigKillThread sigKillThread = new SigKillThread(pid, isProcessGroup, + sleeptimeBeforeSigkill); + sigKillThread.setDaemon(true); + sigKillThread.start(); + } + else { + sigKillInCurrentThread(pid, isProcessGroup, sleeptimeBeforeSigkill); + } + } + + /** Destroy the process. + * @param pid Process id of to-be-killed-process + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + protected static void destroyProcess(String pid, long sleeptimeBeforeSigkill, + boolean inBackground) { + ShellCommandExecutor shexec = null; + try { + String[] args = { "kill", pid }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + ioe); + } finally { + LOG.info("Killing process " + pid + + " with SIGTERM. Exit code " + shexec.getExitCode()); + } + + sigKill(pid, false, sleeptimeBeforeSigkill, inBackground); + } + + /** Destroy the process group. 
+ * @param pgrpId Process group id of to-be-killed-processes + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process group is to be killed in the back ground with + * a separate thread + */ + protected static void destroyProcessGroup(String pgrpId, + long sleeptimeBeforeSigkill, boolean inBackground) { + ShellCommandExecutor shexec = null; + try { + String[] args = { "kill", "--", "-" + pgrpId }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + ioe); + } finally { + LOG.info("Killing all processes in the process group " + pgrpId + + " with SIGTERM. Exit code " + shexec.getExitCode()); + } + + sigKill(pgrpId, true, sleeptimeBeforeSigkill, inBackground); + } + + /** + * Destroy the process-tree. + * @param pid process id of the root process of the subtree of processes + * to be killed + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param isProcessGroup pid is a process group leader or not + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + public static void destroy(String pid, long sleeptimeBeforeSigkill, + boolean isProcessGroup, boolean inBackground) { + if(isProcessGroup) { + destroyProcessGroup(pid, sleeptimeBeforeSigkill, inBackground); + } + else { + //TODO: Destroy all the processes in the subtree in this case also. + // For the time being, killing only the root process. + destroyProcess(pid, sleeptimeBeforeSigkill, inBackground); + } + } + + + /** + * Is the process with PID pid still alive? + * This method assumes that isAlive is called on a pid that was alive not + * too long ago, and hence assumes no chance of pid-wrapping-around. + */ + public static boolean isAlive(String pid) { + ShellCommandExecutor shexec = null; + try { + String[] args = { "kill", "-0", pid }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (ExitCodeException ee) { + return false; + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + + Arrays.toString(shexec.getExecString()) + ioe); + return false; + } + return (shexec.getExitCode() == 0 ? true : false); + } + + /** + * Helper thread class that kills process-tree with SIGKILL in background + */ + static class SigKillThread extends Thread { + private String pid = null; + private boolean isProcessGroup = false; + + private long sleepTimeBeforeSigKill = DEFAULT_SLEEPTIME_BEFORE_SIGKILL; + + private SigKillThread(String pid, boolean isProcessGroup, long interval) { + this.pid = pid; + this.isProcessGroup = isProcessGroup; + this.setName(this.getClass().getName() + "-" + pid); + sleepTimeBeforeSigKill = interval; + } + + public void run() { + sigKillInCurrentThread(pid, isProcessGroup, sleepTimeBeforeSigKill); + } + } +} diff --git a/src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java b/src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java new file mode 100644 index 00000000000..52dd36ecc14 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java @@ -0,0 +1,448 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.LinkedList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A Proc file-system based ProcessTree. Works only on Linux. + */ +public class ProcfsBasedProcessTree extends ProcessTree { + + private static final Log LOG = LogFactory + .getLog(ProcfsBasedProcessTree.class); + + private static final String PROCFS = "/proc/"; + + private static final Pattern PROCFS_STAT_FILE_FORMAT = Pattern + .compile("^([0-9-]+)\\s([^\\s]+)\\s[^\\s]\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+\\s){16}([0-9]+)(\\s[0-9-]+){16}"); + + private Integer pid = -1; + private boolean setsidUsed = false; + private long sleeptimeBeforeSigkill = DEFAULT_SLEEPTIME_BEFORE_SIGKILL; + + private Map processTree = new HashMap(); + + public ProcfsBasedProcessTree(String pid) { + this(pid, false, DEFAULT_SLEEPTIME_BEFORE_SIGKILL); + } + + public ProcfsBasedProcessTree(String pid, boolean setsidUsed, + long sigkillInterval) { + this.pid = getValidPID(pid); + this.setsidUsed = setsidUsed; + sleeptimeBeforeSigkill = sigkillInterval; + } + + /** + * Sets SIGKILL interval + * @deprecated Use {@link ProcfsBasedProcessTree#ProcfsBasedProcessTree( + * String, boolean, long)} instead + * @param interval The time to wait before sending SIGKILL + * after sending SIGTERM + */ + @Deprecated + public void setSigKillInterval(long interval) { + sleeptimeBeforeSigkill = interval; + } + + /** + * Checks if the ProcfsBasedProcessTree is available on this system. + * + * @return true if ProcfsBasedProcessTree is available. False otherwise. + */ + public static boolean isAvailable() { + try { + String osName = System.getProperty("os.name"); + if (!osName.startsWith("Linux")) { + LOG.info("ProcfsBasedProcessTree currently is supported only on " + + "Linux."); + return false; + } + } catch (SecurityException se) { + LOG.warn("Failed to get Operating System name. " + se); + return false; + } + return true; + } + + /** + * Get the process-tree with latest state. If the root-process is not alive, + * an empty tree will be returned. + * + * @return the process-tree with latest state. 
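+ *
+ * A typical monitoring loop looks roughly like (illustrative only; rootPid
+ * is assumed to hold the pid of the root process as a String):
+ *
+ *   ProcfsBasedProcessTree tree = new ProcfsBasedProcessTree(rootPid);
+ *   tree = tree.getProcessTree();              // re-scan /proc
+ *   long vmemBytes = tree.getCumulativeVmem();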
+ */ + public ProcfsBasedProcessTree getProcessTree() { + if (pid != -1) { + // Get the list of processes + List processList = getProcessList(); + + Map allProcessInfo = new HashMap(); + processTree.clear(); + + ProcessInfo me = null; + for (Integer proc : processList) { + // Get information for each process + ProcessInfo pInfo = new ProcessInfo(proc); + if (constructProcessInfo(pInfo) != null) { + allProcessInfo.put(proc, pInfo); + if (proc.equals(this.pid)) { + me = pInfo; // cache 'me' + processTree.put(proc, pInfo); + } + } + } + + if (me == null) { + return this; + } + + // Add each process to its parent. + for (Map.Entry entry : allProcessInfo.entrySet()) { + Integer pID = entry.getKey(); + if (pID != 1) { + ProcessInfo pInfo = entry.getValue(); + ProcessInfo parentPInfo = allProcessInfo.get(pInfo.getPpid()); + if (parentPInfo != null) { + parentPInfo.addChild(pInfo); + } + } + } + + // now start constructing the process-tree + LinkedList pInfoQueue = new LinkedList(); + pInfoQueue.addAll(me.getChildren()); + while (!pInfoQueue.isEmpty()) { + ProcessInfo pInfo = pInfoQueue.remove(); + if (!processTree.containsKey(pInfo.getPid())) { + processTree.put(pInfo.getPid(), pInfo); + } + pInfoQueue.addAll(pInfo.getChildren()); + } + + if (LOG.isDebugEnabled()) { + // Log.debug the ProcfsBasedProcessTree + LOG.debug(this.toString()); + } + } + return this; + } + + /** + * Is the root-process alive? + * + * @return true if the root-process is alive, false otherwise. + */ + public boolean isAlive() { + if (pid == -1) { + return false; + } else { + return isAlive(pid.toString()); + } + } + + /** + * Is any of the subprocesses in the process-tree alive? + * + * @return true if any of the processes in the process-tree is + * alive, false otherwise. + */ + public boolean isAnyProcessInTreeAlive() { + for (Integer pId : processTree.keySet()) { + if (isAlive(pId.toString())) { + return true; + } + } + return false; + } + + /** Verify that the given process id is same as its process group id. + * @param pidStr Process id of the to-be-verified-process + */ + private static boolean assertPidPgrpidForMatch(String pidStr) { + Integer pId = Integer.parseInt(pidStr); + // Get information for this process + ProcessInfo pInfo = new ProcessInfo(pId); + pInfo = constructProcessInfo(pInfo); + //make sure that pId and its pgrpId match + if (!pInfo.getPgrpId().equals(pId)) { + LOG.warn("Unexpected: Process with PID " + pId + + " is not a process group leader."); + return false; + } + if (LOG.isDebugEnabled()) { + LOG.debug(pId + " is a process group leader, as expected."); + } + return true; + } + + /** Make sure that the given pid is a process group leader and then + * destroy the process group. + * @param pgrpId Process group id of to-be-killed-processes + * @param interval The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + public static void assertAndDestroyProcessGroup(String pgrpId, long interval, + boolean inBackground) + throws IOException { + // Make sure that the pid given is a process group leader + if (!assertPidPgrpidForMatch(pgrpId)) { + throw new IOException("Process with PID " + pgrpId + + " is not a process group leader."); + } + destroyProcessGroup(pgrpId, interval, inBackground); + } + + /** + * Destroy the process-tree. + */ + public void destroy() { + destroy(true); + } + + /** + * Destroy the process-tree. 
+ * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + public void destroy(boolean inBackground) { + LOG.debug("Killing ProcfsBasedProcessTree of " + pid); + if (pid == -1) { + return; + } + + if (isAlive(pid.toString())) { + if (isSetsidAvailable && setsidUsed) { + // In this case, we know that pid got created using setsid. So kill the + // whole processGroup. + try { + assertAndDestroyProcessGroup(pid.toString(), sleeptimeBeforeSigkill, + inBackground); + } catch (IOException e) { + LOG.warn(StringUtils.stringifyException(e)); + } + } + else { + //TODO: Destroy all the processes in the subtree in this case also. + // For the time being, killing only the root process. + destroyProcess(pid.toString(), sleeptimeBeforeSigkill, inBackground); + } + } + } + + /** + * Get the cumulative virtual memory used by all the processes in the + * process-tree. + * + * @return cumulative virtual memory used by the process-tree in bytes. + */ + public long getCumulativeVmem() { + long total = 0; + for (ProcessInfo p : processTree.values()) { + if (p != null) { + total += p.getVmem(); + } + } + return total; + } + + private static Integer getValidPID(String pid) { + Integer retPid = -1; + try { + retPid = Integer.parseInt(pid); + if (retPid <= 0) { + retPid = -1; + } + } catch (NumberFormatException nfe) { + retPid = -1; + } + return retPid; + } + + /** + * Get the list of all processes in the system. + */ + private List getProcessList() { + String[] processDirs = (new File(PROCFS)).list(); + List processList = new ArrayList(); + + for (String dir : processDirs) { + try { + int pd = Integer.parseInt(dir); + if ((new File(PROCFS + dir)).isDirectory()) { + processList.add(Integer.valueOf(pd)); + } + } catch (NumberFormatException n) { + // skip this directory + } catch (SecurityException s) { + // skip this process + } + } + return processList; + } + + /** + * + * Construct the ProcessInfo using the process' PID and procfs and return the + * same. Returns null on failing to read from procfs, + */ + private static ProcessInfo constructProcessInfo(ProcessInfo pinfo) { + ProcessInfo ret = null; + // Read "/proc//stat" file + BufferedReader in = null; + FileReader fReader = null; + try { + fReader = new FileReader(PROCFS + pinfo.getPid() + "/stat"); + in = new BufferedReader(fReader); + } catch (FileNotFoundException f) { + // The process vanished in the interim! + return ret; + } + + ret = pinfo; + try { + String str = in.readLine(); // only one line + Matcher m = PROCFS_STAT_FILE_FORMAT.matcher(str); + boolean mat = m.find(); + if (mat) { + // Set ( name ) ( ppid ) ( pgrpId ) (session ) (vsize ) + pinfo.update(m.group(2), Integer.parseInt(m.group(3)), Integer + .parseInt(m.group(4)), Integer.parseInt(m.group(5)), Long + .parseLong(m.group(7))); + } + } catch (IOException io) { + LOG.warn("Error reading the stream " + io); + ret = null; + } finally { + // Close the streams + try { + if (fReader != null) { + fReader.close(); + } + try { + if (in != null) { + in.close(); + } + } catch (IOException i) { + LOG.warn("Error closing the stream " + in); + } + } catch (IOException i) { + LOG.warn("Error closing the stream " + fReader); + } + } + + return ret; + } + + /** + * Returns a string printing PIDs of process present in the + * ProcfsBasedProcessTree. Output format : [pid pid ..] 
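/*
 * Hedged usage sketch (not part of the patch): a monitoring loop over the
 * procfs-backed tree defined above. The pid "12345", the setsid flag and the
 * 5000 ms SIGKILL interval are assumptions; a readable Linux /proc is
 * required, which is what isAvailable() checks.
 */
import org.apache.hadoop.util.ProcfsBasedProcessTree;

class ProcfsTreeMonitorExample {
  public static void main(String[] args) throws InterruptedException {
    if (!ProcfsBasedProcessTree.isAvailable()) {
      return;                                  // procfs trees only work on Linux
    }
    ProcfsBasedProcessTree tree =
        new ProcfsBasedProcessTree("12345", false, 5000L);
    while (tree.isAlive()) {
      tree = tree.getProcessTree();            // refresh the snapshot from /proc
      System.out.println("vmem bytes: " + tree.getCumulativeVmem());
      Thread.sleep(1000L);
    }
    tree.destroy();                            // SIGTERM now, SIGKILL after the interval
  }
}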
+ */ + public String toString() { + StringBuffer pTree = new StringBuffer("[ "); + for (Integer p : processTree.keySet()) { + pTree.append(p); + pTree.append(" "); + } + return pTree.substring(0, pTree.length()) + "]"; + } + + /** + * + * Class containing information of a process. + * + */ + private static class ProcessInfo { + private Integer pid; // process-id + private String name; // command name + private Integer pgrpId; // process group-id + private Integer ppid; // parent process-id + private Integer sessionId; // session-id + private Long vmem; // virtual memory usage + private List children = new ArrayList(); // list of children + + public ProcessInfo(int pid) { + this.pid = Integer.valueOf(pid); + } + + public Integer getPid() { + return pid; + } + + public String getName() { + return name; + } + + public Integer getPgrpId() { + return pgrpId; + } + + public Integer getPpid() { + return ppid; + } + + public Integer getSessionId() { + return sessionId; + } + + public Long getVmem() { + return vmem; + } + + public boolean isParent(ProcessInfo p) { + if (pid.equals(p.getPpid())) { + return true; + } + return false; + } + + public void update(String name, Integer ppid, Integer pgrpId, + Integer sessionId, Long vmem) { + this.name = name; + this.ppid = ppid; + this.pgrpId = pgrpId; + this.sessionId = sessionId; + this.vmem = vmem; + } + + public boolean addChild(ProcessInfo p) { + return children.add(p); + } + + public List getChildren() { + return children; + } + } +} diff --git a/src/java/org/apache/hadoop/util/ProgramDriver.java b/src/java/org/apache/hadoop/util/ProgramDriver.java new file mode 100644 index 00000000000..c5880e7fe63 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ProgramDriver.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Map; +import java.util.TreeMap; + +/** A driver that is used to run programs added to it + */ + +public class ProgramDriver { + + /** + * A description of a program based on its class and a + * human-readable description. + * @date april 2006 + */ + Map programs; + + public ProgramDriver(){ + programs = new TreeMap(); + } + + static private class ProgramDescription { + + static final Class[] paramTypes = new Class[] {String[].class}; + + /** + * Create a description of an example program. 
+ * @param mainClass the class with the main for the example program + * @param description a string to display to the user in help messages + * @throws SecurityException if we can't use reflection + * @throws NoSuchMethodException if the class doesn't have a main method + */ + public ProgramDescription(Class mainClass, + String description) + throws SecurityException, NoSuchMethodException { + this.main = mainClass.getMethod("main", paramTypes); + this.description = description; + } + + /** + * Invoke the example application with the given arguments + * @param args the arguments for the application + * @throws Throwable The exception thrown by the invoked method + */ + public void invoke(String[] args) + throws Throwable { + try { + main.invoke(null, new Object[]{args}); + } catch (InvocationTargetException except) { + throw except.getCause(); + } + } + + public String getDescription() { + return description; + } + + private Method main; + private String description; + } + + private static void printUsage(Map programs) { + System.out.println("Valid program names are:"); + for(Map.Entry item : programs.entrySet()) { + System.out.println(" " + item.getKey() + ": " + + item.getValue().getDescription()); + } + } + + /** + * This is the method that adds the classed to the repository + * @param name The name of the string you want the class instance to be called with + * @param mainClass The class that you want to add to the repository + * @param description The description of the class + * @throws NoSuchMethodException + * @throws SecurityException + */ + public void addClass (String name, Class mainClass, String description) throws Throwable { + programs.put(name , new ProgramDescription(mainClass, description)); + } + + /** + * This is a driver for the example programs. + * It looks at the first command line argument and tries to find an + * example program with that name. + * If it is found, it calls the main method in that class with the rest + * of the command line arguments. + * @param args The argument from the user. args[0] is the command to run. + * @return -1 on error, 0 on success + * @throws NoSuchMethodException + * @throws SecurityException + * @throws IllegalAccessException + * @throws IllegalArgumentException + * @throws Throwable Anything thrown by the example program's main + */ + public int driver(String[] args) + throws Throwable + { + // Make sure they gave us a program name. + if (args.length == 0) { + System.out.println("An example program must be given as the" + + " first argument."); + printUsage(programs); + return -1; + } + + // And that it is good. + ProgramDescription pgm = programs.get(args[0]); + if (pgm == null) { + System.out.println("Unknown program '" + args[0] + "' chosen."); + printUsage(programs); + return -1; + } + + // Remove the leading argument and call main + String[] new_args = new String[args.length - 1]; + for(int i=1; i < args.length; ++i) { + new_args[i-1] = args[i]; + } + pgm.invoke(new_args); + return 0; + } + +} diff --git a/src/java/org/apache/hadoop/util/Progress.java b/src/java/org/apache/hadoop/util/Progress.java new file mode 100644 index 00000000000..81be35c8e01 --- /dev/null +++ b/src/java/org/apache/hadoop/util/Progress.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
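/*
 * Editorial example, not part of the patch: registering programs with the
 * ProgramDriver shown above. The Hello class is a stand-in for any class that
 * exposes a public static main(String[]).
 */
import org.apache.hadoop.util.ProgramDriver;

class ExampleDriver {
  /** Stand-in program; ProgramDriver only needs its public static main. */
  public static class Hello {
    public static void main(String[] args) {
      System.out.println("hello, " + args.length + " args");
    }
  }

  public static void main(String[] args) {
    int exitCode = -1;
    try {
      ProgramDriver driver = new ProgramDriver();
      driver.addClass("hello", Hello.class, "prints a greeting");
      exitCode = driver.driver(args);          // dispatches on args[0]
    } catch (Throwable t) {
      t.printStackTrace();
    }
    System.exit(exitCode);
  }
}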
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.ArrayList; + +/** Utility to assist with generation of progress reports. Applications build + * a hierarchy of {@link Progress} instances, each modelling a phase of + * execution. The root is constructed with {@link #Progress()}. Nodes for + * sub-phases are created by calling {@link #addPhase()}. + */ +public class Progress { + private String status = ""; + private float progress; + private int currentPhase; + private ArrayList phases = new ArrayList(); + private Progress parent; + private float progressPerPhase; + + /** Creates a new root node. */ + public Progress() {} + + /** Adds a named node to the tree. */ + public Progress addPhase(String status) { + Progress phase = addPhase(); + phase.setStatus(status); + return phase; + } + + /** Adds a node to the tree. */ + public synchronized Progress addPhase() { + Progress phase = new Progress(); + phases.add(phase); + phase.setParent(this); + progressPerPhase = 1.0f / (float)phases.size(); + return phase; + } + + synchronized Progress getParent() { return parent; } + synchronized void setParent(Progress parent) { this.parent = parent; } + + /** Called during execution to move to the next phase at this level in the + * tree. */ + public synchronized void startNextPhase() { + currentPhase++; + } + + /** Returns the current sub-node executing. */ + public synchronized Progress phase() { + return phases.get(currentPhase); + } + + /** Completes this node, moving the parent node to its next child. */ + public void complete() { + // we have to traverse up to our parent, so be careful about locking. + Progress myParent; + synchronized(this) { + progress = 1.0f; + myParent = parent; + } + if (myParent != null) { + // this will synchronize on the parent, so we make sure we release + // our lock before getting the parent's, since we're traversing + // against the normal traversal direction used by get() or toString(). + // We don't need transactional semantics, so we're OK doing this. + myParent.startNextPhase(); + } + } + + /** Called during execution on a leaf node to set its progress. */ + public synchronized void set(float progress) { + this.progress = progress; + } + + /** Returns the overall progress of the root. */ + // this method probably does not need to be synchronized as getINternal() is synchronized + // and the node's parent never changes. Still, it doesn't hurt. + public synchronized float get() { + Progress node = this; + while (node.getParent() != null) { // find the root + node = parent; + } + return node.getInternal(); + } + + /** Computes progress in this node. */ + private synchronized float getInternal() { + int phaseCount = phases.size(); + if (phaseCount != 0) { + float subProgress = + currentPhase < phaseCount ? 
phase().getInternal() : 0.0f; + return progressPerPhase*(currentPhase + subProgress); + } else { + return progress; + } + } + + public synchronized void setStatus(String status) { + this.status = status; + } + + public String toString() { + StringBuffer result = new StringBuffer(); + toString(result); + return result.toString(); + } + + private synchronized void toString(StringBuffer buffer) { + buffer.append(status); + if (phases.size() != 0 && currentPhase < phases.size()) { + buffer.append(" > "); + phase().toString(buffer); + } + } + +} diff --git a/src/java/org/apache/hadoop/util/Progressable.java b/src/java/org/apache/hadoop/util/Progressable.java new file mode 100644 index 00000000000..5bdd7daeac1 --- /dev/null +++ b/src/java/org/apache/hadoop/util/Progressable.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +/** + * A facility for reporting progress. + * + *
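/*
 * Editorial sketch (not part of the patch): building the two-level Progress
 * tree described above and reading the aggregate value. The phase names are
 * invented for the example.
 */
import org.apache.hadoop.util.Progress;

class ProgressExample {
  public static void main(String[] args) {
    Progress root = new Progress();
    Progress copy = root.addPhase("copy");
    Progress sort = root.addPhase("sort");

    copy.set(0.5f);                   // halfway through the first of two phases
    System.out.println(root.get());   // 0.25
    copy.complete();                  // finishes "copy", root moves on to "sort"
    sort.set(0.5f);
    System.out.println(root.get());   // 0.75
    System.out.println(root);         // status chain, e.g. " > sort"
  }
}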

Clients and/or applications can use the provided Progressable + * to explicitly report progress to the Hadoop framework. This is especially + * important for operations which take a significant amount of time since, + * in lieu of the reported progress, the framework has to assume that an error + * has occurred and time out the operation.

+ */ +public interface Progressable { + /** + * Report progress to the Hadoop framework. + */ + public void progress(); +} diff --git a/src/java/org/apache/hadoop/util/QuickSort.java b/src/java/org/apache/hadoop/util/QuickSort.java new file mode 100644 index 00000000000..74bf0c1ab16 --- /dev/null +++ b/src/java/org/apache/hadoop/util/QuickSort.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * An implementation of the core algorithm of QuickSort. + */ +public final class QuickSort implements IndexedSorter { + + private static final IndexedSorter alt = new HeapSort(); + + public QuickSort() { } + + private static void fix(IndexedSortable s, int p, int r) { + if (s.compare(p, r) > 0) { + s.swap(p, r); + } + } + + /** + * Deepest recursion before giving up and doing a heapsort. + * Returns 2 * ceil(log(n)). + */ + protected static int getMaxDepth(int x) { + if (x <= 0) + throw new IllegalArgumentException("Undefined for " + x); + return (32 - Integer.numberOfLeadingZeros(x - 1)) << 2; + } + + /** + * Sort the given range of items using quick sort. + * {@inheritDoc} If the recursion depth falls below {@link #getMaxDepth}, + * then switch to {@link HeapSort}. 
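/*
 * Editorial example, not part of the patch: sorting an int[] through the
 * IndexedSortable callbacks that QuickSort operates on. The Progressable only
 * marks where liveness would be reported back to a caller.
 */
import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.QuickSort;

class QuickSortExample {
  public static void main(String[] args) {
    final int[] data = {5, 3, 8, 1, 3};
    IndexedSortable sortable = new IndexedSortable() {
      public int compare(int i, int j) { return data[i] - data[j]; }
      public void swap(int i, int j) {
        int t = data[i]; data[i] = data[j]; data[j] = t;
      }
    };
    Progressable reporter = new Progressable() {
      public void progress() { /* report liveness here */ }
    };
    new QuickSort().sort(sortable, 0, data.length, reporter);
    System.out.println(java.util.Arrays.toString(data));    // [1, 3, 3, 5, 8]
  }
}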
+ */ + public void sort(IndexedSortable s, int p, int r) { + sort(s, p, r, null); + } + + /** + * {@inheritDoc} + */ + public void sort(final IndexedSortable s, int p, int r, + final Progressable rep) { + sortInternal(s, p, r, rep, getMaxDepth(r - p)); + } + + private static void sortInternal(final IndexedSortable s, int p, int r, + final Progressable rep, int depth) { + if (null != rep) { + rep.progress(); + } + while (true) { + if (r-p < 13) { + for (int i = p; i < r; ++i) { + for (int j = i; j > p && s.compare(j-1, j) > 0; --j) { + s.swap(j, j-1); + } + } + return; + } + if (--depth < 0) { + // give up + alt.sort(s, p, r, rep); + return; + } + + // select, move pivot into first position + fix(s, (p+r) >>> 1, p); + fix(s, (p+r) >>> 1, r - 1); + fix(s, p, r-1); + + // Divide + int i = p; + int j = r; + int ll = p; + int rr = r; + int cr; + while(true) { + while (++i < j) { + if ((cr = s.compare(i, p)) > 0) break; + if (0 == cr && ++ll != i) { + s.swap(ll, i); + } + } + while (--j > i) { + if ((cr = s.compare(p, j)) > 0) break; + if (0 == cr && --rr != j) { + s.swap(rr, j); + } + } + if (i < j) s.swap(i, j); + else break; + } + j = i; + // swap pivot- and all eq values- into position + while (ll >= p) { + s.swap(ll--, --i); + } + while (rr < r) { + s.swap(rr++, j++); + } + + // Conquer + // Recurse on smaller interval first to keep stack shallow + assert i != j; + if (i - p < r - j) { + sortInternal(s, p, i, rep, depth); + p = j; + } else { + sortInternal(s, j, r, rep, depth); + r = i; + } + } + } + +} diff --git a/src/java/org/apache/hadoop/util/ReflectionUtils.java b/src/java/org/apache/hadoop/util/ReflectionUtils.java new file mode 100644 index 00000000000..d1718bf3560 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ReflectionUtils.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.io.*; +import java.lang.management.*; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; + +/** + * General reflection utils + */ + +public class ReflectionUtils { + + private static final Class[] EMPTY_ARRAY = new Class[]{}; + private static SerializationFactory serialFactory = null; + + /** + * Cache of constructors for each class. 
Pins the classes so they + * can't be garbage collected until ReflectionUtils can be collected. + */ + private static final Map, Constructor> CONSTRUCTOR_CACHE = + new ConcurrentHashMap, Constructor>(); + + /** + * Check and set 'configuration' if necessary. + * + * @param theObject object for which to set configuration + * @param conf Configuration + */ + public static void setConf(Object theObject, Configuration conf) { + if (conf != null) { + if (theObject instanceof Configurable) { + ((Configurable) theObject).setConf(conf); + } + setJobConf(theObject, conf); + } + } + + /** + * This code is to support backward compatibility and break the compile + * time dependency of core on mapred. + * This should be made deprecated along with the mapred package HADOOP-1230. + * Should be removed when mapred package is removed. + */ + private static void setJobConf(Object theObject, Configuration conf) { + //If JobConf and JobConfigurable are in classpath, AND + //theObject is of type JobConfigurable AND + //conf is of type JobConf then + //invoke configure on theObject + try { + Class jobConfClass = + conf.getClassByName("org.apache.hadoop.mapred.JobConf"); + Class jobConfigurableClass = + conf.getClassByName("org.apache.hadoop.mapred.JobConfigurable"); + if (jobConfClass.isAssignableFrom(conf.getClass()) && + jobConfigurableClass.isAssignableFrom(theObject.getClass())) { + Method configureMethod = + jobConfigurableClass.getMethod("configure", jobConfClass); + configureMethod.invoke(theObject, conf); + } + } catch (ClassNotFoundException e) { + //JobConf/JobConfigurable not in classpath. no need to configure + } catch (Exception e) { + throw new RuntimeException("Error in configuring object", e); + } + } + + /** Create an object for the given class and initialize it from conf + * + * @param theClass class of which an object is created + * @param conf Configuration + * @return a new object + */ + @SuppressWarnings("unchecked") + public static T newInstance(Class theClass, Configuration conf) { + T result; + try { + Constructor meth = (Constructor) CONSTRUCTOR_CACHE.get(theClass); + if (meth == null) { + meth = theClass.getDeclaredConstructor(EMPTY_ARRAY); + meth.setAccessible(true); + CONSTRUCTOR_CACHE.put(theClass, meth); + } + result = meth.newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + setConf(result, conf); + return result; + } + + static private ThreadMXBean threadBean = + ManagementFactory.getThreadMXBean(); + + public static void setContentionTracing(boolean val) { + threadBean.setThreadContentionMonitoringEnabled(val); + } + + private static String getTaskName(long id, String name) { + if (name == null) { + return Long.toString(id); + } + return id + " (" + name + ")"; + } + + /** + * Print all of the thread's information and stack traces. 
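/*
 * Hedged sketch, not part of the patch: creating a configured instance via the
 * cached no-argument constructor path described above. MyThing is a
 * hypothetical Configurable defined only for this example.
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.ReflectionUtils;

class NewInstanceExample {
  /** Hypothetical type; setConf() is called on it by ReflectionUtils. */
  public static class MyThing extends Configured {
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    MyThing thing = ReflectionUtils.newInstance(MyThing.class, conf);
    System.out.println(thing.getConf() == conf);   // true: conf was injected
  }
}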
+ * + * @param stream the stream to + * @param title a string title for the stack trace + */ + public static void printThreadInfo(PrintWriter stream, + String title) { + final int STACK_DEPTH = 20; + boolean contention = threadBean.isThreadContentionMonitoringEnabled(); + long[] threadIds = threadBean.getAllThreadIds(); + stream.println("Process Thread Dump: " + title); + stream.println(threadIds.length + " active threads"); + for (long tid: threadIds) { + ThreadInfo info = threadBean.getThreadInfo(tid, STACK_DEPTH); + if (info == null) { + stream.println(" Inactive"); + continue; + } + stream.println("Thread " + + getTaskName(info.getThreadId(), + info.getThreadName()) + ":"); + Thread.State state = info.getThreadState(); + stream.println(" State: " + state); + stream.println(" Blocked count: " + info.getBlockedCount()); + stream.println(" Waited count: " + info.getWaitedCount()); + if (contention) { + stream.println(" Blocked time: " + info.getBlockedTime()); + stream.println(" Waited time: " + info.getWaitedTime()); + } + if (state == Thread.State.WAITING) { + stream.println(" Waiting on " + info.getLockName()); + } else if (state == Thread.State.BLOCKED) { + stream.println(" Blocked on " + info.getLockName()); + stream.println(" Blocked by " + + getTaskName(info.getLockOwnerId(), + info.getLockOwnerName())); + } + stream.println(" Stack:"); + for (StackTraceElement frame: info.getStackTrace()) { + stream.println(" " + frame.toString()); + } + } + stream.flush(); + } + + private static long previousLogTime = 0; + + /** + * Log the current thread stacks at INFO level. + * @param log the logger that logs the stack trace + * @param title a descriptive title for the call stacks + * @param minInterval the minimum time from the last + */ + public static void logThreadInfo(Log log, + String title, + long minInterval) { + boolean dumpStack = false; + if (log.isInfoEnabled()) { + synchronized (ReflectionUtils.class) { + long now = System.currentTimeMillis(); + if (now - previousLogTime >= minInterval * 1000) { + previousLogTime = now; + dumpStack = true; + } + } + if (dumpStack) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + printThreadInfo(new PrintWriter(buffer), title); + log.info(buffer.toString()); + } + } + } + + /** + * Return the correctly-typed {@link Class} of the given object. + * + * @param o object whose correctly-typed Class is to be obtained + * @return the correctly typed Class of the given object. + */ + @SuppressWarnings("unchecked") + public static Class getClass(T o) { + return (Class)o.getClass(); + } + + // methods to support testing + static void clearCache() { + CONSTRUCTOR_CACHE.clear(); + } + + static int getCacheSize() { + return CONSTRUCTOR_CACHE.size(); + } + /** + * A pair of input/output buffers that we use to clone writables. + */ + private static class CopyInCopyOutBuffer { + DataOutputBuffer outBuffer = new DataOutputBuffer(); + DataInputBuffer inBuffer = new DataInputBuffer(); + /** + * Move the data from the output buffer to the input buffer. + */ + void moveData() { + inBuffer.reset(outBuffer.getData(), outBuffer.getLength()); + } + } + + /** + * Allocate a buffer for each thread that tries to clone objects. 
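/*
 * Editorial example (not part of the patch): dumping thread stacks through the
 * helpers above, once immediately and once rate-limited.
 */
import java.io.PrintWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.ReflectionUtils;

class ThreadDumpExample {
  private static final Log LOG = LogFactory.getLog(ThreadDumpExample.class);

  public static void main(String[] args) {
    // Immediate dump to stdout.
    ReflectionUtils.printThreadInfo(new PrintWriter(System.out), "example dump");
    // INFO-level dump, skipped if one was already logged in the last 60 seconds.
    ReflectionUtils.logThreadInfo(LOG, "example dump", 60);
  }
}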
+ */ + private static ThreadLocal cloneBuffers + = new ThreadLocal() { + protected synchronized CopyInCopyOutBuffer initialValue() { + return new CopyInCopyOutBuffer(); + } + }; + + private static SerializationFactory getFactory(Configuration conf) { + if (serialFactory == null) { + serialFactory = new SerializationFactory(conf); + } + return serialFactory; + } + + /** + * Make a copy of the writable object using serialization to a buffer + * @param dst the object to copy from + * @param src the object to copy into, which is destroyed + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static T copy(Configuration conf, + T src, T dst) throws IOException { + CopyInCopyOutBuffer buffer = cloneBuffers.get(); + buffer.outBuffer.reset(); + SerializationFactory factory = getFactory(conf); + Class cls = (Class) src.getClass(); + Serializer serializer = factory.getSerializer(cls); + serializer.open(buffer.outBuffer); + serializer.serialize(src); + buffer.moveData(); + Deserializer deserializer = factory.getDeserializer(cls); + deserializer.open(buffer.inBuffer); + dst = deserializer.deserialize(dst); + return dst; + } + + @Deprecated + public static void cloneWritableInto(Writable dst, + Writable src) throws IOException { + CopyInCopyOutBuffer buffer = cloneBuffers.get(); + buffer.outBuffer.reset(); + src.write(buffer.outBuffer); + buffer.moveData(); + dst.readFields(buffer.inBuffer); + } +} diff --git a/src/java/org/apache/hadoop/util/RunJar.java b/src/java/org/apache/hadoop/util/RunJar.java new file mode 100644 index 00000000000..70f8ec4f64a --- /dev/null +++ b/src/java/org/apache/hadoop/util/RunJar.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.jar.*; +import java.lang.reflect.*; +import java.net.URL; +import java.net.URLClassLoader; +import java.io.*; +import java.util.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; + +/** Run a Hadoop job jar. */ +public class RunJar { + + /** Unpack a jar file into a directory. 
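/*
 * Editorial note and sketch, not part of the patch: RunJar is what backs the
 * "hadoop jar" command, e.g. "bin/hadoop jar myjob.jar org.example.MyJob in out"
 * (the main class may be omitted when the jar manifest names a Main-Class).
 * The programmatic call below just unpacks a jar; both paths are assumptions.
 */
import java.io.File;
import org.apache.hadoop.util.RunJar;

class UnJarExample {
  public static void main(String[] args) throws Exception {
    RunJar.unJar(new File("/tmp/myjob.jar"), new File("/tmp/myjob-unpacked"));
  }
}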
*/ + public static void unJar(File jarFile, File toDir) throws IOException { + JarFile jar = new JarFile(jarFile); + try { + Enumeration entries = jar.entries(); + while (entries.hasMoreElements()) { + JarEntry entry = (JarEntry)entries.nextElement(); + if (!entry.isDirectory()) { + InputStream in = jar.getInputStream(entry); + try { + File file = new File(toDir, entry.getName()); + if (!file.getParentFile().mkdirs()) { + if (!file.getParentFile().isDirectory()) { + throw new IOException("Mkdirs failed to create " + + file.getParentFile().toString()); + } + } + OutputStream out = new FileOutputStream(file); + try { + byte[] buffer = new byte[8192]; + int i; + while ((i = in.read(buffer)) != -1) { + out.write(buffer, 0, i); + } + } finally { + out.close(); + } + } finally { + in.close(); + } + } + } + } finally { + jar.close(); + } + } + + /** Run a Hadoop job jar. If the main class is not in the jar's manifest, + * then it must be provided on the command line. */ + public static void main(String[] args) throws Throwable { + String usage = "RunJar jarFile [mainClass] args..."; + + if (args.length < 1) { + System.err.println(usage); + System.exit(-1); + } + + int firstArg = 0; + String fileName = args[firstArg++]; + File file = new File(fileName); + String mainClassName = null; + + JarFile jarFile; + try { + jarFile = new JarFile(fileName); + } catch(IOException io) { + throw new IOException("Error opening job jar: " + fileName) + .initCause(io); + } + + Manifest manifest = jarFile.getManifest(); + if (manifest != null) { + mainClassName = manifest.getMainAttributes().getValue("Main-Class"); + } + jarFile.close(); + + if (mainClassName == null) { + if (args.length < 2) { + System.err.println(usage); + System.exit(-1); + } + mainClassName = args[firstArg++]; + } + mainClassName = mainClassName.replaceAll("/", "."); + + File tmpDir = new File(new Configuration().get("hadoop.tmp.dir")); + boolean b = tmpDir.mkdirs(); + if (!b || !tmpDir.isDirectory()) { + System.err.println("Mkdirs failed to create " + tmpDir); + System.exit(-1); + } + final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir); + b = workDir.delete(); + if (!b) { + System.err.println("Delete failed for " + workDir); + System.exit(-1); + } + b = workDir.mkdirs(); + if (!b || !workDir.isDirectory()) { + System.err.println("Mkdirs failed to create " + workDir); + System.exit(-1); + } + + Runtime.getRuntime().addShutdownHook(new Thread() { + public void run() { + try { + FileUtil.fullyDelete(workDir); + } catch (IOException e) { + } + } + }); + + unJar(file, workDir); + + ArrayList classPath = new ArrayList(); + classPath.add(new File(workDir+"/").toURL()); + classPath.add(file.toURL()); + classPath.add(new File(workDir, "classes/").toURL()); + File[] libs = new File(workDir, "lib").listFiles(); + if (libs != null) { + for (int i = 0; i < libs.length; i++) { + classPath.add(libs[i].toURL()); + } + } + + ClassLoader loader = + new URLClassLoader(classPath.toArray(new URL[0])); + + Thread.currentThread().setContextClassLoader(loader); + Class mainClass = Class.forName(mainClassName, true, loader); + Method main = mainClass.getMethod("main", new Class[] { + Array.newInstance(String.class, 0).getClass() + }); + String[] newArgs = Arrays.asList(args) + .subList(firstArg, args.length).toArray(new String[0]); + try { + main.invoke(null, new Object[] { newArgs }); + } catch (InvocationTargetException e) { + throw e.getTargetException(); + } + } + +} diff --git a/src/java/org/apache/hadoop/util/ServicePlugin.java 
b/src/java/org/apache/hadoop/util/ServicePlugin.java new file mode 100644 index 00000000000..a83294eb7d8 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ServicePlugin.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.Closeable; + +/** + * Service plug-in interface. + * + * Service plug-ins may be used to expose functionality of datanodes or + * namenodes using arbitrary RPC protocols. Plug-ins are instantiated by the + * service instance, and are notified of service life-cycle events using the + * methods defined by this class. + * + * Service plug-ins are started after the service instance is started, and + * stopped before the service instance is stopped. + */ +public interface ServicePlugin extends Closeable { + + /** + * This method is invoked when the service instance has been started. + * + * @param service The service instance invoking this method + */ + void start(Object service); + + /** + * This method is invoked when the service instance is about to be shut down. + */ + void stop(); +} diff --git a/src/java/org/apache/hadoop/util/ServletUtil.java b/src/java/org/apache/hadoop/util/ServletUtil.java new file mode 100644 index 00000000000..d755b8ec101 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ServletUtil.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.*; +import java.util.Calendar; + +import javax.servlet.*; + +public class ServletUtil { + /** + * Initial HTML header + */ + public static PrintWriter initHTML(ServletResponse response, String title + ) throws IOException { + response.setContentType("text/html"); + PrintWriter out = response.getWriter(); + out.println("\n" + + "\n" + + "" + title + "\n" + + "\n" + + "

" + title + "

\n"); + return out; + } + + /** + * Get a parameter from a ServletRequest. + * Return null if the parameter contains only white spaces. + */ + public static String getParameter(ServletRequest request, String name) { + String s = request.getParameter(name); + if (s == null) { + return null; + } + s = s.trim(); + return s.length() == 0? null: s; + } + + public static final String HTML_TAIL = "
\n" + + "Hadoop, " + + Calendar.getInstance().get(Calendar.YEAR) + ".\n" + + ""; + + /** + * HTML footer to be added in the jsps. + * @return the HTML footer. + */ + public static String htmlFooter() { + return HTML_TAIL; + } + + /** + * Generate the percentage graph and returns HTML representation string + * of the same. + * + * @param perc The percentage value for which graph is to be generated + * @param width The width of the display table + * @return HTML String representation of the percentage graph + * @throws IOException + */ + public static String percentageGraph(int perc, int width) throws IOException { + assert perc >= 0; assert perc <= 100; + + StringBuilder builder = new StringBuilder(); + + builder.append(""); + if(perc > 0) { + builder.append(""); + }if(perc < 100) { + builder.append(""); + } + builder.append("
"); + return builder.toString(); + } + + /** + * Generate the percentage graph and returns HTML representation string + * of the same. + * @param perc The percentage value for which graph is to be generated + * @param width The width of the display table + * @return HTML String representation of the percentage graph + * @throws IOException + */ + public static String percentageGraph(float perc, int width) throws IOException { + return percentageGraph((int)perc, width); + } +} diff --git a/src/java/org/apache/hadoop/util/Shell.java b/src/java/org/apache/hadoop/util/Shell.java new file mode 100644 index 00000000000..7964721982f --- /dev/null +++ b/src/java/org/apache/hadoop/util/Shell.java @@ -0,0 +1,357 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; + +/** + * A base class for running a Unix command. + * + * Shell can be used to run unix commands like du or + * df. It also offers facilities to gate commands by + * time-intervals. + */ +abstract public class Shell { + + public static final Log LOG = LogFactory.getLog(Shell.class); + + /** a Unix command to get the current user's name */ + public final static String USER_NAME_COMMAND = "whoami"; + /** a Unix command to get the current user's groups list */ + public static String[] getGROUPS_COMMAND() { + return new String[]{"bash", "-c", "groups"}; + } + /** a Unix command to set permission */ + public static final String SET_PERMISSION_COMMAND = "chmod"; + /** a Unix command to set owner */ + public static final String SET_OWNER_COMMAND = "chown"; + public static final String SET_GROUP_COMMAND = "chgrp"; + /** Return a Unix command to get permission information. */ + public static String[] getGET_PERMISSION_COMMAND() { + //force /bin/ls, except on windows. + return new String[] {(WINDOWS ? "ls" : "/bin/ls"), "-ld"}; + } + + /** + * Get the Unix command for setting the maximum virtual memory available + * to a given child process. This is only relevant when we are forking a + * process from within the {@link org.apache.hadoop.mapred.Mapper} or the + * {@link org.apache.hadoop.mapred.Reducer} implementations + * e.g. Hadoop Pipes + * or Hadoop Streaming. + * + * It also checks to ensure that we are running on a *nix platform else + * (e.g. in Cygwin/Windows) it returns null. + * @param conf configuration + * @return a String[] with the ulimit command arguments or + * null if we are running on a non *nix platform or + * if the limit is unspecified. 
+ */ + public static String[] getUlimitMemoryCommand(Configuration conf) { + // ulimit isn't supported on Windows + if (WINDOWS) { + return null; + } + + // get the memory limit from the configuration + String ulimit = conf.get("mapred.child.ulimit"); + if (ulimit == null) { + return null; + } + + // Parse it to ensure it is legal/sane + int memoryLimit = Integer.valueOf(ulimit); + + return new String[] {"ulimit", "-v", String.valueOf(memoryLimit)}; + } + + /** Set to true on Windows platforms */ + public static final boolean WINDOWS /* borrowed from Path.WINDOWS */ + = System.getProperty("os.name").startsWith("Windows"); + + private long interval; // refresh interval in msec + private long lastTime; // last time the command was performed + private Map environment; // env for the command execution + private File dir; + private Process process; // sub process used to execute the command + private int exitCode; + + public Shell() { + this(0L); + } + + /** + * @param interval the minimum duration to wait before re-executing the + * command. + */ + public Shell( long interval ) { + this.interval = interval; + this.lastTime = (interval<0) ? 0 : -interval; + } + + /** set the environment for the command + * @param env Mapping of environment variables + */ + protected void setEnvironment(Map env) { + this.environment = env; + } + + /** set the working directory + * @param dir The directory where the command would be executed + */ + protected void setWorkingDirectory(File dir) { + this.dir = dir; + } + + /** check to see if a command needs to be executed and execute if needed */ + protected void run() throws IOException { + if (lastTime + interval > System.currentTimeMillis()) + return; + exitCode = 0; // reset for next run + runCommand(); + } + + /** Run a command */ + private void runCommand() throws IOException { + ProcessBuilder builder = new ProcessBuilder(getExecString()); + boolean completed = false; + + if (environment != null) { + builder.environment().putAll(this.environment); + } + if (dir != null) { + builder.directory(this.dir); + } + + process = builder.start(); + final BufferedReader errReader = + new BufferedReader(new InputStreamReader(process + .getErrorStream())); + BufferedReader inReader = + new BufferedReader(new InputStreamReader(process + .getInputStream())); + final StringBuffer errMsg = new StringBuffer(); + + // read error and input streams as this would free up the buffers + // free the error stream buffer + Thread errThread = new Thread() { + @Override + public void run() { + try { + String line = errReader.readLine(); + while((line != null) && !isInterrupted()) { + errMsg.append(line); + errMsg.append(System.getProperty("line.separator")); + line = errReader.readLine(); + } + } catch(IOException ioe) { + LOG.warn("Error reading the error stream", ioe); + } + } + }; + try { + errThread.start(); + } catch (IllegalStateException ise) { } + try { + parseExecResult(inReader); // parse the output + // clear the input stream buffer + String line = inReader.readLine(); + while(line != null) { + line = inReader.readLine(); + } + // wait for the process to finish and check the exit code + exitCode = process.waitFor(); + try { + // make sure that the error thread exits + errThread.join(); + } catch (InterruptedException ie) { + LOG.warn("Interrupted while reading the error stream", ie); + } + completed = true; + if (exitCode != 0) { + throw new ExitCodeException(exitCode, errMsg.toString()); + } + } catch (InterruptedException ie) { + throw new IOException(ie.toString()); + } 
finally { + // close the input stream + try { + inReader.close(); + } catch (IOException ioe) { + LOG.warn("Error while closing the input stream", ioe); + } + if (!completed) { + errThread.interrupt(); + } + try { + errReader.close(); + } catch (IOException ioe) { + LOG.warn("Error while closing the error stream", ioe); + } + process.destroy(); + lastTime = System.currentTimeMillis(); + } + } + + /** return an array containing the command name & its parameters */ + protected abstract String[] getExecString(); + + /** Parse the execution result */ + protected abstract void parseExecResult(BufferedReader lines) + throws IOException; + + /** get the current sub-process executing the given command + * @return process executing the command + */ + public Process getProcess() { + return process; + } + + /** get the exit code + * @return the exit code of the process + */ + public int getExitCode() { + return exitCode; + } + + /** + * This is an IOException with exit code added. + */ + public static class ExitCodeException extends IOException { + int exitCode; + + public ExitCodeException(int exitCode, String message) { + super(message); + this.exitCode = exitCode; + } + + public int getExitCode() { + return exitCode; + } + } + + /** + * A simple shell command executor. + * + * ShellCommandExecutorshould be used in cases where the output + * of the command needs no explicit parsing and where the command, working + * directory and the environment remains unchanged. The output of the command + * is stored as-is and is expected to be small. + */ + public static class ShellCommandExecutor extends Shell { + + private String[] command; + private StringBuffer output; + + public ShellCommandExecutor(String[] execString) { + command = execString.clone(); + } + + public ShellCommandExecutor(String[] execString, File dir) { + this(execString); + this.setWorkingDirectory(dir); + } + + public ShellCommandExecutor(String[] execString, File dir, + Map env) { + this(execString, dir); + this.setEnvironment(env); + } + + /** Execute the shell command. */ + public void execute() throws IOException { + this.run(); + } + + protected String[] getExecString() { + return command; + } + + protected void parseExecResult(BufferedReader lines) throws IOException { + output = new StringBuffer(); + char[] buf = new char[512]; + int nRead; + while ( (nRead = lines.read(buf, 0, buf.length)) > 0 ) { + output.append(buf, 0, nRead); + } + } + + /** Get the output of the shell command.*/ + public String getOutput() { + return (output == null) ? "" : output.toString(); + } + + /** + * Returns the commands of this instance. + * Arguments with spaces in are presented with quotes round; other + * arguments are presented raw + * + * @return a string representation of the object. + */ + public String toString() { + StringBuilder builder = new StringBuilder(); + String[] args = getExecString(); + for (String s : args) { + if (s.indexOf(' ') >= 0) { + builder.append('"').append(s).append('"'); + } else { + builder.append(s); + } + builder.append(' '); + } + return builder.toString(); + } + } + + /** + * Static method to execute a shell command. + * Covers most of the simple cases without requiring the user to implement + * the Shell interface. + * @param cmd shell command to execute. + * @return the output of the executed command. + */ + public static String execCommand(String ... cmd) throws IOException { + return execCommand(null, cmd); + } + + /** + * Static method to execute a shell command. 
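/*
 * Editorial example, not part of the patch: the two common ways to run a
 * command with the classes above. The commands assume a Unix system and an
 * existing /tmp directory.
 */
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;

class ShellExample {
  public static void main(String[] args) throws IOException {
    // One-shot convenience call.
    System.out.println(Shell.execCommand("uname", "-a"));

    // Reusable executor with an explicit working directory.
    ShellCommandExecutor exec =
        new ShellCommandExecutor(new String[] {"ls", "-l"}, new File("/tmp"));
    exec.execute();
    System.out.println("exit=" + exec.getExitCode());
    System.out.println(exec.getOutput());
  }
}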
+ * Covers most of the simple cases without requiring the user to implement + * the Shell interface. + * @param env the map of environment key=value + * @param cmd shell command to execute. + * @return the output of the executed command. + */ + public static String execCommand(Map env, String ... cmd) + throws IOException { + ShellCommandExecutor exec = new ShellCommandExecutor(cmd); + if (env != null) { + exec.setEnvironment(env); + } + exec.execute(); + return exec.getOutput(); + } +} diff --git a/src/java/org/apache/hadoop/util/StringUtils.java b/src/java/org/apache/hadoop/util/StringUtils.java new file mode 100644 index 00000000000..8e1caaa19ce --- /dev/null +++ b/src/java/org/apache/hadoop/util/StringUtils.java @@ -0,0 +1,679 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URI; +import java.net.URISyntaxException; +import java.text.DateFormat; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import java.util.StringTokenizer; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.net.NetUtils; + +/** + * General string utils + */ +public class StringUtils { + + private static final DecimalFormat decimalFormat; + static { + NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.ENGLISH); + decimalFormat = (DecimalFormat) numberFormat; + decimalFormat.applyPattern("#.##"); + } + + /** + * Make a string representation of the exception. + * @param e The exception to stringify + * @return A string with exception name and call stack. + */ + public static String stringifyException(Throwable e) { + StringWriter stm = new StringWriter(); + PrintWriter wrt = new PrintWriter(stm); + e.printStackTrace(wrt); + wrt.close(); + return stm.toString(); + } + + /** + * Given a full hostname, return the word upto the first dot. + * @param fullHostname the full hostname + * @return the hostname to the first dot + */ + public static String simpleHostname(String fullHostname) { + int offset = fullHostname.indexOf('.'); + if (offset != -1) { + return fullHostname.substring(0, offset); + } + return fullHostname; + } + + private static DecimalFormat oneDecimal = new DecimalFormat("0.0"); + + /** + * Given an integer, return a string that is in an approximate, but human + * readable format. + * It uses the bases 'k', 'm', and 'g' for 1024, 1024**2, and 1024**3. 
+ * @param number the number to format + * @return a human readable form of the integer + */ + public static String humanReadableInt(long number) { + long absNumber = Math.abs(number); + double result = number; + String suffix = ""; + if (absNumber < 1024) { + // nothing + } else if (absNumber < 1024 * 1024) { + result = number / 1024.0; + suffix = "k"; + } else if (absNumber < 1024 * 1024 * 1024) { + result = number / (1024.0 * 1024); + suffix = "m"; + } else { + result = number / (1024.0 * 1024 * 1024); + suffix = "g"; + } + return oneDecimal.format(result) + suffix; + } + + /** + * Format a percentage for presentation to the user. + * @param done the percentage to format (0.0 to 1.0) + * @param digits the number of digits past the decimal point + * @return a string representation of the percentage + */ + public static String formatPercent(double done, int digits) { + DecimalFormat percentFormat = new DecimalFormat("0.00%"); + double scale = Math.pow(10.0, digits+2); + double rounded = Math.floor(done * scale); + percentFormat.setDecimalSeparatorAlwaysShown(false); + percentFormat.setMinimumFractionDigits(digits); + percentFormat.setMaximumFractionDigits(digits); + return percentFormat.format(rounded / scale); + } + + /** + * Given an array of strings, return a comma-separated list of its elements. + * @param strs Array of strings + * @return Empty string if strs.length is 0, comma separated list of strings + * otherwise + */ + + public static String arrayToString(String[] strs) { + if (strs.length == 0) { return ""; } + StringBuffer sbuf = new StringBuffer(); + sbuf.append(strs[0]); + for (int idx = 1; idx < strs.length; idx++) { + sbuf.append(","); + sbuf.append(strs[idx]); + } + return sbuf.toString(); + } + + /** + * Given an array of bytes it will convert the bytes to a hex string + * representation of the bytes + * @param bytes + * @param start start index, inclusively + * @param end end index, exclusively + * @return hex string representation of the byte array + */ + public static String byteToHexString(byte[] bytes, int start, int end) { + if (bytes == null) { + throw new IllegalArgumentException("bytes == null"); + } + StringBuilder s = new StringBuilder(); + for(int i = start; i < end; i++) { + s.append(String.format("%02x", bytes[i])); + } + return s.toString(); + } + + /** Same as byteToHexString(bytes, 0, bytes.length). */ + public static String byteToHexString(byte bytes[]) { + return byteToHexString(bytes, 0, bytes.length); + } + + /** + * Given a hexstring this will return the byte array corresponding to the + * string + * @param hex the hex String array + * @return a byte array that is a hex string representation of the given + * string. 
The size of the byte array is therefore hex.length/2 + */ + public static byte[] hexStringToByte(String hex) { + byte[] bts = new byte[hex.length() / 2]; + for (int i = 0; i < bts.length; i++) { + bts[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16); + } + return bts; + } + /** + * Given an array of URIs, return a comma separated list of their string forms. + * @param uris the array of URIs + * @return a comma separated string, or null if uris is null + */ + public static String uriToString(URI[] uris){ + if (uris == null) { + return null; + } + StringBuffer ret = new StringBuffer(uris[0].toString()); + for(int i = 1; i < uris.length;i++){ + ret.append(","); + ret.append(uris[i].toString()); + } + return ret.toString(); + } + + /** + * Convert an array of strings to an array of URIs. + * @param str the array of strings to convert + * @return the corresponding array of URIs + */ + public static URI[] stringToURI(String[] str){ + if (str == null) + return null; + URI[] uris = new URI[str.length]; + for (int i = 0; i < str.length;i++){ + try{ + uris[i] = new URI(str[i]); + }catch(URISyntaxException ur){ + System.out.println("Exception in specified URI's " + StringUtils.stringifyException(ur)); + //making sure it is assigned to null in case of an error + uris[i] = null; + } + } + return uris; + } + + /** + * Convert an array of strings to an array of Paths. + * @param str the array of strings to convert + * @return the corresponding array of Paths + */ + public static Path[] stringToPath(String[] str){ + if (str == null) { + return null; + } + Path[] p = new Path[str.length]; + for (int i = 0; i < str.length;i++){ + p[i] = new Path(str[i]); + } + return p; + } + /** + * + * Given a finish and start time in long milliseconds, returns a + * String in the format Xhrs, Ymins, Z sec, for the time difference between two times. + * If the finish time comes before the start time, then negative values of X, Y and Z will be returned. + * + * @param finishTime finish time + * @param startTime start time + */ + public static String formatTimeDiff(long finishTime, long startTime){ + long timeDiff = finishTime - startTime; + return formatTime(timeDiff); + } + + /** + * + * Given the time in long milliseconds, returns a + * String in the format Xhrs, Ymins, Z sec. + * + * @param timeDiff The time difference to format + */ + public static String formatTime(long timeDiff){ + StringBuffer buf = new StringBuffer(); + long hours = timeDiff / (60*60*1000); + long rem = (timeDiff % (60*60*1000)); + long minutes = rem / (60*1000); + rem = rem % (60*1000); + long seconds = rem / 1000; + + if (hours != 0){ + buf.append(hours); + buf.append("hrs, "); + } + if (minutes != 0){ + buf.append(minutes); + buf.append("mins, "); + } + // return "0sec" if there is no difference + buf.append(seconds); + buf.append("sec"); + return buf.toString(); + } + /** + * Formats time in ms and appends difference (finishTime - startTime) + * as returned by formatTimeDiff(). + * If finish time is 0, an empty string is returned; if start time is 0 + * then the difference is not appended to the return value. + * @param dateFormat date format to use + * @param finishTime finish time + * @param startTime start time + * @return formatted value. + */ + public static String getFormattedTimeWithDiff(DateFormat dateFormat, + long finishTime, long startTime){ + StringBuffer buf = new StringBuffer(); + if (0 != finishTime) { + buf.append(dateFormat.format(new Date(finishTime))); + if (0 != startTime){ + buf.append(" (" + formatTimeDiff(finishTime , startTime) + ")"); + } + } + return buf.toString(); + } + + /** + * Returns an array of strings built from a comma separated list.
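A brief sketch of the time formatting helpers above; the timestamps are arbitrary illustrative values:

import java.text.SimpleDateFormat;
import org.apache.hadoop.util.StringUtils;

public class TimeFormatDemo {
  public static void main(String[] args) {
    long start = 1000000L;                                   // arbitrary epoch offset
    long finish = start + (2 * 60 * 60 * 1000)               // 2 hours
                        + (5 * 60 * 1000)                    // 5 minutes
                        + (7 * 1000);                        // 7 seconds
    System.out.println(StringUtils.formatTimeDiff(finish, start)); // 2hrs, 5mins, 7sec
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    // Prints the formatted finish time followed by " (2hrs, 5mins, 7sec)".
    System.out.println(StringUtils.getFormattedTimeWithDiff(fmt, finish, start));
  }
}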
+ * @param str the comma separated string values + * @return an array of the comma separated string values, or null if there are none + */ + public static String[] getStrings(String str){ + Collection<String> values = getStringCollection(str); + if(values.size() == 0) { + return null; + } + return values.toArray(new String[values.size()]); + } + + /** + * Returns a collection of strings. + * @param str comma separated string values + * @return an ArrayList of string values + */ + public static Collection<String> getStringCollection(String str){ + List<String> values = new ArrayList<String>(); + if (str == null) + return values; + StringTokenizer tokenizer = new StringTokenizer (str,","); + values = new ArrayList<String>(); + while (tokenizer.hasMoreTokens()) { + values.add(tokenizer.nextToken()); + } + return values; + } + + final public static char COMMA = ','; + final public static String COMMA_STR = ","; + final public static char ESCAPE_CHAR = '\\'; + + /** + * Split a string using the default separator + * @param str a string that may have escaped separator + * @return an array of strings + */ + public static String[] split(String str) { + return split(str, ESCAPE_CHAR, COMMA); + } + + /** + * Split a string using the given separator + * @param str a string that may have escaped separator + * @param escapeChar a char that can be used to escape the separator + * @param separator a separator char + * @return an array of strings + */ + public static String[] split( + String str, char escapeChar, char separator) { + if (str==null) { + return null; + } + ArrayList<String> strList = new ArrayList<String>(); + StringBuilder split = new StringBuilder(); + int index = 0; + while ((index = findNext(str, separator, escapeChar, index, split)) >= 0) { + ++index; // move over the separator for next search + strList.add(split.toString()); + split.setLength(0); // reset the buffer + } + strList.add(split.toString()); + // remove trailing empty split(s) + int last = strList.size(); // last split + while (--last>=0 && "".equals(strList.get(last))) { + strList.remove(last); + } + return strList.toArray(new String[strList.size()]); + } + + /** + * Finds the first occurrence of the separator character ignoring the escaped + * separators starting from the index. Note the substring between the index + * and the position of the separator is passed back via the split parameter. + * @param str the source string + * @param separator the character to find + * @param escapeChar character used to escape + * @param start from where to search + * @param split used to pass back the extracted string + * @return the position of the separator, or -1 if none was found + */ + public static int findNext(String str, char separator, char escapeChar, + int start, StringBuilder split) { + int numPreEscapes = 0; + for (int i = start; i < str.length(); i++) { + char curChar = str.charAt(i); + if (numPreEscapes == 0 && curChar == separator) { // separator + return i; + } else { + split.append(curChar); + numPreEscapes = (curChar == escapeChar) + ?
(++numPreEscapes) % 2 + : 0; + } + } + return -1; + } + + /** + * Escape commas in the string using the default escape char + * @param str a string + * @return an escaped string + */ + public static String escapeString(String str) { + return escapeString(str, ESCAPE_CHAR, COMMA); + } + + /** + * Escape charToEscape in the string + * with the escape char escapeChar + * + * @param str string + * @param escapeChar escape char + * @param charToEscape the char to be escaped + * @return an escaped string + */ + public static String escapeString( + String str, char escapeChar, char charToEscape) { + return escapeString(str, escapeChar, new char[] {charToEscape}); + } + + // check if the character array has the character + private static boolean hasChar(char[] chars, char character) { + for (char target : chars) { + if (character == target) { + return true; + } + } + return false; + } + + /** + * @param charsToEscape array of characters to be escaped + */ + public static String escapeString(String str, char escapeChar, + char[] charsToEscape) { + if (str == null) { + return null; + } + StringBuilder result = new StringBuilder(); + for (int i=0; i<str.length(); i++) { + char curChar = str.charAt(i); + if (curChar == escapeChar || hasChar(charsToEscape, curChar)) { + // prefix special chars with the escape char + result.append(escapeChar); + } + result.append(curChar); + } + return result.toString(); + } + + /** + * Unescape commas in the string using the default escape char + * @param str a string + * @return an unescaped string + */ + public static String unEscapeString(String str) { + return unEscapeString(str, ESCAPE_CHAR, COMMA); + } + + /** + * Unescape charToEscape in the string + * with the escape char escapeChar + * + * @param str string + * @param escapeChar escape char + * @param charToEscape the escaped char + * @return an unescaped string + */ + public static String unEscapeString( + String str, char escapeChar, char charToEscape) { + return unEscapeString(str, escapeChar, new char[] {charToEscape}); + } + + /** + * @param charsToEscape array of characters to unescape + */ + public static String unEscapeString(String str, char escapeChar, + char[] charsToEscape) { + if (str == null) { + return null; + } + StringBuilder result = new StringBuilder(str.length()); + boolean hasPreEscape = false; + for (int i=0; i<str.length(); i++) { + char curChar = str.charAt(i); + if (hasPreEscape) { + if (curChar != escapeChar && !hasChar(charsToEscape, curChar)) { + // the escape char did not precede an escapable char + throw new IllegalArgumentException("Illegal escaped string " + str + + " unescaped " + escapeChar + " at " + (i-1)); + } + // otherwise discard the escape char and keep the escaped char + result.append(curChar); + hasPreEscape = false; + } else { + if (hasChar(charsToEscape, curChar)) { + throw new IllegalArgumentException("Illegal escaped string " + str + + " unescaped " + curChar + " at " + i); + } else if (curChar == escapeChar) { + hasPreEscape = true; + } else { + result.append(curChar); + } + } + } + if (hasPreEscape) { + throw new IllegalArgumentException("Illegal escaped string " + str + + ", not expecting " + escapeChar + " in the end."); + } + return result.toString(); + } + + /** + * Return a message for logging. + * @param prefix prefix keyword for the message + * @param msg content of the message + * @return a message for logging + */ + private static String toStartupShutdownString(String prefix, String[] msg) { + StringBuffer b = new StringBuffer(prefix); + b.append("\n/************************************************************"); + for(String s : msg) + b.append("\n" + prefix + s); + b.append("\n************************************************************/"); + return b.toString(); + } + + /** + * Print a log message for starting up and shutting down + * @param clazz the class of the server + * @param args arguments + * @param LOG the target log object + */ + public static void startupShutdownMessage(Class<?> clazz, String[] args, + final org.apache.commons.logging.Log LOG) { + final String hostname = NetUtils.getHostname(); + final String classname = clazz.getSimpleName(); + LOG.info( + toStartupShutdownString("STARTUP_MSG: ", new String[] { + "Starting " + classname, + " host = " + hostname, + " args = " + Arrays.asList(args), + " version = " + VersionInfo.getVersion(), + " classpath = " + System.getProperty("java.class.path"), + " build = " + VersionInfo.getUrl() + " -r " + + VersionInfo.getRevision() + + "; compiled by '" + VersionInfo.getUser() + + "' on " + VersionInfo.getDate()} + ) + ); + + Runtime.getRuntime().addShutdownHook(new Thread() { + public void run() { + LOG.info(toStartupShutdownString("SHUTDOWN_MSG: ", new String[]{ + "Shutting down " + classname + " at " + hostname})); + } + }); + } + + /** + * The traditional binary prefixes, kilo, mega, ..., exa, + * which can be represented by a 64-bit integer. + * TraditionalBinaryPrefix symbols are case insensitive. + */ + public static enum TraditionalBinaryPrefix { + KILO(1024), + MEGA(KILO.value << 10), + GIGA(MEGA.value << 10), + TERA(GIGA.value << 10), + PETA(TERA.value << 10), + EXA(PETA.value << 10); + + public final long value; + public final char symbol; + + TraditionalBinaryPrefix(long value) { + this.value = value; + this.symbol = toString().charAt(0); + } + + /** + * @return The TraditionalBinaryPrefix object corresponding to the symbol.
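The escape/split helpers are designed to round-trip values that themselves contain the separator, and TraditionalBinaryPrefix parses size strings such as "128m"; a small sketch, with an illustrative class name and values:

import org.apache.hadoop.util.StringUtils;

public class EscapeSplitDemo {
  public static void main(String[] args) {
    String field = "a,b";                                  // a value containing the separator
    String escaped = StringUtils.escapeString(field);      // "a\,b"
    String record = escaped + "," + "c";                   // two fields joined by a comma
    String[] parts = StringUtils.split(record);            // ["a\,b", "c"], still escaped
    for (String part : parts) {
      System.out.println(
          StringUtils.unEscapeString(part, StringUtils.ESCAPE_CHAR, StringUtils.COMMA));
    }
    // "128m" -> 128 * 1024 * 1024
    System.out.println(StringUtils.TraditionalBinaryPrefix.string2long("128m")); // 134217728
  }
}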
+ */ + public static TraditionalBinaryPrefix valueOf(char symbol) { + symbol = Character.toUpperCase(symbol); + for(TraditionalBinaryPrefix prefix : TraditionalBinaryPrefix.values()) { + if (symbol == prefix.symbol) { + return prefix; + } + } + throw new IllegalArgumentException("Unknown symbol '" + symbol + "'"); + } + + /** + * Convert a string to long. + * The input string is first trimmed + * and then it is parsed with traditional binary prefix. + * + * For example, + * "-1230k" will be converted to -1230 * 1024 = -1259520; + * "891g" will be converted to 891 * 1024^3 = 956703965184; + * + * @param s input string + * @return a long value represented by the input string. + */ + public static long string2long(String s) { + s = s.trim(); + final int lastpos = s.length() - 1; + final char lastchar = s.charAt(lastpos); + if (Character.isDigit(lastchar)) + return Long.parseLong(s); + else { + long prefix = TraditionalBinaryPrefix.valueOf(lastchar).value; + long num = Long.parseLong(s.substring(0, lastpos)); + if (num > (Long.MAX_VALUE/prefix) || num < (Long.MIN_VALUE/prefix)) { + throw new IllegalArgumentException(s + " does not fit in a Long"); + } + return num * prefix; + } + } + } + + /** + * Escapes HTML special characters present in the string. + * @param string the string to escape + * @return HTML escaped string representation + */ + public static String escapeHTML(String string) { + if(string == null) { + return null; + } + StringBuffer sb = new StringBuffer(); + boolean lastCharacterWasSpace = false; + char[] chars = string.toCharArray(); + for(char c : chars) { + if(c == ' ') { + if(lastCharacterWasSpace){ + lastCharacterWasSpace = false; + sb.append("&nbsp;"); + }else { + lastCharacterWasSpace=true; + sb.append(" "); + } + }else { + lastCharacterWasSpace = false; + switch(c) { + case '<': sb.append("&lt;"); break; + case '>': sb.append("&gt;"); break; + case '&': sb.append("&amp;"); break; + case '"': sb.append("&quot;"); break; + default : sb.append(c);break; + } + } + } + + return sb.toString(); + } + + /** + * Return an abbreviated English-language description of the byte length + */ + public static String byteDesc(long len) { + double val = 0.0; + String ending = ""; + if (len < 1024 * 1024) { + val = (1.0 * len) / 1024; + ending = " KB"; + } else if (len < 1024 * 1024 * 1024) { + val = (1.0 * len) / (1024 * 1024); + ending = " MB"; + } else if (len < 1024L * 1024 * 1024 * 1024) { + val = (1.0 * len) / (1024 * 1024 * 1024); + ending = " GB"; + } else if (len < 1024L * 1024 * 1024 * 1024 * 1024) { + val = (1.0 * len) / (1024L * 1024 * 1024 * 1024); + ending = " TB"; + } else { + val = (1.0 * len) / (1024L * 1024 * 1024 * 1024 * 1024); + ending = " PB"; + } + return limitDecimalTo2(val) + ending; + } + + public static synchronized String limitDecimalTo2(double d) { + return decimalFormat.format(d); + } +} diff --git a/src/java/org/apache/hadoop/util/Tool.java b/src/java/org/apache/hadoop/util/Tool.java new file mode 100644 index 00000000000..8cc9f47c20d --- /dev/null +++ b/src/java/org/apache/hadoop/util/Tool.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.conf.Configurable; + +/** + * A tool interface that supports handling of generic command-line options. + * + *

Tool, is the standard for any Map-Reduce tool/application. + * The tool/application should delegate the handling of + * + * standard command-line options to {@link ToolRunner#run(Tool, String[])} + * and only handle its custom arguments.

+ * + *

Here is how a typical Tool is implemented:

+ *

+ *     public class MyApp extends Configured implements Tool {
+ *     
+ *       public int run(String[] args) throws Exception {
+ *         // Configuration processed by ToolRunner
+ *         Configuration conf = getConf();
+ *         
+ *         // Create a JobConf using the processed conf
+ *         JobConf job = new JobConf(conf, MyApp.class);
+ *         
+ *         // Process custom command-line options
+ *         Path in = new Path(args[1]);
+ *         Path out = new Path(args[2]);
+ *         
+ *         // Specify various job-specific parameters     
+ *         job.setJobName("my-app");
+ *         job.setInputPath(in);
+ *         job.setOutputPath(out);
+ *         job.setMapperClass(MyApp.MyMapper.class);
+ *         job.setReducerClass(MyApp.MyReducer.class);
+ *
+ *         // Submit the job, then poll for progress until the job is complete
+ *         JobClient.runJob(job);
+ *         return 0;
+ *       }
+ *       
+ *       public static void main(String[] args) throws Exception {
+ *         // Let ToolRunner handle generic command-line options 
+ *         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+ *         
+ *         System.exit(res);
+ *       }
+ *     }
+ * 

+ * + * @see GenericOptionsParser + * @see ToolRunner + */ +public interface Tool extends Configurable { + /** + * Execute the command with the given arguments. + * + * @param args command specific arguments. + * @return exit code. + * @throws Exception + */ + int run(String [] args) throws Exception; +} diff --git a/src/java/org/apache/hadoop/util/ToolRunner.java b/src/java/org/apache/hadoop/util/ToolRunner.java new file mode 100644 index 00000000000..27e08a5fb7d --- /dev/null +++ b/src/java/org/apache/hadoop/util/ToolRunner.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.PrintStream; + +import org.apache.hadoop.conf.Configuration; + +/** + * A utility to help run {@link Tool}s. + * + *

ToolRunner can be used to run classes implementing + * Tool interface. It works in conjunction with + * {@link GenericOptionsParser} to parse the + * + * generic hadoop command line arguments and modifies the + * Configuration of the Tool. The + * application-specific options are passed along without being modified. + *
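A minimal Tool wired through ToolRunner, to make the division of labour concrete; EchoTool and the property name my.key are illustrative and not part of this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class EchoTool extends Configured implements Tool {
  public int run(String[] args) throws Exception {
    // Generic options such as "-D my.key=hello" have already been applied to getConf().
    System.out.println("my.key = " + getConf().get("my.key", "<unset>"));
    for (String arg : args) {
      System.out.println("remaining arg: " + arg);  // only the non-generic arguments
    }
    return 0;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new EchoTool(), args));
  }
}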

+ * + * @see Tool + * @see GenericOptionsParser + */ +public class ToolRunner { + + /** + * Runs the given Tool by {@link Tool#run(String[])}, after + * parsing with the given generic arguments. Uses the given + * Configuration, or builds one if null. + * + * Sets the Tool's configuration with the possibly modified + * version of the conf. + * + * @param conf Configuration for the Tool. + * @param tool Tool to run. + * @param args command-line arguments to the tool. + * @return exit code of the {@link Tool#run(String[])} method. + */ + public static int run(Configuration conf, Tool tool, String[] args) + throws Exception{ + if(conf == null) { + conf = new Configuration(); + } + GenericOptionsParser parser = new GenericOptionsParser(conf, args); + //set the configuration back, so that Tool can configure itself + tool.setConf(conf); + + //get the args w/o generic hadoop args + String[] toolArgs = parser.getRemainingArgs(); + return tool.run(toolArgs); + } + + /** + * Runs the Tool with its Configuration. + * + * Equivalent to run(tool.getConf(), tool, args). + * + * @param tool Tool to run. + * @param args command-line arguments to the tool. + * @return exit code of the {@link Tool#run(String[])} method. + */ + public static int run(Tool tool, String[] args) + throws Exception{ + return run(tool.getConf(), tool, args); + } + + /** + * Prints generic command-line argurments and usage information. + * + * @param out stream to write usage information to. + */ + public static void printGenericCommandUsage(PrintStream out) { + GenericOptionsParser.printGenericCommandUsage(out); + } + +} diff --git a/src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java b/src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java new file mode 100644 index 00000000000..498daa80974 --- /dev/null +++ b/src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +public class UTF8ByteArrayUtils { + /** + * Find the first occurrence of the given byte b in a UTF-8 encoded string + * @param utf a byte array containing a UTF-8 encoded string + * @param start starting offset + * @param end ending position + * @param b the byte to find + * @return position that first byte occures otherwise -1 + */ + public static int findByte(byte [] utf, int start, int end, byte b) { + for(int i=start; iBloom filter, as defined by Bloom in 1970. + *

+ * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + * the networking research community in the past decade thanks to the bandwidth efficiencies that it + * offers for the transmission of set membership information between networked hosts. A sender encodes + * the information into a bit vector, the Bloom filter, that is more compact than a conventional + * representation. Computation and space costs for construction are linear in the number of elements. + * The receiver uses the filter to test whether various elements are members of the set. Though the + * filter will occasionally return a false positive, it will never return a false negative. When creating + * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + * + *
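A short usage sketch of the filter defined below; the sizing (about 8 bits and 4 hash functions per expected key, giving roughly a 2-3% false positive rate) is illustrative:

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class BloomFilterDemo {
  public static void main(String[] args) {
    // About 8 bits per expected entry for ~1000 keys, hashed 4 ways.
    BloomFilter filter = new BloomFilter(8 * 1000, 4, Hash.MURMUR_HASH);

    filter.add(new Key("apple".getBytes()));
    filter.add(new Key("banana".getBytes()));

    System.out.println(filter.membershipTest(new Key("apple".getBytes())));  // true, never a false negative
    System.out.println(filter.membershipTest(new Key("cherry".getBytes()))); // usually false; may be a false positive
  }
}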

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * + * @see Space/Time Trade-Offs in Hash Coding with Allowable Errors + */ +public class BloomFilter extends Filter { + private static final byte[] bitvalues = new byte[] { + (byte)0x01, + (byte)0x02, + (byte)0x04, + (byte)0x08, + (byte)0x10, + (byte)0x20, + (byte)0x40, + (byte)0x80 + }; + + /** The bit vector. */ + BitSet bits; + + /** Default constructor - use with readFields */ + public BloomFilter() { + super(); + } + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public BloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + bits = new BitSet(this.vectorSize); + } + + @Override + public void add(Key key) { + if(key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + bits.set(h[i]); + } + } + + @Override + public void and(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + this.bits.and(((BloomFilter) filter).bits); + } + + @Override + public boolean membershipTest(Key key) { + if(key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + for(int i = 0; i < nbHash; i++) { + if(!bits.get(h[i])) { + return false; + } + } + return true; + } + + @Override + public void not() { + bits.flip(0, vectorSize - 1); + } + + @Override + public void or(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + bits.or(((BloomFilter) filter).bits); + } + + @Override + public void xor(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + bits.xor(((BloomFilter) filter).bits); + } + + @Override + public String toString() { + return bits.toString(); + } + + /** + * @return size of the the bloomfilter + */ + public int getVectorSize() { + return this.vectorSize; + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + byte[] bytes = new byte[getNBytes()]; + for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if (bitIndex == 0) { + bytes[byteIndex] = 0; + } + if (bits.get(i)) { + bytes[byteIndex] |= bitvalues[bitIndex]; + } + } + out.write(bytes); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + bits = new BitSet(this.vectorSize); + byte[] bytes = new byte[getNBytes()]; + in.readFully(bytes); + for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) { + bits.set(i); + } + } + } + + /* @return number of bytes needed to hold bit vector */ + private 
int getNBytes() { + return (vectorSize + 7) / 8; + } +}//end class diff --git a/src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java b/src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java new file mode 100644 index 00000000000..527d2bff713 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java @@ -0,0 +1,305 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Implements a counting Bloom filter, as defined by Fan et al. in a ToN + * 2000 paper. + *

+ * A counting Bloom filter is an improvement to standard a Bloom filter as it + * allows dynamic additions and deletions of set membership information. This + * is achieved through the use of a counting vector instead of a bit vector. + *

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * + * @see Summary cache: a scalable wide-area web cache sharing protocol + */ +public final class CountingBloomFilter extends Filter { + /** Storage for the counting buckets */ + private long[] buckets; + + /** We are using 4bit buckets, so each bucket can count to 15 */ + private final static long BUCKET_MAX_VALUE = 15; + + /** Default constructor - use with readFields */ + public CountingBloomFilter() {} + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public CountingBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + buckets = new long[buckets2words(vectorSize)]; + } + + /** returns the number of 64 bit words it would take to hold vectorSize buckets */ + private static int buckets2words(int vectorSize) { + return ((vectorSize - 1) >>> 4) + 1; + } + + + @Override + public void add(Key key) { + if(key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + // only increment if the count in the bucket is less than BUCKET_MAX_VALUE + if(bucketValue < BUCKET_MAX_VALUE) { + // increment by 1 + buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift); + } + } + } + + /** + * Removes a specified key from this counting Bloom filter. + *

+ * Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + * @param key The key to remove. + */ + public void delete(Key key) { + if(key == null) { + throw new NullPointerException("Key may not be null"); + } + if(!membershipTest(key)) { + throw new IllegalArgumentException("Key is not a member"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE + if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { + // decrement by 1 + buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); + } + } + } + + @Override + public void and(Filter filter) { + if(filter == null + || !(filter instanceof CountingBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + CountingBloomFilter cbf = (CountingBloomFilter)filter; + + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + this.buckets[i] &= cbf.buckets[i]; + } + } + + @Override + public boolean membershipTest(Key key) { + if(key == null) { + throw new NullPointerException("Key may not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + + if((buckets[wordNum] & bucketMask) == 0) { + return false; + } + } + + return true; + } + + /** + * This method calculates an approximate count of the key, i.e. how many + * times the key was added to the filter. This allows the filter to be + * used as an approximate key -> count map. + *

NOTE: due to the bucket size of this filter, inserting the same + * key more than 15 times will cause an overflow at all filter positions + * associated with this key, and it will significantly increase the error + * rate for this and other keys. For this reason the filter can only be + * used to store small count values 0 <= N << 15. + * @param key key to be tested + * @return 0 if the key is not present. Otherwise, a positive value v will + * be returned such that v == count with probability equal to the + * error rate of this filter, and v > count otherwise. + * Additionally, if the filter experienced an underflow as a result of + * {@link #delete(Key)} operation, the return value may be lower than the + * count with the probability of the false negative rate of such + * filter. + */ + public int approximateCount(Key key) { + int res = Integer.MAX_VALUE; + int[] h = hash.hash(key); + hash.clear(); + for (int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + if (bucketValue < res) res = (int)bucketValue; + } + if (res != Integer.MAX_VALUE) { + return res; + } else { + return 0; + } + } + + @Override + public void not() { + throw new UnsupportedOperationException("not() is undefined for " + + this.getClass().getName()); + } + + @Override + public void or(Filter filter) { + if(filter == null + || !(filter instanceof CountingBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + + CountingBloomFilter cbf = (CountingBloomFilter)filter; + + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + this.buckets[i] |= cbf.buckets[i]; + } + } + + @Override + public void xor(Filter filter) { + throw new UnsupportedOperationException("xor() is undefined for " + + this.getClass().getName()); + } + + @Override + public String toString() { + StringBuilder res = new StringBuilder(); + + for(int i = 0; i < vectorSize; i++) { + if(i > 0) { + res.append(" "); + } + + int wordNum = i >> 4; // div 16 + int bucketShift = (i & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + res.append(bucketValue); + } + + return res.toString(); + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + out.writeLong(buckets[i]); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int sizeInWords = buckets2words(vectorSize); + buckets = new long[sizeInWords]; + for(int i = 0; i < sizeInWords; i++) { + buckets[i] = in.readLong(); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java b/src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java new file mode 100644 index 00000000000..caabb4a05ba --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java @@ -0,0 +1,293 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. 
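A sketch of the add/delete/approximateCount cycle described above, with illustrative sizing; the printed counts are exact only while no bucket collides or saturates:

import org.apache.hadoop.util.bloom.CountingBloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class CountingBloomFilterDemo {
  public static void main(String[] args) {
    CountingBloomFilter filter = new CountingBloomFilter(8 * 1000, 4, Hash.MURMUR_HASH);
    Key key = new Key("session-42".getBytes());

    filter.add(key);
    filter.add(key);
    System.out.println(filter.approximateCount(key)); // 2, barring collisions

    filter.delete(key);                               // decrements the 4-bit buckets
    System.out.println(filter.membershipTest(key));   // true, one addition still recorded
    filter.delete(key);
    System.out.println(filter.membershipTest(key));   // false, barring collisions
  }
}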
+ * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Implements a dynamic Bloom filter, as defined in the INFOCOM 2006 paper. + *

+ * A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + * each of the s rows is a standard Bloom filter. The creation + * process of a DBF is iterative. At the start, the DBF is a 1 * m + * bit matrix, i.e., it is composed of a single standard Bloom filter. + * It assumes that nr elements are recorded in the + * initial bit vector, where nr <= n (n is + * the cardinality of the set A to record in the filter). + *

+ * As the size of A grows during the execution of the application, + * several keys must be inserted in the DBF. When inserting a key into the DBF, + * one must first get an active Bloom filter in the matrix. A Bloom filter is + * active when the number of recorded keys, nr, is + * strictly less than the current cardinality of A, n. + * If an active Bloom filter is found, the key is inserted and + * nr is incremented by one. On the other hand, if there + * is no active Bloom filter, a new one is created (i.e., a new row is added to + * the matrix) according to the current size of A and the element + * is added in this new Bloom filter and the nr value of + * this new Bloom filter is set to one. A given key is said to belong to the + * DBF if the k positions are set to one in one of the matrix rows. + *
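A sketch of the row-growth behaviour described above; the parameters passed to the constructor below are illustrative (nr = 1000 keys per row):

import org.apache.hadoop.util.bloom.DynamicBloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class DynamicBloomFilterDemo {
  public static void main(String[] args) {
    // After 1000 keys have been recorded in the current row, the next add()
    // appends a fresh standard Bloom filter to the matrix.
    DynamicBloomFilter filter =
        new DynamicBloomFilter(8 * 1000, 4, Hash.MURMUR_HASH, 1000);

    for (int i = 0; i < 2500; i++) {                  // fills two rows and part of a third
      filter.add(new Key(("key-" + i).getBytes()));
    }
    System.out.println(filter.membershipTest(new Key("key-1234".getBytes()))); // true
  }
}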

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * @see BloomFilter A Bloom filter + * + * @see Theory and Network Applications of Dynamic Bloom Filters + */ +public class DynamicBloomFilter extends Filter { + /** + * Threshold for the maximum number of key to record in a dynamic Bloom filter row. + */ + private int nr; + + /** + * The number of keys recorded in the current standard active Bloom filter. + */ + private int currentNbRecord; + + /** + * The matrix of Bloom filter. + */ + private BloomFilter[] matrix; + + /** + * Zero-args constructor for the serialization. + */ + public DynamicBloomFilter() { } + + /** + * Constructor. + *

+ * Builds an empty Dynamic Bloom filter. + * @param vectorSize The number of bits in the vector. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + * @param nr The threshold for the maximum number of keys to record in a + * dynamic Bloom filter row. + */ + public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) { + super(vectorSize, nbHash, hashType); + + this.nr = nr; + this.currentNbRecord = 0; + + matrix = new BloomFilter[1]; + matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType); + } + + @Override + public void add(Key key) { + if (key == null) { + throw new NullPointerException("Key can not be null"); + } + + BloomFilter bf = getActiveStandardBF(); + + if (bf == null) { + addRow(); + bf = matrix[matrix.length - 1]; + currentNbRecord = 0; + } + + bf.add(key); + + currentNbRecord++; + } + + @Override + public void and(Filter filter) { + if (filter == null + || !(filter instanceof DynamicBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + DynamicBloomFilter dbf = (DynamicBloomFilter)filter; + + if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + for (int i = 0; i < matrix.length; i++) { + matrix[i].and(dbf.matrix[i]); + } + } + + @Override + public boolean membershipTest(Key key) { + if (key == null) { + return true; + } + + for (int i = 0; i < matrix.length; i++) { + if (matrix[i].membershipTest(key)) { + return true; + } + } + + return false; + } + + @Override + public void not() { + for (int i = 0; i < matrix.length; i++) { + matrix[i].not(); + } + } + + @Override + public void or(Filter filter) { + if (filter == null + || !(filter instanceof DynamicBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + + DynamicBloomFilter dbf = (DynamicBloomFilter)filter; + + if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + for (int i = 0; i < matrix.length; i++) { + matrix[i].or(dbf.matrix[i]); + } + } + + @Override + public void xor(Filter filter) { + if (filter == null + || !(filter instanceof DynamicBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + DynamicBloomFilter dbf = (DynamicBloomFilter)filter; + + if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + + for(int i = 0; ithis dynamic Bloom filter. + */ + private void addRow() { + BloomFilter[] tmp = new BloomFilter[matrix.length + 1]; + + for (int i = 0; i < matrix.length; i++) { + tmp[i] = matrix[i]; + } + + tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType); + + matrix = tmp; + } + + /** + * Returns the active standard Bloom filter in this dynamic Bloom filter. + * @return BloomFilter The active standard Bloom filter. + * Null otherwise. 
+ */ + private BloomFilter getActiveStandardBF() { + if (currentNbRecord >= nr) { + return null; + } + + return matrix[matrix.length - 1]; + } +} diff --git a/src/java/org/apache/hadoop/util/bloom/Filter.java b/src/java/org/apache/hadoop/util/bloom/Filter.java new file mode 100644 index 00000000000..e95273b5913 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/Filter.java @@ -0,0 +1,213 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.hash.Hash; + +/** + * Defines the general behavior of a filter. + *

+ * A filter is a data structure which aims at offering a lossy summary of a set A. The + * key idea is to map entries of A (also called keys) into several positions + * in a vector through the use of several hash functions. + *

+ * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). + *

+ * It must be extended in order to define the real behavior. + * + * @see Key The general behavior of a key + * @see HashFunction A hash function + */ +public abstract class Filter implements Writable { + private static final int VERSION = -1; // negative to accommodate for old format + /** The vector size of this filter. */ + protected int vectorSize; + + /** The hash function used to map a key to several positions in the vector. */ + protected HashFunction hash; + + /** The number of hash functions to consider. */ + protected int nbHash; + + /** Type of hashing function to use. */ + protected int hashType; + + protected Filter() {} + + /** + * Constructor. + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash functions to consider. + * @param hashType type of the hashing function (see {@link Hash}). + */ + protected Filter(int vectorSize, int nbHash, int hashType) { + this.vectorSize = vectorSize; + this.nbHash = nbHash; + this.hashType = hashType; + this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType); + } + + /** + * Adds a key to this filter. + * @param key The key to add. + */ + public abstract void add(Key key); + + /** + * Determines whether a specified key belongs to this filter. + * @param key The key to test. + * @return boolean True if the specified key belongs to this filter. + * False otherwise. + */ + public abstract boolean membershipTest(Key key); + + /** + * Performs a logical AND between this filter and a specified filter. + *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to AND with. + */ + public abstract void and(Filter filter); + + /** + * Performs a logical OR between this filter and a specified filter. + *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to OR with. + */ + public abstract void or(Filter filter); + + /** + * Performs a logical XOR between this filter and a specified filter. + *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to XOR with. + */ + public abstract void xor(Filter filter); + + /** + * Performs a logical NOT on this filter. + *

+ * The result is assigned to this filter. + */ + public abstract void not(); + + /** + * Adds a list of keys to this filter. + * @param keys The list of keys. + */ + public void add(List keys){ + if(keys == null) { + throw new IllegalArgumentException("ArrayList may not be null"); + } + + for(Key key: keys) { + add(key); + } + }//end add() + + /** + * Adds a collection of keys to this filter. + * @param keys The collection of keys. + */ + public void add(Collection keys){ + if(keys == null) { + throw new IllegalArgumentException("Collection may not be null"); + } + for(Key key: keys) { + add(key); + } + }//end add() + + /** + * Adds an array of keys to this filter. + * @param keys The array of keys. + */ + public void add(Key[] keys){ + if(keys == null) { + throw new IllegalArgumentException("Key[] may not be null"); + } + for(int i = 0; i < keys.length; i++) { + add(keys[i]); + } + }//end add() + + // Writable interface + + public void write(DataOutput out) throws IOException { + out.writeInt(VERSION); + out.writeInt(this.nbHash); + out.writeByte(this.hashType); + out.writeInt(this.vectorSize); + } + + public void readFields(DataInput in) throws IOException { + int ver = in.readInt(); + if (ver > 0) { // old unversioned format + this.nbHash = ver; + this.hashType = Hash.JENKINS_HASH; + } else if (ver == VERSION) { + this.nbHash = in.readInt(); + this.hashType = in.readByte(); + } else { + throw new IOException("Unsupported version: " + ver); + } + this.vectorSize = in.readInt(); + this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType); + } +}//end class diff --git a/src/java/org/apache/hadoop/util/bloom/HashFunction.java b/src/java/org/apache/hadoop/util/bloom/HashFunction.java new file mode 100644 index 00000000000..535ce1c47b9 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/HashFunction.java @@ -0,0 +1,119 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import org.apache.hadoop.util.hash.Hash; + +/** + * Implements a hash object that returns a certain number of hashed values. + * + * @see Key The general behavior of a key being stored in a filter + * @see Filter The general behavior of a filter + */ +public final class HashFunction { + /** The number of hashed values. */ + private int nbHash; + + /** The maximum highest returned value. */ + private int maxValue; + + /** Hashing algorithm to use. */ + private Hash hashFunction; + + /** + * Constructor. + *

+ * Builds a hash function that must obey to a given maximum number of returned values and a highest value. + * @param maxValue The maximum highest returned value. + * @param nbHash The number of resulting hashed values. + * @param hashType type of the hashing function (see {@link Hash}). + */ + public HashFunction(int maxValue, int nbHash, int hashType) { + if (maxValue <= 0) { + throw new IllegalArgumentException("maxValue must be > 0"); + } + + if (nbHash <= 0) { + throw new IllegalArgumentException("nbHash must be > 0"); + } + + this.maxValue = maxValue; + this.nbHash = nbHash; + this.hashFunction = Hash.getInstance(hashType); + if (this.hashFunction == null) + throw new IllegalArgumentException("hashType must be known"); + } + + /** Clears this hash function. A NOOP */ + public void clear() { + } + + /** + * Hashes a specified key into several integers. + * @param k The specified key. + * @return The array of hashed values. + */ + public int[] hash(Key k){ + byte[] b = k.getBytes(); + if (b == null) { + throw new NullPointerException("buffer reference is null"); + } + if (b.length == 0) { + throw new IllegalArgumentException("key length must be > 0"); + } + int[] result = new int[nbHash]; + for (int i = 0, initval = 0; i < nbHash; i++) { + initval = hashFunction.hash(b, initval); + result[i] = Math.abs(initval % maxValue); + } + return result; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/bloom/Key.java b/src/java/org/apache/hadoop/util/bloom/Key.java new file mode 100644 index 00000000000..69c7f174036 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/Key.java @@ -0,0 +1,178 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.WritableComparable; + +/** + * The general behavior of a key that must be stored in a filter. + * + * @see Filter The general behavior of a filter + */ +public class Key implements WritableComparable { + /** Byte value of key */ + byte[] bytes; + + /** + * The weight associated to this key. + *

+ * Invariant: if it is not specified, each instance of + * Key will have a default weight of 1.0 + */ + double weight; + + /** default constructor - use with readFields */ + public Key() {} + + /** + * Constructor. + *

+ * Builds a key with a default weight. + * @param value The byte value of this key. + */ + public Key(byte[] value) { + this(value, 1.0); + } + + /** + * Constructor. + *

+ * Builds a key with a specified weight. + * @param value The value of this key. + * @param weight The weight associated to this key. + */ + public Key(byte[] value, double weight) { + set(value, weight); + } + + /** + * @param value + * @param weight + */ + public void set(byte[] value, double weight) { + if (value == null) { + throw new IllegalArgumentException("value can not be null"); + } + this.bytes = value; + this.weight = weight; + } + + /** @return byte[] The value of this key. */ + public byte[] getBytes() { + return this.bytes; + } + + /** @return Returns the weight associated to this key. */ + public double getWeight() { + return weight; + } + + /** + * Increments the weight of this key with a specified value. + * @param weight The increment. + */ + public void incrementWeight(double weight) { + this.weight += weight; + } + + /** Increments the weight of this key by one. */ + public void incrementWeight() { + this.weight++; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) { + return false; + } + return this.compareTo((Key)o) == 0; + } + + @Override + public int hashCode() { + int result = 0; + for (int i = 0; i < bytes.length; i++) { + result ^= Byte.valueOf(bytes[i]).hashCode(); + } + result ^= Double.valueOf(weight).hashCode(); + return result; + } + + // Writable + + public void write(DataOutput out) throws IOException { + out.writeInt(bytes.length); + out.write(bytes); + out.writeDouble(weight); + } + + public void readFields(DataInput in) throws IOException { + this.bytes = new byte[in.readInt()]; + in.readFully(this.bytes); + weight = in.readDouble(); + } + + // Comparable + + public int compareTo(Key other) { + int result = this.bytes.length - other.getBytes().length; + for (int i = 0; result == 0 && i < bytes.length; i++) { + result = this.bytes[i] - other.bytes[i]; + } + + if (result == 0) { + result = Double.valueOf(this.weight - other.weight).intValue(); + } + return result; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/bloom/RemoveScheme.java b/src/java/org/apache/hadoop/util/bloom/RemoveScheme.java new file mode 100644 index 00000000000..462fc3a972e --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/RemoveScheme.java @@ -0,0 +1,91 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +/** + * Defines the different remove scheme for retouched Bloom filters. + *
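+ * Each constant is intended as the scheme argument of
+ * RetouchedBloomFilter.selectiveClearing(Key, short). A minimal sketch, assuming an
+ * existing filter that already contains key:
+ *
+ *   filter.addFalsePositive(key);                       // report key as a false positive
+ *   filter.selectiveClearing(key, RemoveScheme.RATIO);  // clear one bit using the RATIO scheme
+ *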

+ * Originally created by + * European Commission One-Lab Project 034819. + */ +public interface RemoveScheme { + /** + * Random selection. + *

+ * The idea is to randomly select a bit to reset. + */ + public final static short RANDOM = 0; + + /** + * MinimumFN Selection. + *

+ * The idea is to select the bit to reset that will generate the minimum + * number of false negatives. + */ + public final static short MINIMUM_FN = 1; + + /** + * MaximumFP Selection. + *

+ * The idea is to select the bit to reset that will remove the maximum number + * of false positives. + */ + public final static short MAXIMUM_FP = 2; + + /** + * Ratio Selection. + *

+ * The idea is to select the bit to reset that will, at the same time, remove + * the maximum number of false positve while minimizing the amount of false + * negative generated. + */ + public final static short RATIO = 3; +} diff --git a/src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java b/src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java new file mode 100644 index 00000000000..c48fb340344 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java @@ -0,0 +1,450 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +/** + * Implements a retouched Bloom filter, as defined in the CoNEXT 2006 paper. + *

+ * It allows the removal of selected false positives at the cost of introducing + * random false negatives, and with the benefit of eliminating some random false + * positives at the same time. + * + *
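+ * A short usage sketch (the vector size and hash count below are illustrative, not
+ * tuned values):
+ *
+ *   RetouchedBloomFilter filter = new RetouchedBloomFilter(1000, 4, Hash.MURMUR_HASH);
+ *   filter.add(new Key("member".getBytes()));
+ *
+ *   Key fp = new Key("not-a-member".getBytes());
+ *   if (filter.membershipTest(fp)) {                    // a false positive was observed
+ *     filter.addFalsePositive(fp);
+ *     filter.selectiveClearing(fp, RemoveScheme.MINIMUM_FN);
+ *   }
+ *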

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * @see BloomFilter A Bloom filter + * @see RemoveScheme The different selective clearing algorithms + * + * @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives + */ +public final class RetouchedBloomFilter extends BloomFilter +implements RemoveScheme { + /** + * KeyList vector (or ElementList Vector, as defined in the paper) of false positives. + */ + List[] fpVector; + + /** + * KeyList vector of keys recorded in the filter. + */ + List[] keyVector; + + /** + * Ratio vector. + */ + double[] ratio; + + private Random rand; + + /** Default constructor - use with readFields */ + public RetouchedBloomFilter() {} + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + this.rand = null; + createVector(); + } + + @Override + public void add(Key key) { + if (key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + bits.set(h[i]); + keyVector[h[i]].add(key); + } + } + + /** + * Adds a false positive information to this retouched Bloom filter. + *

+ * Invariant: if the false positive is null, nothing happens. + * @param key The false positive key to add. + */ + public void addFalsePositive(Key key) { + if (key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + fpVector[h[i]].add(key); + } + } + + /** + * Adds a collection of false positive information to this retouched Bloom filter. + * @param coll The collection of false positive. + */ + public void addFalsePositive(Collection coll) { + if (coll == null) { + throw new NullPointerException("Collection can not be null"); + } + + for (Key k : coll) { + addFalsePositive(k); + } + } + + /** + * Adds a list of false positive information to this retouched Bloom filter. + * @param keys The list of false positive. + */ + public void addFalsePositive(List keys) { + if (keys == null) { + throw new NullPointerException("ArrayList can not be null"); + } + + for (Key k : keys) { + addFalsePositive(k); + } + } + + /** + * Adds an array of false positive information to this retouched Bloom filter. + * @param keys The array of false positive. + */ + public void addFalsePositive(Key[] keys) { + if (keys == null) { + throw new NullPointerException("Key[] can not be null"); + } + + for (int i = 0; i < keys.length; i++) { + addFalsePositive(keys[i]); + } + } + + /** + * Performs the selective clearing for a given key. + * @param k The false positive key to remove from this retouched Bloom filter. + * @param scheme The selective clearing scheme to apply. + */ + public void selectiveClearing(Key k, short scheme) { + if (k == null) { + throw new NullPointerException("Key can not be null"); + } + + if (!membershipTest(k)) { + throw new IllegalArgumentException("Key is not a member"); + } + + int index = 0; + int[] h = hash.hash(k); + + switch(scheme) { + + case RANDOM: + index = randomRemove(); + break; + + case MINIMUM_FN: + index = minimumFnRemove(h); + break; + + case MAXIMUM_FP: + index = maximumFpRemove(h); + break; + + case RATIO: + index = ratioRemove(h); + break; + + default: + throw new AssertionError("Undefined selective clearing scheme"); + + } + + clearBit(index); + } + + private int randomRemove() { + if (rand == null) { + rand = new Random(); + } + + return rand.nextInt(nbHash); + } + + /** + * Chooses the bit position that minimizes the number of false negative generated. + * @param h The different bit positions. + * @return The position that minimizes the number of false negative generated. + */ + private int minimumFnRemove(int[] h) { + int minIndex = Integer.MAX_VALUE; + double minValue = Double.MAX_VALUE; + + for (int i = 0; i < nbHash; i++) { + double keyWeight = getWeight(keyVector[h[i]]); + + if (keyWeight < minValue) { + minIndex = h[i]; + minValue = keyWeight; + } + + } + + return minIndex; + } + + /** + * Chooses the bit position that maximizes the number of false positive removed. + * @param h The different bit positions. + * @return The position that maximizes the number of false positive removed. + */ + private int maximumFpRemove(int[] h) { + int maxIndex = Integer.MIN_VALUE; + double maxValue = Double.MIN_VALUE; + + for (int i = 0; i < nbHash; i++) { + double fpWeight = getWeight(fpVector[h[i]]); + + if (fpWeight > maxValue) { + maxValue = fpWeight; + maxIndex = h[i]; + } + } + + return maxIndex; + } + + /** + * Chooses the bit position that minimizes the number of false negative generated while maximizing. + * the number of false positive removed. 
+ * @param h The different bit positions. + * @return The position that minimizes the number of false negative generated while maximizing. + */ + private int ratioRemove(int[] h) { + computeRatio(); + int minIndex = Integer.MAX_VALUE; + double minValue = Double.MAX_VALUE; + + for (int i = 0; i < nbHash; i++) { + if (ratio[h[i]] < minValue) { + minValue = ratio[h[i]]; + minIndex = h[i]; + } + } + + return minIndex; + } + + /** + * Clears a specified bit in the bit vector and keeps up-to-date the KeyList vectors. + * @param index The position of the bit to clear. + */ + private void clearBit(int index) { + if (index < 0 || index >= vectorSize) { + throw new ArrayIndexOutOfBoundsException(index); + } + + List kl = keyVector[index]; + List fpl = fpVector[index]; + + // update key list + int listSize = kl.size(); + for (int i = 0; i < listSize && !kl.isEmpty(); i++) { + removeKey(kl.get(0), keyVector); + } + + kl.clear(); + keyVector[index].clear(); + + //update false positive list + listSize = fpl.size(); + for (int i = 0; i < listSize && !fpl.isEmpty(); i++) { + removeKey(fpl.get(0), fpVector); + } + + fpl.clear(); + fpVector[index].clear(); + + //update ratio + ratio[index] = 0.0; + + //update bit vector + bits.clear(index); + } + + /** + * Removes a given key from this filer. + * @param k The key to remove. + * @param vector The counting vector associated to the key. + */ + private void removeKey(Key k, List[] vector) { + if (k == null) { + throw new NullPointerException("Key can not be null"); + } + if (vector == null) { + throw new NullPointerException("ArrayList[] can not be null"); + } + + int[] h = hash.hash(k); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + vector[h[i]].remove(k); + } + } + + /** + * Computes the ratio A/FP. + */ + private void computeRatio() { + for (int i = 0; i < vectorSize; i++) { + double keyWeight = getWeight(keyVector[i]); + double fpWeight = getWeight(fpVector[i]); + + if (keyWeight > 0 && fpWeight > 0) { + ratio[i] = keyWeight / fpWeight; + } + } + } + + private double getWeight(List keyList) { + double weight = 0.0; + for (Key k : keyList) { + weight += k.getWeight(); + } + return weight; + } + + /** + * Creates and initialises the various vectors. 
+ */ + @SuppressWarnings("unchecked") + private void createVector() { + fpVector = new List[vectorSize]; + keyVector = new List[vectorSize]; + ratio = new double[vectorSize]; + + for (int i = 0; i < vectorSize; i++) { + fpVector[i] = Collections.synchronizedList(new ArrayList()); + keyVector[i] = Collections.synchronizedList(new ArrayList()); + ratio[i] = 0.0; + } + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + for (int i = 0; i < fpVector.length; i++) { + List list = fpVector[i]; + out.writeInt(list.size()); + for (Key k : list) { + k.write(out); + } + } + for (int i = 0; i < keyVector.length; i++) { + List list = keyVector[i]; + out.writeInt(list.size()); + for (Key k : list) { + k.write(out); + } + } + for (int i = 0; i < ratio.length; i++) { + out.writeDouble(ratio[i]); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + createVector(); + for (int i = 0; i < fpVector.length; i++) { + List list = fpVector[i]; + int size = in.readInt(); + for (int j = 0; j < size; j++) { + Key k = new Key(); + k.readFields(in); + list.add(k); + } + } + for (int i = 0; i < keyVector.length; i++) { + List list = keyVector[i]; + int size = in.readInt(); + for (int j = 0; j < size; j++) { + Key k = new Key(); + k.readFields(in); + list.add(k); + } + } + for (int i = 0; i < ratio.length; i++) { + ratio[i] = in.readDouble(); + } + } +} diff --git a/src/java/org/apache/hadoop/util/hash/Hash.java b/src/java/org/apache/hadoop/util/hash/Hash.java new file mode 100644 index 00000000000..6d3eb4ac4d4 --- /dev/null +++ b/src/java/org/apache/hadoop/util/hash/Hash.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.hash; + +import org.apache.hadoop.conf.Configuration; + +/** + * This class represents a common API for hashing functions. + */ +public abstract class Hash { + /** Constant to denote invalid hash type. */ + public static final int INVALID_HASH = -1; + /** Constant to denote {@link JenkinsHash}. */ + public static final int JENKINS_HASH = 0; + /** Constant to denote {@link MurmurHash}. */ + public static final int MURMUR_HASH = 1; + + /** + * This utility method converts String representation of hash function name + * to a symbolic constant. Currently two function types are supported, + * "jenkins" and "murmur". 
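+ * For example (any unrecognized name yields INVALID_HASH):
+ *
+ *   int type = Hash.parseHashType("murmur");        // MURMUR_HASH
+ *   Hash hasher = Hash.getInstance(type);
+ *   int h = hasher.hash("some bytes".getBytes());
+ *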
+ * @param name hash function name + * @return one of the predefined constants + */ + public static int parseHashType(String name) { + if ("jenkins".equalsIgnoreCase(name)) { + return JENKINS_HASH; + } else if ("murmur".equalsIgnoreCase(name)) { + return MURMUR_HASH; + } else { + return INVALID_HASH; + } + } + + /** + * This utility method converts the name of the configured + * hash type to a symbolic constant. + * @param conf configuration + * @return one of the predefined constants + */ + public static int getHashType(Configuration conf) { + String name = conf.get("hadoop.util.hash.type", "murmur"); + return parseHashType(name); + } + + /** + * Get a singleton instance of hash function of a given type. + * @param type predefined hash type + * @return hash function instance, or null if type is invalid + */ + public static Hash getInstance(int type) { + switch(type) { + case JENKINS_HASH: + return JenkinsHash.getInstance(); + case MURMUR_HASH: + return MurmurHash.getInstance(); + default: + return null; + } + } + + /** + * Get a singleton instance of hash function of a type + * defined in the configuration. + * @param conf current configuration + * @return defined hash type, or null if type is invalid + */ + public static Hash getInstance(Configuration conf) { + int type = getHashType(conf); + return getInstance(type); + } + + /** + * Calculate a hash using all bytes from the input argument, and + * a seed of -1. + * @param bytes input bytes + * @return hash value + */ + public int hash(byte[] bytes) { + return hash(bytes, bytes.length, -1); + } + + /** + * Calculate a hash using all bytes from the input argument, + * and a provided seed value. + * @param bytes input bytes + * @param initval seed value + * @return hash value + */ + public int hash(byte[] bytes, int initval) { + return hash(bytes, bytes.length, initval); + } + + /** + * Calculate a hash using bytes from 0 to length, and + * the provided seed value + * @param bytes input bytes + * @param length length of the valid bytes to consider + * @param initval seed value + * @return hash value + */ + public abstract int hash(byte[] bytes, int length, int initval); +} diff --git a/src/java/org/apache/hadoop/util/hash/JenkinsHash.java b/src/java/org/apache/hadoop/util/hash/JenkinsHash.java new file mode 100644 index 00000000000..89fd6cb02e2 --- /dev/null +++ b/src/java/org/apache/hadoop/util/hash/JenkinsHash.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.hash; + +import java.io.FileInputStream; +import java.io.IOException; + +/** + * Produces 32-bit hash for hash table lookup. + * + *

lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * You can use this free for any purpose.  It's in the public domain.
+ * It has no warranty.
+ * 
+ * + * @see lookup3.c + * @see Hash Functions (and how this + * function compares to others such as CRC, MD?, etc + * @see Has update on the + * Dr. Dobbs Article + */ +public class JenkinsHash extends Hash { + private static long INT_MASK = 0x00000000ffffffffL; + private static long BYTE_MASK = 0x00000000000000ffL; + + private static JenkinsHash _instance = new JenkinsHash(); + + public static Hash getInstance() { + return _instance; + } + + private static long rot(long val, int pos) { + return ((Integer.rotateLeft( + (int)(val & INT_MASK), pos)) & INT_MASK); + } + + /** + * taken from hashlittle() -- hash a variable-length key into a 32-bit value + * + * @param key the key (the unaligned variable-length array of bytes) + * @param nbytes number of bytes to include in hash + * @param initval can be any integer value + * @return a 32-bit value. Every bit of the key affects every bit of the + * return value. Two keys differing by one or two bits will have totally + * different hash values. + * + *
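+ * A call sketch, using the -1 seed that the convenience overload Hash.hash(byte[]) passes:
+ *
+ *   byte[] data = "example".getBytes();
+ *   int h = JenkinsHash.getInstance().hash(data, data.length, -1);
+ *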

The best hash table sizes are powers of 2. There is no need to do mod + * a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + * For example, if you need only 10 bits, do + * h = (h & hashmask(10)); + * In which case, the hash table should have hashsize(10) elements. + * + *

If you are hashing n strings byte[][] k, do it like this: + * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + * + *

By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + * code any way you wish, private, educational, or commercial. It's free. + * + *

Use for hash table lookup, or anything where one collision in 2^^32 is + * acceptable. Do NOT use for cryptographic purposes. + */ + @SuppressWarnings("fallthrough") + public int hash(byte[] key, int nbytes, int initval) { + int length = nbytes; + long a, b, c; // We use longs because we don't have unsigned ints + a = b = c = (0x00000000deadbeefL + length + initval) & INT_MASK; + int offset = 0; + for (; length > 12; offset += 12, length -= 12) { + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + + /* + * mix -- mix 3 32-bit values reversibly. + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * + * This was tested for: + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + * satisfy this are + * 4 6 8 16 19 4 + * 9 15 3 18 27 15 + * 14 9 3 7 17 3 + * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for + * "differ" defined as + with a one-bit base and a two-bit delta. I + * used http://burtleburtle.net/bob/hash/avalanche.html to choose + * the operations, constants, and arrangements of the variables. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. + * The most thoroughly mixed value is c, but it doesn't really even + * achieve avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the + * opposite direction as the goal of parallelism. I did what I could. + * Rotates seem to cost as much as shifts on every machine I could lay + * my hands on, and rotates are much kinder to the top and bottom bits, + * so I used rotates. 
+ * + * #define mix(a,b,c) \ + * { \ + * a -= c; a ^= rot(c, 4); c += b; \ + * b -= a; b ^= rot(a, 6); a += c; \ + * c -= b; c ^= rot(b, 8); b += a; \ + * a -= c; a ^= rot(c,16); c += b; \ + * b -= a; b ^= rot(a,19); a += c; \ + * c -= b; c ^= rot(b, 4); b += a; \ + * } + * + * mix(a,b,c); + */ + a = (a - c) & INT_MASK; a ^= rot(c, 4); c = (c + b) & INT_MASK; + b = (b - a) & INT_MASK; b ^= rot(a, 6); a = (a + c) & INT_MASK; + c = (c - b) & INT_MASK; c ^= rot(b, 8); b = (b + a) & INT_MASK; + a = (a - c) & INT_MASK; a ^= rot(c,16); c = (c + b) & INT_MASK; + b = (b - a) & INT_MASK; b ^= rot(a,19); a = (a + c) & INT_MASK; + c = (c - b) & INT_MASK; c ^= rot(b, 4); b = (b + a) & INT_MASK; + } + + //-------------------------------- last block: affect all 32 bits of (c) + switch (length) { // all the case statements fall through + case 12: + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 11: + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 10: + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 9: + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + case 8: + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 7: + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 6: + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 5: + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + case 4: + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 3: + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 2: + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 1: + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + break; + case 0: + return (int)(c & INT_MASK); + } + /* + * final -- final mixing of 3 32-bit values (a,b,c) into c + * + * Pairs of (a,b,c) values differing in only a few bits will usually + * produce values of c that look totally different. This was tested for + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * These constants passed: + * 14 11 25 16 4 14 24 + * 12 14 25 16 4 14 24 + * and these came close: + * 4 8 15 26 3 22 24 + * 10 8 15 26 3 22 24 + * 11 8 15 26 3 22 24 + * + * #define final(a,b,c) \ + * { + * c ^= b; c -= rot(b,14); \ + * a ^= c; a -= rot(c,11); \ + * b ^= a; b -= rot(a,25); \ + * c ^= b; c -= rot(b,16); \ + * a ^= c; a -= rot(c,4); \ + * b ^= a; b -= rot(a,14); \ + * c ^= b; c -= rot(b,24); \ + * } + * + */ + c ^= b; c = (c - rot(b,14)) & INT_MASK; + a ^= c; a = (a - rot(c,11)) & INT_MASK; + b ^= a; b = (b - rot(a,25)) & INT_MASK; + c ^= b; c = (c - rot(b,16)) & INT_MASK; + a ^= c; a = (a - rot(c,4)) & INT_MASK; + b ^= a; b = (b - rot(a,14)) & INT_MASK; + c ^= b; c = (c - rot(b,24)) & INT_MASK; + + return (int)(c & INT_MASK); + } + + /** + * Compute the hash of the specified file + * @param args name of file to compute hash of. 
+ * @throws IOException + */ + public static void main(String[] args) throws IOException { + if (args.length != 1) { + System.err.println("Usage: JenkinsHash filename"); + System.exit(-1); + } + FileInputStream in = new FileInputStream(args[0]); + byte[] bytes = new byte[512]; + int value = 0; + JenkinsHash hash = new JenkinsHash(); + for (int length = in.read(bytes); length > 0 ; length = in.read(bytes)) { + value = hash.hash(bytes, length, value); + } + System.out.println(Math.abs(value)); + } +} diff --git a/src/java/org/apache/hadoop/util/hash/MurmurHash.java b/src/java/org/apache/hadoop/util/hash/MurmurHash.java new file mode 100644 index 00000000000..09e311d0681 --- /dev/null +++ b/src/java/org/apache/hadoop/util/hash/MurmurHash.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.hash; + +/** + * This is a very fast, non-cryptographic hash suitable for general hash-based + * lookup. See http://murmurhash.googlepages.com/ for more details. + * + *

The C version of MurmurHash 2.0 found at that site was ported + * to Java by Andrzej Bialecki (ab at getopt org).
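+ *
+ * A usage sketch (the seed of 0 is an arbitrary choice for illustration):
+ *
+ *   byte[] data = "example".getBytes();
+ *   int h = MurmurHash.getInstance().hash(data, data.length, 0);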

+ */ +public class MurmurHash extends Hash { + private static MurmurHash _instance = new MurmurHash(); + + public static Hash getInstance() { + return _instance; + } + + public int hash(byte[] data, int length, int seed) { + int m = 0x5bd1e995; + int r = 24; + + int h = seed ^ length; + + int len_4 = length >> 2; + + for (int i = 0; i < len_4; i++) { + int i_4 = i << 2; + int k = data[i_4 + 3]; + k = k << 8; + k = k | (data[i_4 + 2] & 0xff); + k = k << 8; + k = k | (data[i_4 + 1] & 0xff); + k = k << 8; + k = k | (data[i_4 + 0] & 0xff); + k *= m; + k ^= k >>> r; + k *= m; + h *= m; + h ^= k; + } + + // avoid calculating modulo + int len_m = len_4 << 2; + int left = length - len_m; + + if (left != 0) { + if (left >= 3) { + h ^= (int) data[length - 3] << 16; + } + if (left >= 2) { + h ^= (int) data[length - 2] << 8; + } + if (left >= 1) { + h ^= (int) data[length - 1]; + } + + h *= m; + } + + h ^= h >>> 13; + h *= m; + h ^= h >>> 15; + + return h; + } +} diff --git a/src/java/org/apache/hadoop/util/package.html b/src/java/org/apache/hadoop/util/package.html new file mode 100644 index 00000000000..e6512f1e437 --- /dev/null +++ b/src/java/org/apache/hadoop/util/package.html @@ -0,0 +1,23 @@ + + + + + +Common utilities. + + diff --git a/src/java/overview.html b/src/java/overview.html new file mode 100644 index 00000000000..736da78aa1f --- /dev/null +++ b/src/java/overview.html @@ -0,0 +1,292 @@ + + + + + Hadoop + + + +Hadoop is a distributed computing platform. + +

Hadoop primarily consists of the Hadoop Distributed FileSystem +(HDFS) and an +implementation of the +Map-Reduce programming paradigm.

+ + +

Hadoop is a software framework that lets one easily write and run applications +that process vast amounts of data. Here's what makes Hadoop especially useful:

+
    +
  • + Scalable: Hadoop can reliably store and process petabytes. +
  • +
  • + Economical: It distributes the data and processing across clusters + of commonly available computers. These clusters can number into the thousands + of nodes. +
  • +
  • + Efficient: By distributing the data, Hadoop can process it in parallel + on the nodes where the data is located. This makes it extremely rapid. +
  • +
  • + Reliable: Hadoop automatically maintains multiple copies of data and + automatically redeploys computing tasks based on failures. +
  • +
+ +

Requirements

+ +

Platforms

+ +
    +
  • + Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes. +
  • +
  • + Win32 is supported as a development platform. Distributed operation + has not been well tested on Win32, so this is not a production + platform. +
  • +
+ +

Requisite Software

+ +
    +
  1. + Java 1.6.x, preferably from + Sun. + Set JAVA_HOME to the root of your Java installation. +
  2. +
  3. + ssh must be installed and sshd must be running to use Hadoop's + scripts to manage remote Hadoop daemons. +
  4. +
  5. + rsync may be installed to use Hadoop's scripts to manage remote + Hadoop installations. +
  6. +
+ +

Additional requirements for Windows

+ +
    +
  1. + Cygwin - Required for shell support in + addition to the required software above. +
  2. +
+ +

Installing Required Software

+ +

If your platform does not have the required software listed above, you +will have to install it.

+ +

For example on Ubuntu Linux:

+

+$ sudo apt-get install ssh
+$ sudo apt-get install rsync
+

+ +

On Windows, if you did not install the required software when you +installed cygwin, start the cygwin installer and select the packages:

+
    +
  • openssh - the "Net" category
  • +
  • rsync - the "Net" category
  • +
+ +

Getting Started

+ +

First, you need to get a copy of the Hadoop code.

+ +

Edit the file conf/hadoop-env.sh to define at least +JAVA_HOME.

+ +

Try the following command:

+bin/hadoop +

This will display the documentation for the Hadoop command script.

+ +

Standalone operation

+ +

By default, Hadoop is configured to run things in a non-distributed +mode, as a single Java process. This is useful for debugging, and can +be demonstrated as follows:

+ +mkdir input
+cp conf/*.xml input
+bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+cat output/* +
+

This will display counts for each match of the +regular expression.

+ +

Note that input is specified as a directory containing input +files and that output is also specified as a directory where parts are +written.

+ +

Distributed operation

+ +To configure Hadoop for distributed operation you must specify the +following: + +
    + +
  1. The NameNode (Distributed Filesystem master) host. This is +specified with the configuration property fs.default.name. +
  2. + +
  3. The {@link org.apache.hadoop.mapred.JobTracker} (MapReduce master) +host and port. This is specified with the configuration property +mapred.job.tracker. +
  4. + +
  5. A slaves file that lists the names of all the hosts in +the cluster. The default slaves file is conf/slaves. + +
+ +

Pseudo-distributed configuration

+ +You can in fact run everything on a single host. To run things this +way, put the following in: +
+
+conf/core-site.xml: +<configuration> + + <property> + <name>fs.default.name</name> + <value>hdfs://localhost/</value> + </property> + +</configuration> + +conf/hdfs-site.xml: +<configuration> + + <property> + <name>dfs.replication</name> + <value>1</value> + </property> + +</configuration> + +conf/mapred-site.xml: +<configuration> + + <property> + <name>mapred.job.tracker</name> + <value>localhost:9001</value> + </property> + +</configuration> + +

(We also set the HDFS replication level to 1 in order to +reduce warnings when running on a single node.)

+ +

Now check that the command
ssh localhost
does not +require a password. If it does, execute the following commands:

+ +

ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
+cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys +

+ +

Bootstrapping

+ +

A new distributed filesystem must be formatted with the following +command, run on the master node:

+ +

bin/hadoop namenode -format

+ +

The Hadoop daemons are started with the following command:

+ +

bin/start-all.sh

+ +

Daemon log output is written to the logs/ directory.

+ +

Input files are copied into the distributed filesystem as follows:

+ +

bin/hadoop fs -put input input

+ +

Distributed execution

+ +

Things are run as before, but output must be copied locally to +examine it:

+ + +bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+bin/hadoop fs -get output output +cat output/* +
+ +

When you're done, stop the daemons with:

+ +

bin/stop-all.sh

+ +

Fully-distributed operation

+ +

Fully distributed operation is just like the pseudo-distributed operation +described above, except that you must specify:

+ +
    + +
  1. The hostname or IP address of your master server in the value +for fs.default.name, + as hdfs://master.example.com/ in conf/core-site.xml.
  2. + +
  3. The host and port of your master server in the value +of mapred.job.tracker +as master.example.com:port in conf/mapred-site.xml.
  4. + +
  5. Directories for dfs.name.dir and +dfs.data.dir +in conf/hdfs-site.xml. +These are local directories used to hold distributed filesystem +data on the master node and slave nodes respectively. Note +that dfs.data.dir may contain a space- or comma-separated +list of directory names, so that data may be stored on multiple local +devices.
  6. + +
  7. mapred.local.dir + in conf/mapred-site.xml, the local directory where temporary + MapReduce data is stored. It also may be a list of directories.
  8. + +
  9. mapred.map.tasks +and mapred.reduce.tasks +in conf/mapred-site.xml. +As a rule of thumb, use 10x the +number of slave processors for mapred.map.tasks, and 2x the +number of slave processors for mapred.reduce.tasks.
  10. + +
+ +

Finally, list all slave hostnames or IP addresses in your +conf/slaves file, one per line. Then format your filesystem +and start your cluster on your master node, as above. + + + +