From 5128a9a453d64bfe1ed978cf9ffed27985eeef36 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Tue, 19 May 2009 04:20:40 +0000 Subject: [PATCH] HADOOP-4687 Moving src directories on branch git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/HADOOP-4687/core@776174 13f79535-47bb-0310-9956-ffa450edef68 --- src/java/core-default.xml | 444 +++ .../hadoop/HadoopVersionAnnotation.java | 69 + .../org/apache/hadoop/conf/Configurable.java | 29 + .../org/apache/hadoop/conf/Configuration.java | 1326 +++++++ .../org/apache/hadoop/conf/Configured.java | 46 + src/java/org/apache/hadoop/conf/package.html | 23 + .../hadoop/filecache/DistributedCache.java | 879 +++++ .../org/apache/hadoop/fs/BlockLocation.java | 241 ++ .../hadoop/fs/BufferedFSInputStream.java | 96 + .../apache/hadoop/fs/ChecksumException.java | 35 + .../apache/hadoop/fs/ChecksumFileSystem.java | 547 +++ .../org/apache/hadoop/fs/ContentSummary.java | 164 + src/java/org/apache/hadoop/fs/DF.java | 193 + src/java/org/apache/hadoop/fs/DU.java | 198 + .../apache/hadoop/fs/FSDataInputStream.java | 62 + .../apache/hadoop/fs/FSDataOutputStream.java | 100 + src/java/org/apache/hadoop/fs/FSError.java | 29 + .../org/apache/hadoop/fs/FSInputChecker.java | 432 +++ .../org/apache/hadoop/fs/FSInputStream.java | 78 + .../org/apache/hadoop/fs/FSOutputSummer.java | 176 + .../org/apache/hadoop/fs/FileChecksum.java | 53 + src/java/org/apache/hadoop/fs/FileStatus.java | 252 ++ src/java/org/apache/hadoop/fs/FileSystem.java | 1648 +++++++++ src/java/org/apache/hadoop/fs/FileUtil.java | 794 ++++ .../apache/hadoop/fs/FilterFileSystem.java | 278 ++ src/java/org/apache/hadoop/fs/FsShell.java | 1925 ++++++++++ .../apache/hadoop/fs/FsShellPermissions.java | 315 ++ src/java/org/apache/hadoop/fs/FsStatus.java | 70 + .../org/apache/hadoop/fs/FsUrlConnection.java | 61 + .../apache/hadoop/fs/FsUrlStreamHandler.java | 47 + .../hadoop/fs/FsUrlStreamHandlerFactory.java | 78 + .../org/apache/hadoop/fs/GlobExpander.java | 166 + .../org/apache/hadoop/fs/HarFileSystem.java | 892 +++++ .../apache/hadoop/fs/LengthFileChecksum.java | 0 .../apache/hadoop/fs/LocalDirAllocator.java | 418 +++ .../org/apache/hadoop/fs/LocalFileSystem.java | 115 + .../hadoop/fs/MD5MD5CRC32FileChecksum.java | 113 + src/java/org/apache/hadoop/fs/Path.java | 298 ++ src/java/org/apache/hadoop/fs/PathFilter.java | 32 + .../apache/hadoop/fs/PositionedReadable.java | 47 + .../apache/hadoop/fs/RawLocalFileSystem.java | 496 +++ src/java/org/apache/hadoop/fs/Seekable.java | 41 + src/java/org/apache/hadoop/fs/Syncable.java | 30 + src/java/org/apache/hadoop/fs/Trash.java | 291 ++ .../apache/hadoop/fs/ftp/FTPException.java | 38 + .../apache/hadoop/fs/ftp/FTPFileSystem.java | 576 +++ .../apache/hadoop/fs/ftp/FTPInputStream.java | 126 + .../org/apache/hadoop/fs/kfs/IFSImpl.java | 60 + .../org/apache/hadoop/fs/kfs/KFSImpl.java | 151 + .../apache/hadoop/fs/kfs/KFSInputStream.java | 130 + .../apache/hadoop/fs/kfs/KFSOutputStream.java | 97 + .../hadoop/fs/kfs/KosmosFileSystem.java | 340 ++ .../org/apache/hadoop/fs/kfs/package.html | 98 + src/java/org/apache/hadoop/fs/package.html | 23 + .../fs/permission/AccessControlException.java | 61 + .../apache/hadoop/fs/permission/FsAction.java | 67 + .../hadoop/fs/permission/FsPermission.java | 232 ++ .../fs/permission/PermissionStatus.java | 118 + src/java/org/apache/hadoop/fs/s3/Block.java | 47 + .../apache/hadoop/fs/s3/FileSystemStore.java | 63 + src/java/org/apache/hadoop/fs/s3/INode.java | 117 + .../hadoop/fs/s3/Jets3tFileSystemStore.java | 390 ++ 
.../apache/hadoop/fs/s3/MigrationTool.java | 280 ++ .../apache/hadoop/fs/s3/S3Credentials.java | 99 + .../org/apache/hadoop/fs/s3/S3Exception.java | 34 + .../org/apache/hadoop/fs/s3/S3FileSystem.java | 361 ++ .../hadoop/fs/s3/S3FileSystemException.java | 31 + .../apache/hadoop/fs/s3/S3InputStream.java | 211 ++ .../apache/hadoop/fs/s3/S3OutputStream.java | 231 ++ .../fs/s3/VersionMismatchException.java | 32 + src/java/org/apache/hadoop/fs/s3/package.html | 55 + .../hadoop/fs/s3native/FileMetadata.java | 54 + .../s3native/Jets3tNativeFileSystemStore.java | 255 ++ .../fs/s3native/NativeFileSystemStore.java | 65 + .../fs/s3native/NativeS3FileSystem.java | 578 +++ .../hadoop/fs/s3native/PartialListing.java | 59 + .../apache/hadoop/fs/s3native/package.html | 32 + .../org/apache/hadoop/fs/shell/Command.java | 86 + .../apache/hadoop/fs/shell/CommandFormat.java | 75 + .../apache/hadoop/fs/shell/CommandUtils.java | 28 + .../org/apache/hadoop/fs/shell/Count.java | 77 + .../apache/hadoop/http/FilterContainer.java | 40 + .../apache/hadoop/http/FilterInitializer.java | 29 + .../org/apache/hadoop/http/HttpServer.java | 519 +++ .../apache/hadoop/io/AbstractMapWritable.java | 207 ++ src/java/org/apache/hadoop/io/ArrayFile.java | 94 + .../org/apache/hadoop/io/ArrayWritable.java | 103 + .../apache/hadoop/io/BinaryComparable.java | 76 + .../org/apache/hadoop/io/BloomMapFile.java | 259 ++ .../org/apache/hadoop/io/BooleanWritable.java | 111 + .../org/apache/hadoop/io/ByteWritable.java | 87 + .../org/apache/hadoop/io/BytesWritable.java | 216 ++ src/java/org/apache/hadoop/io/Closeable.java | 24 + .../apache/hadoop/io/CompressedWritable.java | 86 + .../org/apache/hadoop/io/DataInputBuffer.java | 91 + .../apache/hadoop/io/DataOutputBuffer.java | 108 + .../apache/hadoop/io/DefaultStringifier.java | 199 + .../org/apache/hadoop/io/DeprecatedUTF8.java | 60 + .../org/apache/hadoop/io/DoubleWritable.java | 95 + .../org/apache/hadoop/io/EnumSetWritable.java | 202 + .../org/apache/hadoop/io/FloatWritable.java | 87 + .../org/apache/hadoop/io/GenericWritable.java | 152 + src/java/org/apache/hadoop/io/IOUtils.java | 177 + .../org/apache/hadoop/io/InputBuffer.java | 89 + .../org/apache/hadoop/io/IntWritable.java | 86 + .../org/apache/hadoop/io/LongWritable.java | 97 + src/java/org/apache/hadoop/io/MD5Hash.java | 221 ++ src/java/org/apache/hadoop/io/MapFile.java | 713 ++++ .../org/apache/hadoop/io/MapWritable.java | 169 + .../apache/hadoop/io/MultipleIOException.java | 49 + .../org/apache/hadoop/io/NullWritable.java | 70 + .../org/apache/hadoop/io/ObjectWritable.java | 273 ++ .../org/apache/hadoop/io/OutputBuffer.java | 92 + .../org/apache/hadoop/io/RawComparator.java | 37 + .../org/apache/hadoop/io/SequenceFile.java | 3244 +++++++++++++++++ src/java/org/apache/hadoop/io/SetFile.java | 105 + .../apache/hadoop/io/SortedMapWritable.java | 204 ++ .../org/apache/hadoop/io/Stringifier.java | 54 + src/java/org/apache/hadoop/io/Text.java | 594 +++ .../apache/hadoop/io/TwoDArrayWritable.java | 91 + src/java/org/apache/hadoop/io/UTF8.java | 286 ++ .../org/apache/hadoop/io/VIntWritable.java | 73 + .../org/apache/hadoop/io/VLongWritable.java | 73 + .../hadoop/io/VersionMismatchException.java | 41 + .../apache/hadoop/io/VersionedWritable.java | 50 + src/java/org/apache/hadoop/io/Writable.java | 80 + .../apache/hadoop/io/WritableComparable.java | 55 + .../apache/hadoop/io/WritableComparator.java | 216 ++ .../apache/hadoop/io/WritableFactories.java | 63 + .../org/apache/hadoop/io/WritableFactory.java | 28 + 
.../org/apache/hadoop/io/WritableName.java | 79 + .../org/apache/hadoop/io/WritableUtils.java | 418 +++ .../apache/hadoop/io/compress/BZip2Codec.java | 301 ++ .../io/compress/BlockCompressorStream.java | 156 + .../io/compress/BlockDecompressorStream.java | 128 + .../apache/hadoop/io/compress/CodecPool.java | 154 + .../hadoop/io/compress/CompressionCodec.java | 110 + .../io/compress/CompressionCodecFactory.java | 230 ++ .../io/compress/CompressionInputStream.java | 63 + .../io/compress/CompressionOutputStream.java | 69 + .../apache/hadoop/io/compress/Compressor.java | 106 + .../hadoop/io/compress/CompressorStream.java | 109 + .../hadoop/io/compress/Decompressor.java | 97 + .../io/compress/DecompressorStream.java | 159 + .../hadoop/io/compress/DefaultCodec.java | 87 + .../apache/hadoop/io/compress/GzipCodec.java | 216 ++ .../io/compress/bzip2/BZip2Constants.java | 97 + .../compress/bzip2/BZip2DummyCompressor.java | 62 + .../bzip2/BZip2DummyDecompressor.java | 52 + .../io/compress/bzip2/CBZip2InputStream.java | 969 +++++ .../io/compress/bzip2/CBZip2OutputStream.java | 2081 +++++++++++ .../apache/hadoop/io/compress/bzip2/CRC.java | 125 + .../io/compress/zlib/BuiltInZlibDeflater.java | 49 + .../io/compress/zlib/BuiltInZlibInflater.java | 50 + .../io/compress/zlib/ZlibCompressor.java | 378 ++ .../io/compress/zlib/ZlibDecompressor.java | 287 ++ .../hadoop/io/compress/zlib/ZlibFactory.java | 110 + src/java/org/apache/hadoop/io/package.html | 24 + .../io/retry/RetryInvocationHandler.java | 88 + .../apache/hadoop/io/retry/RetryPolicies.java | 258 ++ .../apache/hadoop/io/retry/RetryPolicy.java | 43 + .../apache/hadoop/io/retry/RetryProxy.java | 68 + .../org/apache/hadoop/io/retry/package.html | 48 + .../hadoop/io/serializer/Deserializer.java | 59 + .../io/serializer/DeserializerComparator.java | 70 + .../io/serializer/JavaSerialization.java | 101 + .../JavaSerializationComparator.java | 46 + .../hadoop/io/serializer/Serialization.java | 44 + .../io/serializer/SerializationFactory.java | 89 + .../hadoop/io/serializer/Serializer.java | 52 + .../io/serializer/WritableSerialization.java | 111 + .../apache/hadoop/io/serializer/package.html | 37 + src/java/org/apache/hadoop/ipc/Client.java | 914 +++++ .../apache/hadoop/ipc/ConnectionHeader.java | 93 + src/java/org/apache/hadoop/ipc/RPC.java | 575 +++ .../apache/hadoop/ipc/RemoteException.java | 120 + src/java/org/apache/hadoop/ipc/Server.java | 1255 +++++++ src/java/org/apache/hadoop/ipc/Status.java | 32 + .../apache/hadoop/ipc/VersionedProtocol.java | 38 + .../hadoop/ipc/metrics/RpcActivityMBean.java | 80 + .../apache/hadoop/ipc/metrics/RpcMetrics.java | 104 + .../org/apache/hadoop/ipc/metrics/RpcMgt.java | 119 + .../hadoop/ipc/metrics/RpcMgtMBean.java | 105 + src/java/org/apache/hadoop/ipc/package.html | 23 + src/java/org/apache/hadoop/log/LogLevel.java | 151 + .../apache/hadoop/metrics/ContextFactory.java | 204 ++ .../apache/hadoop/metrics/MetricsContext.java | 118 + .../hadoop/metrics/MetricsException.java | 42 + .../apache/hadoop/metrics/MetricsRecord.java | 246 ++ .../apache/hadoop/metrics/MetricsServlet.java | 160 + .../apache/hadoop/metrics/MetricsUtil.java | 100 + .../org/apache/hadoop/metrics/Updater.java | 33 + .../hadoop/metrics/file/FileContext.java | 139 + .../apache/hadoop/metrics/file/package.html | 43 + .../metrics/ganglia/GangliaContext.java | 231 ++ .../hadoop/metrics/ganglia/package.html | 74 + .../hadoop/metrics/jvm/EventCounter.java | 94 + .../apache/hadoop/metrics/jvm/JvmMetrics.java | 191 + .../org/apache/hadoop/metrics/package.html 
| 159 + .../metrics/spi/AbstractMetricsContext.java | 475 +++ .../hadoop/metrics/spi/CompositeContext.java | 186 + .../hadoop/metrics/spi/MetricValue.java | 52 + .../hadoop/metrics/spi/MetricsRecordImpl.java | 275 ++ .../metrics/spi/NoEmitMetricsContext.java | 49 + .../hadoop/metrics/spi/NullContext.java | 58 + .../spi/NullContextWithUpdateThread.java | 69 + .../hadoop/metrics/spi/OutputRecord.java | 90 + .../org/apache/hadoop/metrics/spi/Util.java | 67 + .../apache/hadoop/metrics/spi/package.html | 36 + .../apache/hadoop/metrics/util/MBeanUtil.java | 87 + .../hadoop/metrics/util/MetricsBase.java | 47 + .../metrics/util/MetricsDynamicMBeanBase.java | 226 ++ .../hadoop/metrics/util/MetricsIntValue.java | 104 + .../hadoop/metrics/util/MetricsLongValue.java | 88 + .../hadoop/metrics/util/MetricsRegistry.java | 85 + .../metrics/util/MetricsTimeVaryingInt.java | 128 + .../metrics/util/MetricsTimeVaryingLong.java | 124 + .../metrics/util/MetricsTimeVaryingRate.java | 196 + .../hadoop/net/CachedDNSToSwitchMapping.java | 80 + src/java/org/apache/hadoop/net/DNS.java | 279 ++ .../apache/hadoop/net/DNSToSwitchMapping.java | 42 + src/java/org/apache/hadoop/net/NetUtils.java | 440 +++ .../apache/hadoop/net/NetworkTopology.java | 655 ++++ src/java/org/apache/hadoop/net/Node.java | 47 + src/java/org/apache/hadoop/net/NodeBase.java | 134 + .../apache/hadoop/net/ScriptBasedMapping.java | 159 + .../hadoop/net/SocketIOWithTimeout.java | 455 +++ .../apache/hadoop/net/SocketInputStream.java | 170 + .../apache/hadoop/net/SocketOutputStream.java | 219 ++ .../apache/hadoop/net/SocksSocketFactory.java | 161 + .../hadoop/net/StandardSocketFactory.java | 122 + src/java/org/apache/hadoop/net/package.html | 23 + .../hadoop/record/BinaryRecordInput.java | 136 + .../hadoop/record/BinaryRecordOutput.java | 120 + src/java/org/apache/hadoop/record/Buffer.java | 246 ++ .../apache/hadoop/record/CsvRecordInput.java | 200 + .../apache/hadoop/record/CsvRecordOutput.java | 140 + src/java/org/apache/hadoop/record/Index.java | 37 + src/java/org/apache/hadoop/record/Record.java | 91 + .../hadoop/record/RecordComparator.java | 47 + .../org/apache/hadoop/record/RecordInput.java | 120 + .../apache/hadoop/record/RecordOutput.java | 141 + src/java/org/apache/hadoop/record/Utils.java | 490 +++ .../apache/hadoop/record/XmlRecordInput.java | 243 ++ .../apache/hadoop/record/XmlRecordOutput.java | 248 ++ .../hadoop/record/compiler/CGenerator.java | 71 + .../hadoop/record/compiler/CodeBuffer.java | 96 + .../hadoop/record/compiler/CodeGenerator.java | 53 + .../apache/hadoop/record/compiler/Consts.java | 44 + .../hadoop/record/compiler/CppGenerator.java | 74 + .../hadoop/record/compiler/JBoolean.java | 92 + .../hadoop/record/compiler/JBuffer.java | 103 + .../apache/hadoop/record/compiler/JByte.java | 80 + .../hadoop/record/compiler/JCompType.java | 72 + .../hadoop/record/compiler/JDouble.java | 89 + .../apache/hadoop/record/compiler/JField.java | 44 + .../apache/hadoop/record/compiler/JFile.java | 70 + .../apache/hadoop/record/compiler/JFloat.java | 86 + .../apache/hadoop/record/compiler/JInt.java | 80 + .../apache/hadoop/record/compiler/JLong.java | 84 + .../apache/hadoop/record/compiler/JMap.java | 229 ++ .../hadoop/record/compiler/JRecord.java | 806 ++++ .../hadoop/record/compiler/JString.java | 83 + .../apache/hadoop/record/compiler/JType.java | 222 ++ .../hadoop/record/compiler/JVector.java | 197 + .../hadoop/record/compiler/JavaGenerator.java | 50 + .../hadoop/record/compiler/ant/RccTask.java | 136 + 
.../compiler/generated/ParseException.java | 210 ++ .../hadoop/record/compiler/generated/Rcc.java | 535 +++ .../compiler/generated/RccConstants.java | 88 + .../compiler/generated/RccTokenManager.java | 833 +++++ .../compiler/generated/SimpleCharStream.java | 439 +++ .../record/compiler/generated/Token.java | 99 + .../compiler/generated/TokenMgrError.java | 151 + .../record/compiler/generated/package.html | 29 + .../hadoop/record/compiler/generated/rcc.jj | 384 ++ .../hadoop/record/compiler/package.html | 31 + .../hadoop/record/meta/FieldTypeInfo.java | 98 + .../apache/hadoop/record/meta/MapTypeID.java | 82 + .../hadoop/record/meta/RecordTypeInfo.java | 151 + .../hadoop/record/meta/StructTypeID.java | 156 + .../org/apache/hadoop/record/meta/TypeID.java | 107 + .../org/apache/hadoop/record/meta/Utils.java | 96 + .../hadoop/record/meta/VectorTypeID.java | 65 + .../org/apache/hadoop/record/package.html | 800 ++++ .../security/AccessControlException.java | 56 + .../org/apache/hadoop/security/AccessKey.java | 110 + .../apache/hadoop/security/AccessToken.java | 89 + .../hadoop/security/AccessTokenHandler.java | 289 ++ .../hadoop/security/ExportedAccessKeys.java | 138 + .../org/apache/hadoop/security/Group.java | 70 + .../security/InvalidAccessTokenException.java | 36 + .../hadoop/security/PermissionChecker.java | 80 + .../apache/hadoop/security/SecurityUtil.java | 159 + .../security/UnixUserGroupInformation.java | 432 +++ src/java/org/apache/hadoop/security/User.java | 70 + .../hadoop/security/UserGroupInformation.java | 129 + .../authorize/AuthorizationException.java | 76 + .../security/authorize/ConfiguredPolicy.java | 156 + .../authorize/ConnectionPermission.java | 74 + .../security/authorize/PolicyProvider.java | 50 + .../RefreshAuthorizationPolicyProtocol.java | 39 + .../hadoop/security/authorize/Service.java | 53 + .../ServiceAuthorizationManager.java | 105 + .../apache/hadoop/util/CyclicIteration.java | 108 + src/java/org/apache/hadoop/util/Daemon.java | 51 + .../org/apache/hadoop/util/DataChecksum.java | 247 ++ .../org/apache/hadoop/util/DiskChecker.java | 89 + .../hadoop/util/GenericOptionsParser.java | 408 +++ .../org/apache/hadoop/util/GenericsUtil.java | 70 + src/java/org/apache/hadoop/util/HeapSort.java | 71 + .../apache/hadoop/util/HostsFileReader.java | 115 + .../apache/hadoop/util/IndexedSortable.java | 36 + .../org/apache/hadoop/util/IndexedSorter.java | 46 + .../org/apache/hadoop/util/LineReader.java | 190 + .../util/LinuxMemoryCalculatorPlugin.java | 132 + .../hadoop/util/MemoryCalculatorPlugin.java | 74 + .../org/apache/hadoop/util/MergeSort.java | 85 + .../apache/hadoop/util/NativeCodeLoader.java | 89 + .../org/apache/hadoop/util/PlatformName.java | 45 + .../apache/hadoop/util/PrintJarMainClass.java | 51 + .../org/apache/hadoop/util/PriorityQueue.java | 150 + .../org/apache/hadoop/util/ProcessTree.java | 239 ++ .../hadoop/util/ProcfsBasedProcessTree.java | 448 +++ .../org/apache/hadoop/util/ProgramDriver.java | 144 + src/java/org/apache/hadoop/util/Progress.java | 132 + .../org/apache/hadoop/util/Progressable.java | 35 + .../org/apache/hadoop/util/QuickSort.java | 131 + .../apache/hadoop/util/ReflectionUtils.java | 291 ++ src/java/org/apache/hadoop/util/RunJar.java | 166 + .../org/apache/hadoop/util/ServicePlugin.java | 46 + .../org/apache/hadoop/util/ServletUtil.java | 105 + src/java/org/apache/hadoop/util/Shell.java | 357 ++ .../org/apache/hadoop/util/StringUtils.java | 679 ++++ src/java/org/apache/hadoop/util/Tool.java | 79 + .../org/apache/hadoop/util/ToolRunner.java | 91 
+ .../hadoop/util/UTF8ByteArrayUtils.java | 98 + .../org/apache/hadoop/util/VersionInfo.java | 116 + src/java/org/apache/hadoop/util/XMLUtils.java | 56 + .../apache/hadoop/util/bloom/BloomFilter.java | 234 ++ .../util/bloom/CountingBloomFilter.java | 305 ++ .../hadoop/util/bloom/DynamicBloomFilter.java | 293 ++ .../org/apache/hadoop/util/bloom/Filter.java | 213 ++ .../hadoop/util/bloom/HashFunction.java | 119 + .../org/apache/hadoop/util/bloom/Key.java | 178 + .../hadoop/util/bloom/RemoveScheme.java | 91 + .../util/bloom/RetouchedBloomFilter.java | 450 +++ .../org/apache/hadoop/util/hash/Hash.java | 119 + .../apache/hadoop/util/hash/JenkinsHash.java | 258 ++ .../apache/hadoop/util/hash/MurmurHash.java | 83 + src/java/org/apache/hadoop/util/package.html | 23 + src/java/overview.html | 292 ++ 352 files changed, 67543 insertions(+) create mode 100644 src/java/core-default.xml create mode 100644 src/java/org/apache/hadoop/HadoopVersionAnnotation.java create mode 100644 src/java/org/apache/hadoop/conf/Configurable.java create mode 100644 src/java/org/apache/hadoop/conf/Configuration.java create mode 100644 src/java/org/apache/hadoop/conf/Configured.java create mode 100644 src/java/org/apache/hadoop/conf/package.html create mode 100644 src/java/org/apache/hadoop/filecache/DistributedCache.java create mode 100644 src/java/org/apache/hadoop/fs/BlockLocation.java create mode 100644 src/java/org/apache/hadoop/fs/BufferedFSInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/ChecksumException.java create mode 100644 src/java/org/apache/hadoop/fs/ChecksumFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/ContentSummary.java create mode 100644 src/java/org/apache/hadoop/fs/DF.java create mode 100644 src/java/org/apache/hadoop/fs/DU.java create mode 100644 src/java/org/apache/hadoop/fs/FSDataInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/FSDataOutputStream.java create mode 100644 src/java/org/apache/hadoop/fs/FSError.java create mode 100644 src/java/org/apache/hadoop/fs/FSInputChecker.java create mode 100644 src/java/org/apache/hadoop/fs/FSInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/FSOutputSummer.java create mode 100644 src/java/org/apache/hadoop/fs/FileChecksum.java create mode 100644 src/java/org/apache/hadoop/fs/FileStatus.java create mode 100644 src/java/org/apache/hadoop/fs/FileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/FileUtil.java create mode 100644 src/java/org/apache/hadoop/fs/FilterFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/FsShell.java create mode 100644 src/java/org/apache/hadoop/fs/FsShellPermissions.java create mode 100644 src/java/org/apache/hadoop/fs/FsStatus.java create mode 100644 src/java/org/apache/hadoop/fs/FsUrlConnection.java create mode 100644 src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java create mode 100644 src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java create mode 100644 src/java/org/apache/hadoop/fs/GlobExpander.java create mode 100644 src/java/org/apache/hadoop/fs/HarFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/LengthFileChecksum.java create mode 100644 src/java/org/apache/hadoop/fs/LocalDirAllocator.java create mode 100644 src/java/org/apache/hadoop/fs/LocalFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java create mode 100644 src/java/org/apache/hadoop/fs/Path.java create mode 100644 src/java/org/apache/hadoop/fs/PathFilter.java create mode 100644 
src/java/org/apache/hadoop/fs/PositionedReadable.java create mode 100644 src/java/org/apache/hadoop/fs/RawLocalFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/Seekable.java create mode 100644 src/java/org/apache/hadoop/fs/Syncable.java create mode 100644 src/java/org/apache/hadoop/fs/Trash.java create mode 100644 src/java/org/apache/hadoop/fs/ftp/FTPException.java create mode 100644 src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/IFSImpl.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KFSImpl.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/kfs/package.html create mode 100644 src/java/org/apache/hadoop/fs/package.html create mode 100644 src/java/org/apache/hadoop/fs/permission/AccessControlException.java create mode 100644 src/java/org/apache/hadoop/fs/permission/FsAction.java create mode 100644 src/java/org/apache/hadoop/fs/permission/FsPermission.java create mode 100644 src/java/org/apache/hadoop/fs/permission/PermissionStatus.java create mode 100644 src/java/org/apache/hadoop/fs/s3/Block.java create mode 100644 src/java/org/apache/hadoop/fs/s3/FileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3/INode.java create mode 100644 src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3/MigrationTool.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3Credentials.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3Exception.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3FileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3InputStream.java create mode 100644 src/java/org/apache/hadoop/fs/s3/S3OutputStream.java create mode 100644 src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java create mode 100644 src/java/org/apache/hadoop/fs/s3/package.html create mode 100644 src/java/org/apache/hadoop/fs/s3native/FileMetadata.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/PartialListing.java create mode 100644 src/java/org/apache/hadoop/fs/s3native/package.html create mode 100644 src/java/org/apache/hadoop/fs/shell/Command.java create mode 100644 src/java/org/apache/hadoop/fs/shell/CommandFormat.java create mode 100644 src/java/org/apache/hadoop/fs/shell/CommandUtils.java create mode 100644 src/java/org/apache/hadoop/fs/shell/Count.java create mode 100644 src/java/org/apache/hadoop/http/FilterContainer.java create mode 100644 src/java/org/apache/hadoop/http/FilterInitializer.java create mode 100644 src/java/org/apache/hadoop/http/HttpServer.java create mode 100644 src/java/org/apache/hadoop/io/AbstractMapWritable.java create mode 100644 src/java/org/apache/hadoop/io/ArrayFile.java create mode 100644 src/java/org/apache/hadoop/io/ArrayWritable.java create mode 100644 src/java/org/apache/hadoop/io/BinaryComparable.java create mode 100644 
src/java/org/apache/hadoop/io/BloomMapFile.java create mode 100644 src/java/org/apache/hadoop/io/BooleanWritable.java create mode 100644 src/java/org/apache/hadoop/io/ByteWritable.java create mode 100644 src/java/org/apache/hadoop/io/BytesWritable.java create mode 100644 src/java/org/apache/hadoop/io/Closeable.java create mode 100644 src/java/org/apache/hadoop/io/CompressedWritable.java create mode 100644 src/java/org/apache/hadoop/io/DataInputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/DataOutputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/DefaultStringifier.java create mode 100644 src/java/org/apache/hadoop/io/DeprecatedUTF8.java create mode 100644 src/java/org/apache/hadoop/io/DoubleWritable.java create mode 100644 src/java/org/apache/hadoop/io/EnumSetWritable.java create mode 100644 src/java/org/apache/hadoop/io/FloatWritable.java create mode 100644 src/java/org/apache/hadoop/io/GenericWritable.java create mode 100644 src/java/org/apache/hadoop/io/IOUtils.java create mode 100644 src/java/org/apache/hadoop/io/InputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/IntWritable.java create mode 100644 src/java/org/apache/hadoop/io/LongWritable.java create mode 100644 src/java/org/apache/hadoop/io/MD5Hash.java create mode 100644 src/java/org/apache/hadoop/io/MapFile.java create mode 100644 src/java/org/apache/hadoop/io/MapWritable.java create mode 100644 src/java/org/apache/hadoop/io/MultipleIOException.java create mode 100644 src/java/org/apache/hadoop/io/NullWritable.java create mode 100644 src/java/org/apache/hadoop/io/ObjectWritable.java create mode 100644 src/java/org/apache/hadoop/io/OutputBuffer.java create mode 100644 src/java/org/apache/hadoop/io/RawComparator.java create mode 100644 src/java/org/apache/hadoop/io/SequenceFile.java create mode 100644 src/java/org/apache/hadoop/io/SetFile.java create mode 100644 src/java/org/apache/hadoop/io/SortedMapWritable.java create mode 100644 src/java/org/apache/hadoop/io/Stringifier.java create mode 100644 src/java/org/apache/hadoop/io/Text.java create mode 100644 src/java/org/apache/hadoop/io/TwoDArrayWritable.java create mode 100644 src/java/org/apache/hadoop/io/UTF8.java create mode 100644 src/java/org/apache/hadoop/io/VIntWritable.java create mode 100644 src/java/org/apache/hadoop/io/VLongWritable.java create mode 100644 src/java/org/apache/hadoop/io/VersionMismatchException.java create mode 100644 src/java/org/apache/hadoop/io/VersionedWritable.java create mode 100644 src/java/org/apache/hadoop/io/Writable.java create mode 100644 src/java/org/apache/hadoop/io/WritableComparable.java create mode 100644 src/java/org/apache/hadoop/io/WritableComparator.java create mode 100644 src/java/org/apache/hadoop/io/WritableFactories.java create mode 100644 src/java/org/apache/hadoop/io/WritableFactory.java create mode 100644 src/java/org/apache/hadoop/io/WritableName.java create mode 100644 src/java/org/apache/hadoop/io/WritableUtils.java create mode 100644 src/java/org/apache/hadoop/io/compress/BZip2Codec.java create mode 100644 src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/CodecPool.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressionCodec.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressionInputStream.java create 
mode 100644 src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/Compressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/CompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/Decompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/DecompressorStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/DefaultCodec.java create mode 100644 src/java/org/apache/hadoop/io/compress/GzipCodec.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java create mode 100644 src/java/org/apache/hadoop/io/compress/bzip2/CRC.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java create mode 100644 src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java create mode 100644 src/java/org/apache/hadoop/io/package.html create mode 100644 src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java create mode 100644 src/java/org/apache/hadoop/io/retry/RetryPolicies.java create mode 100644 src/java/org/apache/hadoop/io/retry/RetryPolicy.java create mode 100644 src/java/org/apache/hadoop/io/retry/RetryProxy.java create mode 100644 src/java/org/apache/hadoop/io/retry/package.html create mode 100644 src/java/org/apache/hadoop/io/serializer/Deserializer.java create mode 100644 src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java create mode 100644 src/java/org/apache/hadoop/io/serializer/JavaSerialization.java create mode 100644 src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java create mode 100644 src/java/org/apache/hadoop/io/serializer/Serialization.java create mode 100644 src/java/org/apache/hadoop/io/serializer/SerializationFactory.java create mode 100644 src/java/org/apache/hadoop/io/serializer/Serializer.java create mode 100644 src/java/org/apache/hadoop/io/serializer/WritableSerialization.java create mode 100644 src/java/org/apache/hadoop/io/serializer/package.html create mode 100644 src/java/org/apache/hadoop/ipc/Client.java create mode 100644 src/java/org/apache/hadoop/ipc/ConnectionHeader.java create mode 100644 src/java/org/apache/hadoop/ipc/RPC.java create mode 100644 src/java/org/apache/hadoop/ipc/RemoteException.java create mode 100644 src/java/org/apache/hadoop/ipc/Server.java create mode 100644 src/java/org/apache/hadoop/ipc/Status.java create mode 100644 src/java/org/apache/hadoop/ipc/VersionedProtocol.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java create mode 100644 src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java create mode 100644 src/java/org/apache/hadoop/ipc/package.html create mode 100644 src/java/org/apache/hadoop/log/LogLevel.java create mode 
100644 src/java/org/apache/hadoop/metrics/ContextFactory.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsContext.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsException.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsRecord.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsServlet.java create mode 100644 src/java/org/apache/hadoop/metrics/MetricsUtil.java create mode 100644 src/java/org/apache/hadoop/metrics/Updater.java create mode 100644 src/java/org/apache/hadoop/metrics/file/FileContext.java create mode 100644 src/java/org/apache/hadoop/metrics/file/package.html create mode 100644 src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java create mode 100644 src/java/org/apache/hadoop/metrics/ganglia/package.html create mode 100644 src/java/org/apache/hadoop/metrics/jvm/EventCounter.java create mode 100644 src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java create mode 100644 src/java/org/apache/hadoop/metrics/package.html create mode 100644 src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/CompositeContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/MetricValue.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/NullContext.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/OutputRecord.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/Util.java create mode 100644 src/java/org/apache/hadoop/metrics/spi/package.html create mode 100644 src/java/org/apache/hadoop/metrics/util/MBeanUtil.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsBase.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java create mode 100644 src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java create mode 100644 src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java create mode 100644 src/java/org/apache/hadoop/net/DNS.java create mode 100644 src/java/org/apache/hadoop/net/DNSToSwitchMapping.java create mode 100644 src/java/org/apache/hadoop/net/NetUtils.java create mode 100644 src/java/org/apache/hadoop/net/NetworkTopology.java create mode 100644 src/java/org/apache/hadoop/net/Node.java create mode 100644 src/java/org/apache/hadoop/net/NodeBase.java create mode 100644 src/java/org/apache/hadoop/net/ScriptBasedMapping.java create mode 100644 src/java/org/apache/hadoop/net/SocketIOWithTimeout.java create mode 100644 src/java/org/apache/hadoop/net/SocketInputStream.java create mode 100644 src/java/org/apache/hadoop/net/SocketOutputStream.java create mode 100644 src/java/org/apache/hadoop/net/SocksSocketFactory.java create mode 100644 src/java/org/apache/hadoop/net/StandardSocketFactory.java create mode 100644 src/java/org/apache/hadoop/net/package.html create mode 100644 
src/java/org/apache/hadoop/record/BinaryRecordInput.java create mode 100644 src/java/org/apache/hadoop/record/BinaryRecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/Buffer.java create mode 100644 src/java/org/apache/hadoop/record/CsvRecordInput.java create mode 100644 src/java/org/apache/hadoop/record/CsvRecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/Index.java create mode 100644 src/java/org/apache/hadoop/record/Record.java create mode 100644 src/java/org/apache/hadoop/record/RecordComparator.java create mode 100644 src/java/org/apache/hadoop/record/RecordInput.java create mode 100644 src/java/org/apache/hadoop/record/RecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/Utils.java create mode 100644 src/java/org/apache/hadoop/record/XmlRecordInput.java create mode 100644 src/java/org/apache/hadoop/record/XmlRecordOutput.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CodeBuffer.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CodeGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/Consts.java create mode 100644 src/java/org/apache/hadoop/record/compiler/CppGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JBoolean.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JBuffer.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JByte.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JCompType.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JDouble.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JField.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JFile.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JFloat.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JInt.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JLong.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JMap.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JRecord.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JString.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JType.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JVector.java create mode 100644 src/java/org/apache/hadoop/record/compiler/JavaGenerator.java create mode 100644 src/java/org/apache/hadoop/record/compiler/ant/RccTask.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/ParseException.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/Rcc.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/RccConstants.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/Token.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/package.html create mode 100644 src/java/org/apache/hadoop/record/compiler/generated/rcc.jj create mode 100644 src/java/org/apache/hadoop/record/compiler/package.html create mode 100644 src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java create mode 100644 
src/java/org/apache/hadoop/record/meta/MapTypeID.java create mode 100644 src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java create mode 100644 src/java/org/apache/hadoop/record/meta/StructTypeID.java create mode 100644 src/java/org/apache/hadoop/record/meta/TypeID.java create mode 100644 src/java/org/apache/hadoop/record/meta/Utils.java create mode 100644 src/java/org/apache/hadoop/record/meta/VectorTypeID.java create mode 100644 src/java/org/apache/hadoop/record/package.html create mode 100644 src/java/org/apache/hadoop/security/AccessControlException.java create mode 100644 src/java/org/apache/hadoop/security/AccessKey.java create mode 100644 src/java/org/apache/hadoop/security/AccessToken.java create mode 100644 src/java/org/apache/hadoop/security/AccessTokenHandler.java create mode 100644 src/java/org/apache/hadoop/security/ExportedAccessKeys.java create mode 100644 src/java/org/apache/hadoop/security/Group.java create mode 100644 src/java/org/apache/hadoop/security/InvalidAccessTokenException.java create mode 100644 src/java/org/apache/hadoop/security/PermissionChecker.java create mode 100644 src/java/org/apache/hadoop/security/SecurityUtil.java create mode 100644 src/java/org/apache/hadoop/security/UnixUserGroupInformation.java create mode 100644 src/java/org/apache/hadoop/security/User.java create mode 100644 src/java/org/apache/hadoop/security/UserGroupInformation.java create mode 100644 src/java/org/apache/hadoop/security/authorize/AuthorizationException.java create mode 100644 src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java create mode 100644 src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java create mode 100644 src/java/org/apache/hadoop/security/authorize/PolicyProvider.java create mode 100644 src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java create mode 100644 src/java/org/apache/hadoop/security/authorize/Service.java create mode 100644 src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java create mode 100644 src/java/org/apache/hadoop/util/CyclicIteration.java create mode 100644 src/java/org/apache/hadoop/util/Daemon.java create mode 100644 src/java/org/apache/hadoop/util/DataChecksum.java create mode 100644 src/java/org/apache/hadoop/util/DiskChecker.java create mode 100644 src/java/org/apache/hadoop/util/GenericOptionsParser.java create mode 100644 src/java/org/apache/hadoop/util/GenericsUtil.java create mode 100644 src/java/org/apache/hadoop/util/HeapSort.java create mode 100644 src/java/org/apache/hadoop/util/HostsFileReader.java create mode 100644 src/java/org/apache/hadoop/util/IndexedSortable.java create mode 100644 src/java/org/apache/hadoop/util/IndexedSorter.java create mode 100644 src/java/org/apache/hadoop/util/LineReader.java create mode 100644 src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java create mode 100644 src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java create mode 100644 src/java/org/apache/hadoop/util/MergeSort.java create mode 100644 src/java/org/apache/hadoop/util/NativeCodeLoader.java create mode 100644 src/java/org/apache/hadoop/util/PlatformName.java create mode 100644 src/java/org/apache/hadoop/util/PrintJarMainClass.java create mode 100644 src/java/org/apache/hadoop/util/PriorityQueue.java create mode 100644 src/java/org/apache/hadoop/util/ProcessTree.java create mode 100644 src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java create mode 100644 src/java/org/apache/hadoop/util/ProgramDriver.java create mode 
100644 src/java/org/apache/hadoop/util/Progress.java create mode 100644 src/java/org/apache/hadoop/util/Progressable.java create mode 100644 src/java/org/apache/hadoop/util/QuickSort.java create mode 100644 src/java/org/apache/hadoop/util/ReflectionUtils.java create mode 100644 src/java/org/apache/hadoop/util/RunJar.java create mode 100644 src/java/org/apache/hadoop/util/ServicePlugin.java create mode 100644 src/java/org/apache/hadoop/util/ServletUtil.java create mode 100644 src/java/org/apache/hadoop/util/Shell.java create mode 100644 src/java/org/apache/hadoop/util/StringUtils.java create mode 100644 src/java/org/apache/hadoop/util/Tool.java create mode 100644 src/java/org/apache/hadoop/util/ToolRunner.java create mode 100644 src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java create mode 100644 src/java/org/apache/hadoop/util/VersionInfo.java create mode 100644 src/java/org/apache/hadoop/util/XMLUtils.java create mode 100644 src/java/org/apache/hadoop/util/bloom/BloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/Filter.java create mode 100644 src/java/org/apache/hadoop/util/bloom/HashFunction.java create mode 100644 src/java/org/apache/hadoop/util/bloom/Key.java create mode 100644 src/java/org/apache/hadoop/util/bloom/RemoveScheme.java create mode 100644 src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java create mode 100644 src/java/org/apache/hadoop/util/hash/Hash.java create mode 100644 src/java/org/apache/hadoop/util/hash/JenkinsHash.java create mode 100644 src/java/org/apache/hadoop/util/hash/MurmurHash.java create mode 100644 src/java/org/apache/hadoop/util/package.html create mode 100644 src/java/overview.html diff --git a/src/java/core-default.xml b/src/java/core-default.xml new file mode 100644 index 00000000000..b56dda4235b --- /dev/null +++ b/src/java/core-default.xml @@ -0,0 +1,444 @@ + + + + + + + + + + + + + hadoop.tmp.dir + /tmp/hadoop-${user.name} + A base for other temporary directories. + + + + hadoop.native.lib + true + Should native hadoop libraries, if present, be used. + + + + hadoop.http.filter.initializers + + A comma separated list of class names. Each class in the list + must extend org.apache.hadoop.http.FilterInitializer. The corresponding + Filter will be initialized. Then, the Filter will be applied to all user + facing jsp and servlet web pages. The ordering of the list defines the + ordering of the filters. + + + + hadoop.security.authorization + false + Is service-level authorization enabled? + + + + + + hadoop.logfile.size + 10000000 + The max size of each log file + + + + hadoop.logfile.count + 10 + The max number of log files + + + + + io.file.buffer.size + 4096 + The size of buffer for use in sequence files. + The size of this buffer should probably be a multiple of hardware + page size (4096 on Intel x86), and it determines how much data is + buffered during read and write operations. + + + + io.bytes.per.checksum + 512 + The number of bytes per checksum. Must not be larger than + io.file.buffer.size. + + + + io.skip.checksum.errors + false + If true, when a checksum error is encountered while + reading a sequence file, entries are skipped, instead of throwing an + exception. 
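A minimal usage sketch (illustrative only; the class name CoreIoSettingsSketch is hypothetical and this is not a file in this patch) of how client code reads the I/O settings above through the Configuration API added in this commit, assuming core-default.xml and core-site.xml are on the classpath:

import org.apache.hadoop.conf.Configuration;

public class CoreIoSettingsSketch {
  public static void main(String[] args) {
    // new Configuration() loads core-default.xml and then core-site.xml.
    Configuration conf = new Configuration();
    int bufferSize = conf.getInt("io.file.buffer.size", 4096);
    int bytesPerChecksum = conf.getInt("io.bytes.per.checksum", 512);
    boolean skipChecksumErrors = conf.getBoolean("io.skip.checksum.errors", false);
    System.out.println(bufferSize + " " + bytesPerChecksum + " " + skipChecksumErrors);
  }
}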
+ + + + io.compression.codecs + org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec + A list of the compression codec classes that can be used + for compression/decompression. + + + + io.serializations + org.apache.hadoop.io.serializer.WritableSerialization + A list of serialization classes that can be used for + obtaining serializers and deserializers. + + + + io.seqfile.local.dir + ${hadoop.tmp.dir}/io/local + The local directory where sequence file stores intermediate + data files during merge. May be a comma-separated list of + directories on different devices in order to spread disk i/o. + Directories that do not exist are ignored. + + + + + + + fs.default.name + file:/// + The name of the default file system. A URI whose + scheme and authority determine the FileSystem implementation. The + uri's scheme determines the config property (fs.SCHEME.impl) naming + the FileSystem implementation class. The uri's authority is used to + determine the host, port, etc. for a filesystem. + + + + fs.trash.interval + 0 + Number of minutes between trash checkpoints. + If zero, the trash feature is disabled. + + + + + fs.file.impl + org.apache.hadoop.fs.LocalFileSystem + The FileSystem for file: uris. + + + + fs.hdfs.impl + org.apache.hadoop.hdfs.DistributedFileSystem + The FileSystem for hdfs: uris. + + + + fs.s3.impl + org.apache.hadoop.fs.s3.S3FileSystem + The FileSystem for s3: uris. + + + + fs.s3n.impl + org.apache.hadoop.fs.s3native.NativeS3FileSystem + The FileSystem for s3n: (Native S3) uris. + + + + fs.kfs.impl + org.apache.hadoop.fs.kfs.KosmosFileSystem + The FileSystem for kfs: uris. + + + + fs.hftp.impl + org.apache.hadoop.hdfs.HftpFileSystem + + + + fs.hsftp.impl + org.apache.hadoop.hdfs.HsftpFileSystem + + + + fs.ftp.impl + org.apache.hadoop.fs.ftp.FTPFileSystem + The FileSystem for ftp: uris. + + + + fs.ramfs.impl + org.apache.hadoop.fs.InMemoryFileSystem + The FileSystem for ramfs: uris. + + + + fs.har.impl + org.apache.hadoop.fs.HarFileSystem + The filesystem for Hadoop archives. + + + + fs.checkpoint.dir + ${hadoop.tmp.dir}/dfs/namesecondary + Determines where on the local filesystem the DFS secondary + name node should store the temporary images to merge. + If this is a comma-delimited list of directories then the image is + replicated in all of the directories for redundancy. + + + + + fs.checkpoint.edits.dir + ${fs.checkpoint.dir} + Determines where on the local filesystem the DFS secondary + name node should store the temporary edits to merge. + If this is a comma-delimited list of directories then the edits are + replicated in all of the directories for redundancy. + Default value is same as fs.checkpoint.dir + + + + + fs.checkpoint.period + 3600 + The number of seconds between two periodic checkpoints. + + + + + fs.checkpoint.size + 67108864 + The size of the current edit log (in bytes) that triggers + a periodic checkpoint even if the fs.checkpoint.period hasn't expired. + + + + + + + fs.s3.block.size + 67108864 + Block size to use when writing files to S3. + + + + fs.s3.buffer.dir + ${hadoop.tmp.dir}/s3 + Determines where on the local filesystem the S3 filesystem + should store files before sending them to S3 + (or after retrieving them from S3). + + + + + fs.s3.maxRetries + 4 + The maximum number of retries for reading or writing files to S3, + before we signal failure to the application. + + + + + fs.s3.sleepTimeSeconds + 10 + The number of seconds to sleep between each S3 retry. 
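A minimal sketch (hypothetical class name, not a file in this patch) of how the fs.*.impl mappings above are used: FileSystem.get consults fs.default.name when no URI is given, and fs.<scheme>.impl names the implementation class registered for a scheme:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsSchemeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // fs.default.name is file:/// unless overridden in core-site.xml.
    FileSystem fs = FileSystem.get(conf);
    System.out.println("default filesystem: " + fs.getUri());
    // The implementation class for a scheme is looked up under fs.<scheme>.impl.
    System.out.println("s3 implementation: " + conf.get("fs.s3.impl"));
    System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
  }
}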
+ + + + + + local.cache.size + 10737418240 + The limit on the size of cache you want to keep, set by default + to 10GB. This will act as a soft limit on the cache directory for out of band data. + + + + + io.seqfile.compress.blocksize + 1000000 + The minimum block size for compression in block compressed + SequenceFiles. + + + + + io.seqfile.lazydecompress + true + Should values of block-compressed SequenceFiles be decompressed + only when necessary. + + + + + io.seqfile.sorter.recordlimit + 1000000 + The limit on number of records to be kept in memory in a spill + in SequenceFiles.Sorter + + + + + io.mapfile.bloom.size + 1048576 + The size of BloomFilter-s used in BloomMapFile. Each time this many + keys is appended the next BloomFilter will be created (inside a DynamicBloomFilter). + Larger values minimize the number of filters, which slightly increases the performance, + but may waste too much space if the total number of keys is usually much smaller + than this number. + + + + + io.mapfile.bloom.error.rate + 0.005 + The rate of false positives in BloomFilter-s used in BloomMapFile. + As this value decreases, the size of BloomFilter-s increases exponentially. This + value is the probability of encountering false positives (default is 0.5%). + + + + + hadoop.util.hash.type + murmur + The default implementation of Hash. Currently this can take one of the + two values: 'murmur' to select MurmurHash and 'jenkins' to select JenkinsHash. + + + + + + + + ipc.client.idlethreshold + 4000 + Defines the threshold number of connections after which + connections will be inspected for idleness. + + + + + ipc.client.kill.max + 10 + Defines the maximum number of clients to disconnect in one go. + + + + + ipc.client.connection.maxidletime + 10000 + The maximum time in msec after which a client will bring down the + connection to the server. + + + + + ipc.client.connect.max.retries + 10 + Indicates the number of retries a client will make to establish + a server connection. + + + + + ipc.server.listen.queue.size + 128 + Indicates the length of the listen queue for servers accepting + client connections. + + + + + ipc.server.tcpnodelay + false + Turn on/off Nagle's algorithm for the TCP socket connection on + the server. Setting to true disables the algorithm and may decrease latency + with a cost of more/smaller packets. + + + + + ipc.client.tcpnodelay + false + Turn on/off Nagle's algorithm for the TCP socket connection on + the client. Setting to true disables the algorithm and may decrease latency + with a cost of more/smaller packets. + + + + + + + + webinterface.private.actions + false + If set to true, the web interfaces of JT and NN may contain + actions, such as kill job, delete file, etc., that should + not be exposed to public. Enable this option if the interfaces + are only reachable by those who have the right authorization. + + + + + + + hadoop.rpc.socket.factory.class.default + org.apache.hadoop.net.StandardSocketFactory + Default SocketFactory to use. This parameter is expected to be + formatted as "package.FactoryClassName". + + + + + hadoop.rpc.socket.factory.class.ClientProtocol + + SocketFactory to use to connect to a DFS. If null or empty, use + hadoop.rpc.socket.class.default. This socket factory is also used by + DFSClient to create sockets to DataNodes. + + + + + + + hadoop.socks.server + + Address (host:port) of the SOCKS server to be used by the + SocksSocketFactory. 
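The ipc.* and socket-factory settings above can also be overridden programmatically before any IPC client is created; a small sketch (hypothetical class name, not part of this patch) using only the Configuration setters, where in-code values take precedence over the XML resources unless a property is marked final:

import org.apache.hadoop.conf.Configuration;

public class IpcTuningSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setInt("ipc.client.connect.max.retries", 3);
    conf.setInt("ipc.client.connection.maxidletime", 5000); // milliseconds
    conf.setBoolean("ipc.client.tcpnodelay", true);
    System.out.println(conf.getInt("ipc.client.connect.max.retries", 10));
  }
}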
+ + + + + + + topology.node.switch.mapping.impl + org.apache.hadoop.net.ScriptBasedMapping + The default implementation of the DNSToSwitchMapping. It + invokes a script specified in topology.script.file.name to resolve + node names. If the value for topology.script.file.name is not set, the + default value of DEFAULT_RACK is returned for all node names. + + + + + topology.script.file.name + + The script name that should be invoked to resolve DNS names to + NetworkTopology names. Example: the script would take host.foo.bar as an + argument, and return /rack1 as the output. + + + + + topology.script.number.args + 100 + The max number of args that the script configured with + topology.script.file.name should be run with. Each arg is an + IP address. + + + + + + diff --git a/src/java/org/apache/hadoop/HadoopVersionAnnotation.java b/src/java/org/apache/hadoop/HadoopVersionAnnotation.java new file mode 100644 index 00000000000..324003a839b --- /dev/null +++ b/src/java/org/apache/hadoop/HadoopVersionAnnotation.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop; + +import java.lang.annotation.*; + +/** + * A package attribute that captures the version of Hadoop that was compiled. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.PACKAGE) +public @interface HadoopVersionAnnotation { + + /** + * Get the Hadoop version + * @return the version string "0.6.3-dev" + */ + String version(); + + /** + * Get the username that compiled Hadoop. + */ + String user(); + + /** + * Get the date when Hadoop was compiled. + * @return the date in unix 'date' format + */ + String date(); + + /** + * Get the url for the subversion repository. + */ + String url(); + + /** + * Get the subversion revision. + * @return the revision number as a string (eg. "451451") + */ + String revision(); + + /** + * Get the branch from which this was compiled. + * @return The branch name, e.g. "trunk" or "branches/branch-0.20" + */ + String branch(); + + /** + * Get a checksum of the source files from which + * Hadoop was compiled. + * @return a string that uniquely identifies the source + **/ + String srcChecksum(); +} diff --git a/src/java/org/apache/hadoop/conf/Configurable.java b/src/java/org/apache/hadoop/conf/Configurable.java new file mode 100644 index 00000000000..f4637f0e82b --- /dev/null +++ b/src/java/org/apache/hadoop/conf/Configurable.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.conf; + +/** Something that may be configured with a {@link Configuration}. */ +public interface Configurable { + + /** Set the configuration to be used by this object. */ + void setConf(Configuration conf); + + /** Return the configuration used by this object. */ + Configuration getConf(); +} diff --git a/src/java/org/apache/hadoop/conf/Configuration.java b/src/java/org/apache/hadoop/conf/Configuration.java new file mode 100644 index 00000000000..e1381f3bb62 --- /dev/null +++ b/src/java/org/apache/hadoop/conf/Configuration.java @@ -0,0 +1,1326 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.conf; + +import java.io.BufferedInputStream; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.Reader; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.StringTokenizer; +import java.util.WeakHashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.w3c.dom.DOMException; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; +import org.xml.sax.SAXException; + +/** + * Provides access to configuration parameters. + * + *

+ * <h4>Resources</h4>
+ *
+ * <p>Configurations are specified by resources. A resource contains a set of
+ * name/value pairs as XML data. Each resource is named by either a
+ * String or by a {@link Path}. If named by a String,
+ * then the classpath is examined for a file with that name. If named by a
+ * Path, then the local filesystem is examined directly, without
+ * referring to the classpath.
+ *
+ * <p>Unless explicitly turned off, Hadoop by default specifies two
+ * resources, loaded in-order from the classpath:
+ * <ol>
+ * <li>core-default.xml: Read-only defaults for hadoop.</li>
+ * <li>core-site.xml: Site-specific configuration for a given hadoop
+ * installation.</li>
+ * </ol>
+ * Applications may add additional resources, which are loaded
+ * subsequent to these resources in the order they are added.
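A minimal usage sketch of the resource layering described above (illustrative only; it assumes Hadoop on the classpath and "my-app-site.xml" is a hypothetical application-supplied resource):

    import org.apache.hadoop.conf.Configuration;

    public class ResourceLayeringSketch {
      public static void main(String[] args) {
        // Loads core-default.xml and then core-site.xml from the classpath.
        Configuration conf = new Configuration();
        // A hypothetical application resource, loaded after the defaults and
        // therefore able to override any non-final property they define.
        conf.addResource("my-app-site.xml");
        System.out.println(conf.get("hadoop.tmp.dir"));
      }
    }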

+ * <h4>Final Parameters</h4>
+ *
+ * <p>Configuration parameters may be declared final.
+ * Once a resource declares a value final, no subsequently-loaded
+ * resource can alter that value.
+ * For example, one might define a final parameter with:
+ *
+ *  <property>
+ *    <name>dfs.client.buffer.dir</name>
+ *    <value>/tmp/hadoop/dfs/client</value>
+ *    <final>true</final>
+ *  </property>
+ *
+ * Administrators typically define parameters as final in
+ * core-site.xml for values that user applications may not alter.
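A short sketch of the final-parameter behaviour just described, using only classes from this patch; the in-memory XML snippets are made up for illustration:

    import java.io.ByteArrayInputStream;
    import java.io.InputStream;
    import org.apache.hadoop.conf.Configuration;

    public class FinalParameterSketch {
      // Wrap a property list in a <configuration> element, as loadResource expects.
      private static InputStream xml(String properties) {
        return new ByteArrayInputStream(
            ("<configuration>" + properties + "</configuration>").getBytes());
      }

      public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip the default resources
        conf.addResource(xml("<property><name>dfs.client.buffer.dir</name>"
            + "<value>/tmp/hadoop/dfs/client</value><final>true</final></property>"));
        // A later resource tries to override the final value and is ignored
        // (a warning is logged instead).
        conf.addResource(xml("<property><name>dfs.client.buffer.dir</name>"
            + "<value>/some/other/dir</value></property>"));
        System.out.println(conf.get("dfs.client.buffer.dir")); // /tmp/hadoop/dfs/client
      }
    }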

+ * <h4>Variable Expansion</h4>
+ *
+ * <p>Value strings are first processed for variable expansion. The
+ * available properties are:
+ * <ol>
+ * <li>Other properties defined in this Configuration; and, if a name is
+ * undefined here,</li>
+ * <li>Properties in {@link System#getProperties()}.</li>
+ * </ol>

For example, if a configuration resource contains the following property + * definitions: + *

+ *  <property>
+ *    <name>basedir</name>
+ *    <value>/user/${user.name}</value>
+ *  </property>
+ *  
+ *  <property>
+ *    <name>tempdir</name>
+ *    <value>${basedir}/tmp</value>
+ *  </property>
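The same expansion can be exercised programmatically; a short sketch using the property names from the example above:

    import org.apache.hadoop.conf.Configuration;

    public class VariableExpansionSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("basedir", "/user/${user.name}");
        conf.set("tempdir", "${basedir}/tmp");
        // Prints /user/<current user>/tmp once both variables are expanded.
        System.out.println(conf.get("tempdir"));
      }
    }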
+ * + * When conf.get("tempdir") is called, then ${basedir} + * will be resolved to another property in this Configuration, while + * ${user.name} would then ordinarily be resolved to the value + * of the System property with that name. + */ +public class Configuration implements Iterable>, + Writable { + private static final Log LOG = + LogFactory.getLog(Configuration.class); + + private boolean quietmode = true; + + /** + * List of configuration resources. + */ + private ArrayList resources = new ArrayList(); + + /** + * List of configuration parameters marked final. + */ + private Set finalParameters = new HashSet(); + + private boolean loadDefaults = true; + + /** + * Configurtion objects + */ + private static final WeakHashMap REGISTRY = + new WeakHashMap(); + + /** + * List of default Resources. Resources are loaded in the order of the list + * entries + */ + private static final ArrayList defaultResources = + new ArrayList(); + + static{ + //print deprecation warning if hadoop-site.xml is found in classpath + ClassLoader cL = Thread.currentThread().getContextClassLoader(); + if (cL == null) { + cL = Configuration.class.getClassLoader(); + } + if(cL.getResource("hadoop-site.xml")!=null) { + LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. " + + "Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, " + + "mapred-site.xml and hdfs-site.xml to override properties of " + + "core-default.xml, mapred-default.xml and hdfs-default.xml " + + "respectively"); + } + addDefaultResource("core-default.xml"); + addDefaultResource("core-site.xml"); + } + + private Properties properties; + private Properties overlay; + private ClassLoader classLoader; + { + classLoader = Thread.currentThread().getContextClassLoader(); + if (classLoader == null) { + classLoader = Configuration.class.getClassLoader(); + } + } + + /** A new configuration. */ + public Configuration() { + this(true); + } + + /** A new configuration where the behavior of reading from the default + * resources can be turned off. + * + * If the parameter {@code loadDefaults} is false, the new instance + * will not load resources from the default files. + * @param loadDefaults specifies whether to load from the default files + */ + public Configuration(boolean loadDefaults) { + this.loadDefaults = loadDefaults; + if (LOG.isDebugEnabled()) { + LOG.debug(StringUtils.stringifyException(new IOException("config()"))); + } + synchronized(Configuration.class) { + REGISTRY.put(this, null); + } + } + + /** + * A new configuration with the same settings cloned from another. + * + * @param other the configuration from which to clone settings. + */ + @SuppressWarnings("unchecked") + public Configuration(Configuration other) { + if (LOG.isDebugEnabled()) { + LOG.debug(StringUtils.stringifyException + (new IOException("config(config)"))); + } + + this.resources = (ArrayList)other.resources.clone(); + synchronized(other) { + if (other.properties != null) { + this.properties = (Properties)other.properties.clone(); + } + + if (other.overlay!=null) { + this.overlay = (Properties)other.overlay.clone(); + } + } + + this.finalParameters = new HashSet(other.finalParameters); + synchronized(Configuration.class) { + REGISTRY.put(this, null); + } + } + + /** + * Add a default resource. Resources are loaded in the order of the resources + * added. + * @param name file name. File should be present in the classpath. 
+ */ + public static synchronized void addDefaultResource(String name) { + if(!defaultResources.contains(name)) { + defaultResources.add(name); + for(Configuration conf : REGISTRY.keySet()) { + if(conf.loadDefaults) { + conf.reloadConfiguration(); + } + } + } + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param name resource to be added, the classpath is examined for a file + * with that name. + */ + public void addResource(String name) { + addResourceObject(name); + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param url url of the resource to be added, the local filesystem is + * examined directly to find the resource, without referring to + * the classpath. + */ + public void addResource(URL url) { + addResourceObject(url); + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param file file-path of resource to be added, the local filesystem is + * examined directly to find the resource, without referring to + * the classpath. + */ + public void addResource(Path file) { + addResourceObject(file); + } + + /** + * Add a configuration resource. + * + * The properties of this resource will override properties of previously + * added resources, unless they were marked final. + * + * @param in InputStream to deserialize the object from. + */ + public void addResource(InputStream in) { + addResourceObject(in); + } + + + /** + * Reload configuration from previously added resources. + * + * This method will clear all the configuration read from the added + * resources, and final parameters. This will make the resources to + * be read again before accessing the values. Values that are added + * via set methods will overlay values read from the resources. + */ + public synchronized void reloadConfiguration() { + properties = null; // trigger reload + finalParameters.clear(); // clear site-limits + } + + private synchronized void addResourceObject(Object resource) { + resources.add(resource); // add to resources + reloadConfiguration(); + } + + private static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}"); + private static int MAX_SUBST = 20; + + private String substituteVars(String expr) { + if (expr == null) { + return null; + } + Matcher match = varPat.matcher(""); + String eval = expr; + for(int s=0; sname property, null if + * no such property exists. + * + * Values are processed for variable expansion + * before being returned. + * + * @param name the property name. + * @return the value of the name property, + * or null if no such property exists. + */ + public String get(String name) { + return substituteVars(getProps().getProperty(name)); + } + + /** + * Get the value of the name property, without doing + * variable expansion. + * + * @param name the property name. + * @return the value of the name property, + * or null if no such property exists. + */ + public String getRaw(String name) { + return getProps().getProperty(name); + } + + /** + * Set the value of the name property. + * + * @param name property name. + * @param value property value. 
+ */ + public void set(String name, String value) { + getOverlay().setProperty(name, value); + getProps().setProperty(name, value); + } + + /** + * Sets a property if it is currently unset. + * @param name the property name + * @param value the new value + */ + public void setIfUnset(String name, String value) { + if (get(name) == null) { + set(name, value); + } + } + + private synchronized Properties getOverlay() { + if (overlay==null){ + overlay=new Properties(); + } + return overlay; + } + + /** + * Get the value of the name property. If no such property + * exists, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value, or defaultValue if the property + * doesn't exist. + */ + public String get(String name, String defaultValue) { + return substituteVars(getProps().getProperty(name, defaultValue)); + } + + /** + * Get the value of the name property as an int. + * + * If no such property exists, or if the specified value is not a valid + * int, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as an int, + * or defaultValue. + */ + public int getInt(String name, int defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + String hexString = getHexDigits(valueString); + if (hexString != null) { + return Integer.parseInt(hexString, 16); + } + return Integer.parseInt(valueString); + } catch (NumberFormatException e) { + return defaultValue; + } + } + + /** + * Set the value of the name property to an int. + * + * @param name property name. + * @param value int value of the property. + */ + public void setInt(String name, int value) { + set(name, Integer.toString(value)); + } + + + /** + * Get the value of the name property as a long. + * If no such property is specified, or if the specified value is not a valid + * long, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as a long, + * or defaultValue. + */ + public long getLong(String name, long defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + String hexString = getHexDigits(valueString); + if (hexString != null) { + return Long.parseLong(hexString, 16); + } + return Long.parseLong(valueString); + } catch (NumberFormatException e) { + return defaultValue; + } + } + + private String getHexDigits(String value) { + boolean negative = false; + String str = value; + String hexString = null; + if (value.startsWith("-")) { + negative = true; + str = value.substring(1); + } + if (str.startsWith("0x") || str.startsWith("0X")) { + hexString = str.substring(2); + if (negative) { + hexString = "-" + hexString; + } + return hexString; + } + return null; + } + + /** + * Set the value of the name property to a long. + * + * @param name property name. + * @param value long value of the property. + */ + public void setLong(String name, long value) { + set(name, Long.toString(value)); + } + + /** + * Get the value of the name property as a float. + * If no such property is specified, or if the specified value is not a valid + * float, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as a float, + * or defaultValue. 
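A sketch of the numeric getters described above ("my.buffer.size" is an arbitrary, made-up property name): hexadecimal values are accepted, and unparsable values fall back to the supplied default.

    import org.apache.hadoop.conf.Configuration;

    public class NumericGetterSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("my.buffer.size", "0x200");
        System.out.println(conf.getInt("my.buffer.size", 4096));  // 512, parsed as hex
        conf.set("my.buffer.size", "not-a-number");
        System.out.println(conf.getInt("my.buffer.size", 4096));  // 4096, the default
      }
    }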
+ */ + public float getFloat(String name, float defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + return Float.parseFloat(valueString); + } catch (NumberFormatException e) { + return defaultValue; + } + } + /** + * Set the value of the name property to a float. + * + * @param name property name. + * @param value property value. + */ + public void setFloat(String name, float value) { + set(name,Float.toString(value)); + } + + /** + * Get the value of the name property as a boolean. + * If no such property is specified, or if the specified value is not a valid + * boolean, then defaultValue is returned. + * + * @param name property name. + * @param defaultValue default value. + * @return property value as a boolean, + * or defaultValue. + */ + public boolean getBoolean(String name, boolean defaultValue) { + String valueString = get(name); + if ("true".equals(valueString)) + return true; + else if ("false".equals(valueString)) + return false; + else return defaultValue; + } + + /** + * Set the value of the name property to a boolean. + * + * @param name property name. + * @param value boolean value of the property. + */ + public void setBoolean(String name, boolean value) { + set(name, Boolean.toString(value)); + } + + /** + * Set the given property, if it is currently unset. + * @param name property name + * @param value new value + */ + public void setBooleanIfUnset(String name, boolean value) { + setIfUnset(name, Boolean.toString(value)); + } + + /** + * A class that represents a set of positive integer ranges. It parses + * strings of the form: "2-3,5,7-" where ranges are separated by comma and + * the lower/upper bounds are separated by dash. Either the lower or upper + * bound may be omitted meaning all values up to or over. So the string + * above means 2, 3, 5, and 7, 8, 9, ... + */ + public static class IntegerRanges { + private static class Range { + int start; + int end; + } + + List ranges = new ArrayList(); + + public IntegerRanges() { + } + + public IntegerRanges(String newValue) { + StringTokenizer itr = new StringTokenizer(newValue, ","); + while (itr.hasMoreTokens()) { + String rng = itr.nextToken().trim(); + String[] parts = rng.split("-", 3); + if (parts.length < 1 || parts.length > 2) { + throw new IllegalArgumentException("integer range badly formed: " + + rng); + } + Range r = new Range(); + r.start = convertToInt(parts[0], 0); + if (parts.length == 2) { + r.end = convertToInt(parts[1], Integer.MAX_VALUE); + } else { + r.end = r.start; + } + if (r.start > r.end) { + throw new IllegalArgumentException("IntegerRange from " + r.start + + " to " + r.end + " is invalid"); + } + ranges.add(r); + } + } + + /** + * Convert a string to an int treating empty strings as the default value. + * @param value the string value + * @param defaultValue the value for if the string is empty + * @return the desired integer + */ + private static int convertToInt(String value, int defaultValue) { + String trim = value.trim(); + if (trim.length() == 0) { + return defaultValue; + } + return Integer.parseInt(trim); + } + + /** + * Is the given value in the set of ranges + * @param value the value to check + * @return is the value in the ranges? 
+ */ + public boolean isIncluded(int value) { + for(Range r: ranges) { + if (r.start <= value && value <= r.end) { + return true; + } + } + return false; + } + + @Override + public String toString() { + StringBuffer result = new StringBuffer(); + boolean first = true; + for(Range r: ranges) { + if (first) { + first = false; + } else { + result.append(','); + } + result.append(r.start); + result.append('-'); + result.append(r.end); + } + return result.toString(); + } + } + + /** + * Parse the given attribute as a set of integer ranges + * @param name the attribute name + * @param defaultValue the default value if it is not set + * @return a new set of ranges from the configured value + */ + public IntegerRanges getRange(String name, String defaultValue) { + return new IntegerRanges(get(name, defaultValue)); + } + + /** + * Get the comma delimited values of the name property as + * a collection of Strings. + * If no such property is specified then empty collection is returned. + *
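A sketch of the range syntax parsed by IntegerRanges, using the "2-3,5,7-" example from its javadoc above:

    import org.apache.hadoop.conf.Configuration;

    public class IntegerRangesSketch {
      public static void main(String[] args) {
        Configuration.IntegerRanges ranges =
            new Configuration.IntegerRanges("2-3,5,7-");
        System.out.println(ranges.isIncluded(3));    // true, inside 2-3
        System.out.println(ranges.isIncluded(6));    // false
        System.out.println(ranges.isIncluded(100));  // true, "7-" has no upper bound
        System.out.println(ranges);                  // 2-3,5-5,7-2147483647
      }
    }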

+ * This is an optimized version of {@link #getStrings(String)} + * + * @param name property name. + * @return property value as a collection of Strings. + */ + public Collection getStringCollection(String name) { + String valueString = get(name); + return StringUtils.getStringCollection(valueString); + } + + /** + * Get the comma delimited values of the name property as + * an array of Strings. + * If no such property is specified then null is returned. + * + * @param name property name. + * @return property value as an array of Strings, + * or null. + */ + public String[] getStrings(String name) { + String valueString = get(name); + return StringUtils.getStrings(valueString); + } + + /** + * Get the comma delimited values of the name property as + * an array of Strings. + * If no such property is specified then default value is returned. + * + * @param name property name. + * @param defaultValue The default value + * @return property value as an array of Strings, + * or default value. + */ + public String[] getStrings(String name, String... defaultValue) { + String valueString = get(name); + if (valueString == null) { + return defaultValue; + } else { + return StringUtils.getStrings(valueString); + } + } + + /** + * Set the array of string values for the name property as + * as comma delimited values. + * + * @param name property name. + * @param values The values + */ + public void setStrings(String name, String... values) { + set(name, StringUtils.arrayToString(values)); + } + + /** + * Load a class by name. + * + * @param name the class name. + * @return the class object. + * @throws ClassNotFoundException if the class is not found. + */ + public Class getClassByName(String name) throws ClassNotFoundException { + return Class.forName(name, true, classLoader); + } + + /** + * Get the value of the name property + * as an array of Class. + * The value of the property specifies a list of comma separated class names. + * If no such property is specified, then defaultValue is + * returned. + * + * @param name the property name. + * @param defaultValue default value. + * @return property value as a Class[], + * or defaultValue. + */ + public Class[] getClasses(String name, Class ... defaultValue) { + String[] classnames = getStrings(name); + if (classnames == null) + return defaultValue; + try { + Class[] classes = new Class[classnames.length]; + for(int i = 0; i < classnames.length; i++) { + classes[i] = getClassByName(classnames[i]); + } + return classes; + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Get the value of the name property as a Class. + * If no such property is specified, then defaultValue is + * returned. + * + * @param name the class name. + * @param defaultValue default value. + * @return property value as a Class, + * or defaultValue. + */ + public Class getClass(String name, Class defaultValue) { + String valueString = get(name); + if (valueString == null) + return defaultValue; + try { + return getClassByName(valueString); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Get the value of the name property as a Class + * implementing the interface specified by xface. + * + * If no such property is specified, then defaultValue is + * returned. + * + * An exception is thrown if the returned class does not implement the named + * interface. + * + * @param name the class name. + * @param defaultValue default value. + * @param xface the interface implemented by the named class. 
+ * @return property value as a Class, + * or defaultValue. + */ + public Class getClass(String name, + Class defaultValue, + Class xface) { + try { + Class theClass = getClass(name, defaultValue); + if (theClass != null && !xface.isAssignableFrom(theClass)) + throw new RuntimeException(theClass+" not "+xface.getName()); + else if (theClass != null) + return theClass.asSubclass(xface); + else + return null; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Get the value of the name property as a List + * of objects implementing the interface specified by xface. + * + * An exception is thrown if any of the classes does not exist, or if it does + * not implement the named interface. + * + * @param name the property name. + * @param xface the interface implemented by the classes named by + * name. + * @return a List of objects implementing xface. + */ + @SuppressWarnings("unchecked") + public List getInstances(String name, Class xface) { + List ret = new ArrayList(); + Class[] classes = getClasses(name); + for (Class cl: classes) { + if (!xface.isAssignableFrom(cl)) { + throw new RuntimeException(cl + " does not implement " + xface); + } + ret.add((U)ReflectionUtils.newInstance(cl, this)); + } + return ret; + } + + /** + * Set the value of the name property to the name of a + * theClass implementing the given interface xface. + * + * An exception is thrown if theClass does not implement the + * interface xface. + * + * @param name property name. + * @param theClass property value. + * @param xface the interface implemented by the named class. + */ + public void setClass(String name, Class theClass, Class xface) { + if (!xface.isAssignableFrom(theClass)) + throw new RuntimeException(theClass+" not "+xface.getName()); + set(name, theClass.getName()); + } + + /** + * Get a local file under a directory named by dirsProp with + * the given path. If dirsProp contains multiple directories, + * then one is chosen based on path's hash code. If the selected + * directory does not exist, an attempt is made to create it. + * + * @param dirsProp directory in which to locate the file. + * @param path file-path. + * @return local file under the directory with the given path. + */ + public Path getLocalPath(String dirsProp, String path) + throws IOException { + String[] dirs = getStrings(dirsProp); + int hashCode = path.hashCode(); + FileSystem fs = FileSystem.getLocal(this); + for (int i = 0; i < dirs.length; i++) { // try each local dir + int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; + Path file = new Path(dirs[index], path); + Path dir = file.getParent(); + if (fs.mkdirs(dir) || fs.exists(dir)) { + return file; + } + } + LOG.warn("Could not make " + path + + " in local directories from " + dirsProp); + for(int i=0; i < dirs.length; i++) { + int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; + LOG.warn(dirsProp + "[" + index + "]=" + dirs[index]); + } + throw new IOException("No valid local directories in property: "+dirsProp); + } + + /** + * Get a local file name under a directory named in dirsProp with + * the given path. If dirsProp contains multiple directories, + * then one is chosen based on path's hash code. If the selected + * directory does not exist, an attempt is made to create it. + * + * @param dirsProp directory in which to locate the file. + * @param path file-path. + * @return local file under the directory with the given path. 
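A sketch of the class-valued accessors, reading the rack-mapping implementation named by core-default.xml earlier in this patch; DNSToSwitchMapping and ScriptBasedMapping live in org.apache.hadoop.net:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.net.DNSToSwitchMapping;
    import org.apache.hadoop.net.ScriptBasedMapping;
    import org.apache.hadoop.util.ReflectionUtils;

    public class ClassValuedPropertySketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        Class<? extends DNSToSwitchMapping> cls =
            conf.getClass("topology.node.switch.mapping.impl",
                          ScriptBasedMapping.class, DNSToSwitchMapping.class);
        // getClass has already verified that cls implements DNSToSwitchMapping.
        DNSToSwitchMapping mapping = ReflectionUtils.newInstance(cls, conf);
        System.out.println(mapping.getClass().getName());
      }
    }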
+ */ + public File getFile(String dirsProp, String path) + throws IOException { + String[] dirs = getStrings(dirsProp); + int hashCode = path.hashCode(); + for (int i = 0; i < dirs.length; i++) { // try each local dir + int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; + File file = new File(dirs[index], path); + File dir = file.getParentFile(); + if (dir.exists() || dir.mkdirs()) { + return file; + } + } + throw new IOException("No valid local directories in property: "+dirsProp); + } + + /** + * Get the {@link URL} for the named resource. + * + * @param name resource name. + * @return the url for the named resource. + */ + public URL getResource(String name) { + return classLoader.getResource(name); + } + + /** + * Get an input stream attached to the configuration resource with the + * given name. + * + * @param name configuration resource name. + * @return an input stream attached to the resource. + */ + public InputStream getConfResourceAsInputStream(String name) { + try { + URL url= getResource(name); + + if (url == null) { + LOG.info(name + " not found"); + return null; + } else { + LOG.info("found resource " + name + " at " + url); + } + + return url.openStream(); + } catch (Exception e) { + return null; + } + } + + /** + * Get a {@link Reader} attached to the configuration resource with the + * given name. + * + * @param name configuration resource name. + * @return a reader attached to the resource. + */ + public Reader getConfResourceAsReader(String name) { + try { + URL url= getResource(name); + + if (url == null) { + LOG.info(name + " not found"); + return null; + } else { + LOG.info("found resource " + name + " at " + url); + } + + return new InputStreamReader(url.openStream()); + } catch (Exception e) { + return null; + } + } + + protected synchronized Properties getProps() { + if (properties == null) { + properties = new Properties(); + loadResources(properties, resources, quietmode); + if (overlay!= null) + properties.putAll(overlay); + } + return properties; + } + + /** + * Return the number of keys in the configuration. + * + * @return number of keys in the configuration. + */ + public int size() { + return getProps().size(); + } + + /** + * Clears all keys from the configuration. + */ + public void clear() { + getProps().clear(); + getOverlay().clear(); + } + + /** + * Get an {@link Iterator} to go through the list of String + * key-value pairs in the configuration. + * + * @return an iterator over the entries. + */ + public Iterator> iterator() { + // Get a copy of just the string to string pairs. After the old object + // methods that allow non-strings to be put into configurations are removed, + // we could replace properties with a Map and get rid of this + // code. 
+ Map result = new HashMap(); + for(Map.Entry item: getProps().entrySet()) { + if (item.getKey() instanceof String && + item.getValue() instanceof String) { + result.put((String) item.getKey(), (String) item.getValue()); + } + } + return result.entrySet().iterator(); + } + + private void loadResources(Properties properties, + ArrayList resources, + boolean quiet) { + if(loadDefaults) { + for (String resource : defaultResources) { + loadResource(properties, resource, quiet); + } + + //support the hadoop-site.xml as a deprecated case + if(getResource("hadoop-site.xml")!=null) { + loadResource(properties, "hadoop-site.xml", quiet); + } + } + + for (Object resource : resources) { + loadResource(properties, resource, quiet); + } + } + + private void loadResource(Properties properties, Object name, boolean quiet) { + try { + DocumentBuilderFactory docBuilderFactory + = DocumentBuilderFactory.newInstance(); + //ignore all comments inside the xml file + docBuilderFactory.setIgnoringComments(true); + + //allow includes in the xml file + docBuilderFactory.setNamespaceAware(true); + try { + docBuilderFactory.setXIncludeAware(true); + } catch (UnsupportedOperationException e) { + LOG.error("Failed to set setXIncludeAware(true) for parser " + + docBuilderFactory + + ":" + e, + e); + } + DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); + Document doc = null; + Element root = null; + + if (name instanceof URL) { // an URL resource + URL url = (URL)name; + if (url != null) { + if (!quiet) { + LOG.info("parsing " + url); + } + doc = builder.parse(url.toString()); + } + } else if (name instanceof String) { // a CLASSPATH resource + URL url = getResource((String)name); + if (url != null) { + if (!quiet) { + LOG.info("parsing " + url); + } + doc = builder.parse(url.toString()); + } + } else if (name instanceof Path) { // a file resource + // Can't use FileSystem API or we get an infinite loop + // since FileSystem uses Configuration API. Use java.io.File instead. 
+ File file = new File(((Path)name).toUri().getPath()) + .getAbsoluteFile(); + if (file.exists()) { + if (!quiet) { + LOG.info("parsing " + file); + } + InputStream in = new BufferedInputStream(new FileInputStream(file)); + try { + doc = builder.parse(in); + } finally { + in.close(); + } + } + } else if (name instanceof InputStream) { + try { + doc = builder.parse((InputStream)name); + } finally { + ((InputStream)name).close(); + } + } else if (name instanceof Element) { + root = (Element)name; + } + + if (doc == null && root == null) { + if (quiet) + return; + throw new RuntimeException(name + " not found"); + } + + if (root == null) { + root = doc.getDocumentElement(); + } + if (!"configuration".equals(root.getTagName())) + LOG.fatal("bad conf file: top-level element not "); + NodeList props = root.getChildNodes(); + for (int i = 0; i < props.getLength(); i++) { + Node propNode = props.item(i); + if (!(propNode instanceof Element)) + continue; + Element prop = (Element)propNode; + if ("configuration".equals(prop.getTagName())) { + loadResource(properties, prop, quiet); + continue; + } + if (!"property".equals(prop.getTagName())) + LOG.warn("bad conf file: element not "); + NodeList fields = prop.getChildNodes(); + String attr = null; + String value = null; + boolean finalParameter = false; + for (int j = 0; j < fields.getLength(); j++) { + Node fieldNode = fields.item(j); + if (!(fieldNode instanceof Element)) + continue; + Element field = (Element)fieldNode; + if ("name".equals(field.getTagName()) && field.hasChildNodes()) + attr = ((Text)field.getFirstChild()).getData().trim(); + if ("value".equals(field.getTagName()) && field.hasChildNodes()) + value = ((Text)field.getFirstChild()).getData(); + if ("final".equals(field.getTagName()) && field.hasChildNodes()) + finalParameter = "true".equals(((Text)field.getFirstChild()).getData()); + } + + // Ignore this parameter if it has already been marked as 'final' + if (attr != null && value != null) { + if (!finalParameters.contains(attr)) { + properties.setProperty(attr, value); + if (finalParameter) + finalParameters.add(attr); + } else { + LOG.warn(name+":a attempt to override final parameter: "+attr + +"; Ignoring."); + } + } + } + + } catch (IOException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } catch (DOMException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } catch (SAXException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } catch (ParserConfigurationException e) { + LOG.fatal("error parsing conf file: " + e); + throw new RuntimeException(e); + } + } + + /** + * Write out the non-default properties in this configuration to the give + * {@link OutputStream}. + * + * @param out the output stream to write to. 
+ */ + public void writeXml(OutputStream out) throws IOException { + Properties properties = getProps(); + try { + Document doc = + DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); + Element conf = doc.createElement("configuration"); + doc.appendChild(conf); + conf.appendChild(doc.createTextNode("\n")); + for (Enumeration e = properties.keys(); e.hasMoreElements();) { + String name = (String)e.nextElement(); + Object object = properties.get(name); + String value = null; + if (object instanceof String) { + value = (String) object; + }else { + continue; + } + Element propNode = doc.createElement("property"); + conf.appendChild(propNode); + + Element nameNode = doc.createElement("name"); + nameNode.appendChild(doc.createTextNode(name)); + propNode.appendChild(nameNode); + + Element valueNode = doc.createElement("value"); + valueNode.appendChild(doc.createTextNode(value)); + propNode.appendChild(valueNode); + + conf.appendChild(doc.createTextNode("\n")); + } + + DOMSource source = new DOMSource(doc); + StreamResult result = new StreamResult(out); + TransformerFactory transFactory = TransformerFactory.newInstance(); + Transformer transformer = transFactory.newTransformer(); + transformer.transform(source, result); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Get the {@link ClassLoader} for this job. + * + * @return the correct class loader. + */ + public ClassLoader getClassLoader() { + return classLoader; + } + + /** + * Set the class loader that will be used to load the various objects. + * + * @param classLoader the new class loader. + */ + public void setClassLoader(ClassLoader classLoader) { + this.classLoader = classLoader; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("Configuration: "); + if(loadDefaults) { + toString(defaultResources, sb); + if(resources.size()>0) { + sb.append(", "); + } + } + toString(resources, sb); + return sb.toString(); + } + + private void toString(ArrayList resources, StringBuffer sb) { + ListIterator i = resources.listIterator(); + while (i.hasNext()) { + if (i.nextIndex() != 0) { + sb.append(", "); + } + sb.append(i.next()); + } + } + + /** + * Set the quietness-mode. + * + * In the quiet-mode, error and informational messages might not be logged. + * + * @param quietmode true to set quiet-mode on, false + * to turn it off. + */ + public synchronized void setQuietMode(boolean quietmode) { + this.quietmode = quietmode; + } + + /** For debugging. List non-default properties to the terminal and exit. 
*/ + public static void main(String[] args) throws Exception { + new Configuration().writeXml(System.out); + } + + @Override + public void readFields(DataInput in) throws IOException { + clear(); + int size = WritableUtils.readVInt(in); + for(int i=0; i < size; ++i) { + set(org.apache.hadoop.io.Text.readString(in), + org.apache.hadoop.io.Text.readString(in)); + } + } + + //@Override + public void write(DataOutput out) throws IOException { + Properties props = getProps(); + WritableUtils.writeVInt(out, props.size()); + for(Map.Entry item: props.entrySet()) { + org.apache.hadoop.io.Text.writeString(out, (String) item.getKey()); + org.apache.hadoop.io.Text.writeString(out, (String) item.getValue()); + } + } + +} diff --git a/src/java/org/apache/hadoop/conf/Configured.java b/src/java/org/apache/hadoop/conf/Configured.java new file mode 100644 index 00000000000..cd5604e981d --- /dev/null +++ b/src/java/org/apache/hadoop/conf/Configured.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.conf; + +/** Base class for things that may be configured with a {@link Configuration}. */ +public class Configured implements Configurable { + + private Configuration conf; + + /** Construct a Configured. */ + public Configured() { + this(null); + } + + /** Construct a Configured. */ + public Configured(Configuration conf) { + setConf(conf); + } + + // inherit javadoc + public void setConf(Configuration conf) { + this.conf = conf; + } + + // inherit javadoc + public Configuration getConf() { + return conf; + } + +} diff --git a/src/java/org/apache/hadoop/conf/package.html b/src/java/org/apache/hadoop/conf/package.html new file mode 100644 index 00000000000..0be80bed5f9 --- /dev/null +++ b/src/java/org/apache/hadoop/conf/package.html @@ -0,0 +1,23 @@ + + + + + +Configuration of system parameters. + + diff --git a/src/java/org/apache/hadoop/filecache/DistributedCache.java b/src/java/org/apache/hadoop/filecache/DistributedCache.java new file mode 100644 index 00000000000..9d4a8f9a426 --- /dev/null +++ b/src/java/org/apache/hadoop/filecache/DistributedCache.java @@ -0,0 +1,879 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.filecache; + +import org.apache.commons.logging.*; +import java.io.*; +import java.util.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.*; +import org.apache.hadoop.fs.*; + +import java.net.URI; + +/** + * Distribute application-specific large, read-only files efficiently. + * + *

+ * <p>DistributedCache is a facility provided by the Map-Reduce
+ * framework to cache files (text, archives, jars etc.) needed by applications.
+ *
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
+ * via the {@link org.apache.hadoop.mapred.JobConf}.
+ * The DistributedCache assumes that the
+ * files specified via hdfs:// urls are already present on the
+ * {@link FileSystem} at the path specified by the url.
+ *
+ * <p>The framework will copy the necessary files on to the slave node before
+ * any tasks for the job are executed on that node. Its efficiency stems from
+ * the fact that the files are only copied once per job and the ability to
+ * cache archives which are un-archived on the slaves.
+ *
+ * <p>DistributedCache can be used to distribute simple, read-only
+ * data/text files and/or more complex types such as archives, jars etc.
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
+ * software distribution mechanism. Files have execution permissions.
+ * Optionally users can also direct it to symlink the distributed cache file(s)
+ * into the working directory of the task.
+ *
+ * <p>DistributedCache tracks modification timestamps of the cache
+ * files. Clearly the cache files should not be modified by the application
+ * or externally while the job is executing.
+ *
+ * <p>Here is an illustrative example on how to use the
+ * DistributedCache:

+ *     // Setting up the cache for the application
+ *     
+ *     1. Copy the requisite files to the FileSystem:
+ *     
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
+ *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
+ *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
+ *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
+ *     
+ *     2. Setup the application's JobConf:
+ *     
+ *     JobConf job = new JobConf();
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *                                   job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
+ *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
+ *     
+ *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
+ *     or {@link org.apache.hadoop.mapred.Reducer}:
+ *     
+ *     public static class MapClass extends MapReduceBase  
+ *     implements Mapper<K, V, K, V> {
+ *     
+ *       private Path[] localArchives;
+ *       private Path[] localFiles;
+ *       
+ *       public void configure(JobConf job) {
+ *         // Get the cached archives/files
+ *         localArchives = DistributedCache.getLocalCacheArchives(job);
+ *         localFiles = DistributedCache.getLocalCacheFiles(job);
+ *       }
+ *       
+ *       public void map(K key, V value, 
+ *                       OutputCollector<K, V> output, Reporter reporter) 
+ *       throws IOException {
+ *         // Use data from the cached archives/files here
+ *         // ...
+ *         // ...
+ *         output.collect(k, v);
+ *       }
+ *     }
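Beyond the job-level example above, the framework-side entry points declared below pair a localize call with a release; a sketch under made-up host name, paths and cache file:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.filecache.DistributedCache;
    import org.apache.hadoop.fs.Path;

    public class CacheLifecycleSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        URI cache = new URI("hdfs://namenode:8020/myapp/lookup.dat#lookup.dat");
        long mtime = DistributedCache.getTimestamp(conf, cache);
        Path local = DistributedCache.getLocalCache(cache, conf,
            new Path("/tmp/cache"),   // hypothetical base directory for localized files
            false,                    // a plain file, not an archive
            mtime,                    // guards against the file changing mid-job
            new Path("/tmp/work"));   // hypothetical task working directory, used for symlinks
        try {
          System.out.println("Localized to " + local);
        } finally {
          DistributedCache.releaseCache(cache, conf); // drop the reference count when done
        }
      }
    }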
+ *     
+ * 

+ * + * @see org.apache.hadoop.mapred.JobConf + * @see org.apache.hadoop.mapred.JobClient + */ +public class DistributedCache { + // cacheID to cacheStatus mapping + private static TreeMap cachedArchives = new TreeMap(); + + private static TreeMap baseDirSize = new TreeMap(); + + // default total cache size + private static final long DEFAULT_CACHE_SIZE = 10737418240L; + + private static final Log LOG = + LogFactory.getLog(DistributedCache.class); + + /** + * Get the locally cached file or archive; it could either be + * previously cached (and valid) or copy it from the {@link FileSystem} now. + * + * @param cache the cache to be localized, this should be specified as + * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema + * or hostname:port is provided the file is assumed to be in the filesystem + * being used in the Configuration + * @param conf The Confguration file which contains the filesystem + * @param baseDir The base cache Dir where you wnat to localize the files/archives + * @param fileStatus The file status on the dfs. + * @param isArchive if the cache is an archive or a file. In case it is an + * archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will + * be unzipped/unjarred/untarred automatically + * and the directory where the archive is unzipped/unjarred/untarred is + * returned as the Path. + * In case of a file, the path to the file is returned + * @param confFileStamp this is the hdfs file modification timestamp to verify that the + * file to be cached hasn't changed since the job started + * @param currentWorkDir this is the directory where you would want to create symlinks + * for the locally cached files/archives + * @return the path to directory where the archives are unjarred in case of archives, + * the path to the file where the file is copied locally + * @throws IOException + */ + public static Path getLocalCache(URI cache, Configuration conf, + Path baseDir, FileStatus fileStatus, + boolean isArchive, long confFileStamp, + Path currentWorkDir) + throws IOException { + return getLocalCache(cache, conf, baseDir, fileStatus, isArchive, + confFileStamp, currentWorkDir, true); + } + /** + * Get the locally cached file or archive; it could either be + * previously cached (and valid) or copy it from the {@link FileSystem} now. + * + * @param cache the cache to be localized, this should be specified as + * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema + * or hostname:port is provided the file is assumed to be in the filesystem + * being used in the Configuration + * @param conf The Confguration file which contains the filesystem + * @param baseDir The base cache Dir where you wnat to localize the files/archives + * @param fileStatus The file status on the dfs. + * @param isArchive if the cache is an archive or a file. In case it is an + * archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will + * be unzipped/unjarred/untarred automatically + * and the directory where the archive is unzipped/unjarred/untarred is + * returned as the Path. 
+ * In case of a file, the path to the file is returned + * @param confFileStamp this is the hdfs file modification timestamp to verify that the + * file to be cached hasn't changed since the job started + * @param currentWorkDir this is the directory where you would want to create symlinks + * for the locally cached files/archives + * @param honorSymLinkConf if this is false, then the symlinks are not + * created even if conf says so (this is required for an optimization in task + * launches + * @return the path to directory where the archives are unjarred in case of archives, + * the path to the file where the file is copied locally + * @throws IOException + */ + public static Path getLocalCache(URI cache, Configuration conf, + Path baseDir, FileStatus fileStatus, + boolean isArchive, long confFileStamp, + Path currentWorkDir, boolean honorSymLinkConf) + throws IOException { + String cacheId = makeRelative(cache, conf); + CacheStatus lcacheStatus; + Path localizedPath; + synchronized (cachedArchives) { + lcacheStatus = cachedArchives.get(cacheId); + if (lcacheStatus == null) { + // was never localized + lcacheStatus = new CacheStatus(baseDir, new Path(baseDir, new Path(cacheId))); + cachedArchives.put(cacheId, lcacheStatus); + } + + synchronized (lcacheStatus) { + localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, + fileStatus, isArchive, currentWorkDir, honorSymLinkConf); + lcacheStatus.refcount++; + } + } + + // try deleting stuff if you can + long size = 0; + synchronized (baseDirSize) { + Long get = baseDirSize.get(baseDir); + if ( get != null ) { + size = get.longValue(); + } + } + // setting the cache size to a default of 10GB + long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE); + if (allowedSize < size) { + // try some cache deletions + deleteCache(conf); + } + return localizedPath; + } + + + /** + * Get the locally cached file or archive; it could either be + * previously cached (and valid) or copy it from the {@link FileSystem} now. + * + * @param cache the cache to be localized, this should be specified as + * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema + * or hostname:port is provided the file is assumed to be in the filesystem + * being used in the Configuration + * @param conf The Confguration file which contains the filesystem + * @param baseDir The base cache Dir where you wnat to localize the files/archives + * @param isArchive if the cache is an archive or a file. In case it is an + * archive with a .zip or .jar or .tar or .tgz or .tar.gz extension it will + * be unzipped/unjarred/untarred automatically + * and the directory where the archive is unzipped/unjarred/untarred + * is returned as the Path. 
+ * In case of a file, the path to the file is returned + * @param confFileStamp this is the hdfs file modification timestamp to verify that the + * file to be cached hasn't changed since the job started + * @param currentWorkDir this is the directory where you would want to create symlinks + * for the locally cached files/archives + * @return the path to directory where the archives are unjarred in case of archives, + * the path to the file where the file is copied locally + * @throws IOException + + */ + public static Path getLocalCache(URI cache, Configuration conf, + Path baseDir, boolean isArchive, + long confFileStamp, Path currentWorkDir) + throws IOException { + return getLocalCache(cache, conf, + baseDir, null, isArchive, + confFileStamp, currentWorkDir); + } + + /** + * This is the opposite of getlocalcache. When you are done with + * using the cache, you need to release the cache + * @param cache The cache URI to be released + * @param conf configuration which contains the filesystem the cache + * is contained in. + * @throws IOException + */ + public static void releaseCache(URI cache, Configuration conf) + throws IOException { + String cacheId = makeRelative(cache, conf); + synchronized (cachedArchives) { + CacheStatus lcacheStatus = cachedArchives.get(cacheId); + if (lcacheStatus == null) + return; + synchronized (lcacheStatus) { + lcacheStatus.refcount--; + } + } + } + + // To delete the caches which have a refcount of zero + + private static void deleteCache(Configuration conf) throws IOException { + // try deleting cache Status with refcount of zero + synchronized (cachedArchives) { + for (Iterator it = cachedArchives.keySet().iterator(); it.hasNext();) { + String cacheId = (String) it.next(); + CacheStatus lcacheStatus = cachedArchives.get(cacheId); + synchronized (lcacheStatus) { + if (lcacheStatus.refcount == 0) { + // delete this cache entry + FileSystem.getLocal(conf).delete(lcacheStatus.localLoadPath, true); + synchronized (baseDirSize) { + Long dirSize = baseDirSize.get(lcacheStatus.baseDir); + if ( dirSize != null ) { + dirSize -= lcacheStatus.size; + baseDirSize.put(lcacheStatus.baseDir, dirSize); + } + } + it.remove(); + } + } + } + } + } + + /* + * Returns the relative path of the dir this cache will be localized in + * relative path that this cache will be localized in. 
For + * hdfs://hostname:port/absolute_path -- the relative path is + * hostname/absolute path -- if it is just /absolute_path -- then the + * relative path is hostname of DFS this mapred cluster is running + * on/absolute_path + */ + public static String makeRelative(URI cache, Configuration conf) + throws IOException { + String host = cache.getHost(); + if (host == null) { + host = cache.getScheme(); + } + if (host == null) { + URI defaultUri = FileSystem.get(conf).getUri(); + host = defaultUri.getHost(); + if (host == null) { + host = defaultUri.getScheme(); + } + } + String path = host + cache.getPath(); + path = path.replace(":/","/"); // remove windows device colon + return path; + } + + private static Path cacheFilePath(Path p) { + return new Path(p, p.getName()); + } + + // the method which actually copies the caches locally and unjars/unzips them + // and does chmod for the files + private static Path localizeCache(Configuration conf, + URI cache, long confFileStamp, + CacheStatus cacheStatus, + FileStatus fileStatus, + boolean isArchive, + Path currentWorkDir,boolean honorSymLinkConf) + throws IOException { + boolean doSymlink = honorSymLinkConf && getSymlink(conf); + if(cache.getFragment() == null) { + doSymlink = false; + } + FileSystem fs = getFileSystem(cache, conf); + String link = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment(); + File flink = new File(link); + if (ifExistsAndFresh(conf, fs, cache, confFileStamp, + cacheStatus, fileStatus)) { + if (isArchive) { + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheStatus.localLoadPath.toString(), + link); + } + return cacheStatus.localLoadPath; + } + else { + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheFilePath(cacheStatus.localLoadPath).toString(), + link); + } + return cacheFilePath(cacheStatus.localLoadPath); + } + } else { + // remove the old archive + // if the old archive cannot be removed since it is being used by another + // job + // return null + if (cacheStatus.refcount > 1 && (cacheStatus.currentStatus == true)) + throw new IOException("Cache " + cacheStatus.localLoadPath.toString() + + " is in use and cannot be refreshed"); + + FileSystem localFs = FileSystem.getLocal(conf); + localFs.delete(cacheStatus.localLoadPath, true); + synchronized (baseDirSize) { + Long dirSize = baseDirSize.get(cacheStatus.baseDir); + if ( dirSize != null ) { + dirSize -= cacheStatus.size; + baseDirSize.put(cacheStatus.baseDir, dirSize); + } + } + Path parchive = new Path(cacheStatus.localLoadPath, + new Path(cacheStatus.localLoadPath.getName())); + + if (!localFs.mkdirs(cacheStatus.localLoadPath)) { + throw new IOException("Mkdirs failed to create directory " + + cacheStatus.localLoadPath.toString()); + } + + String cacheId = cache.getPath(); + fs.copyToLocalFile(new Path(cacheId), parchive); + if (isArchive) { + String tmpArchive = parchive.toString().toLowerCase(); + File srcFile = new File(parchive.toString()); + File destDir = new File(parchive.getParent().toString()); + if (tmpArchive.endsWith(".jar")) { + RunJar.unJar(srcFile, destDir); + } else if (tmpArchive.endsWith(".zip")) { + FileUtil.unZip(srcFile, destDir); + } else if (isTarFile(tmpArchive)) { + FileUtil.unTar(srcFile, destDir); + } + // else will not do anyhting + // and copy the file into the dir as it is + } + + long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString())); + cacheStatus.size = cacheSize; + synchronized (baseDirSize) { + Long dirSize = baseDirSize.get(cacheStatus.baseDir); + if( dirSize == 
null ) { + dirSize = Long.valueOf(cacheSize); + } else { + dirSize += cacheSize; + } + baseDirSize.put(cacheStatus.baseDir, dirSize); + } + + // do chmod here + try { + //Setting recursive permission to grant everyone read and execute + FileUtil.chmod(cacheStatus.baseDir.toString(), "ugo+rx",true); + } catch(InterruptedException e) { + LOG.warn("Exception in chmod" + e.toString()); + } + + // update cacheStatus to reflect the newly cached file + cacheStatus.currentStatus = true; + cacheStatus.mtime = getTimestamp(conf, cache); + } + + if (isArchive){ + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheStatus.localLoadPath.toString(), + link); + } + return cacheStatus.localLoadPath; + } + else { + if (doSymlink){ + if (!flink.exists()) + FileUtil.symLink(cacheFilePath(cacheStatus.localLoadPath).toString(), + link); + } + return cacheFilePath(cacheStatus.localLoadPath); + } + } + + private static boolean isTarFile(String filename) { + return (filename.endsWith(".tgz") || filename.endsWith(".tar.gz") || + filename.endsWith(".tar")); + } + + // Checks if the cache has already been localized and is fresh + private static boolean ifExistsAndFresh(Configuration conf, FileSystem fs, + URI cache, long confFileStamp, + CacheStatus lcacheStatus, + FileStatus fileStatus) + throws IOException { + // check for existence of the cache + if (lcacheStatus.currentStatus == false) { + return false; + } else { + long dfsFileStamp; + if (fileStatus != null) { + dfsFileStamp = fileStatus.getModificationTime(); + } else { + dfsFileStamp = getTimestamp(conf, cache); + } + + // ensure that the file on hdfs hasn't been modified since the job started + if (dfsFileStamp != confFileStamp) { + LOG.fatal("File: " + cache + " has changed on HDFS since job started"); + throw new IOException("File: " + cache + + " has changed on HDFS since job started"); + } + + if (dfsFileStamp != lcacheStatus.mtime) { + // needs refreshing + return false; + } + } + + return true; + } + + /** + * Returns mtime of a given cache file on hdfs. 
+ * @param conf configuration + * @param cache cache file + * @return mtime of a given cache file on hdfs + * @throws IOException + */ + public static long getTimestamp(Configuration conf, URI cache) + throws IOException { + FileSystem fileSystem = FileSystem.get(cache, conf); + Path filePath = new Path(cache.getPath()); + + return fileSystem.getFileStatus(filePath).getModificationTime(); + } + + /** + * This method create symlinks for all files in a given dir in another directory + * @param conf the configuration + * @param jobCacheDir the target directory for creating symlinks + * @param workDir the directory in which the symlinks are created + * @throws IOException + */ + public static void createAllSymlink(Configuration conf, File jobCacheDir, File workDir) + throws IOException{ + if ((jobCacheDir == null || !jobCacheDir.isDirectory()) || + workDir == null || (!workDir.isDirectory())) { + return; + } + boolean createSymlink = getSymlink(conf); + if (createSymlink){ + File[] list = jobCacheDir.listFiles(); + for (int i=0; i < list.length; i++){ + FileUtil.symLink(list[i].getAbsolutePath(), + new File(workDir, list[i].getName()).toString()); + } + } + } + + private static FileSystem getFileSystem(URI cache, Configuration conf) + throws IOException { + if ("hdfs".equals(cache.getScheme())) + return FileSystem.get(cache, conf); + else + return FileSystem.get(conf); + } + + /** + * Set the configuration with the given set of archives + * @param archives The list of archives that need to be localized + * @param conf Configuration which will be changed + */ + public static void setCacheArchives(URI[] archives, Configuration conf) { + String sarchives = StringUtils.uriToString(archives); + conf.set("mapred.cache.archives", sarchives); + } + + /** + * Set the configuration with the given set of files + * @param files The list of files that need to be localized + * @param conf Configuration which will be changed + */ + public static void setCacheFiles(URI[] files, Configuration conf) { + String sfiles = StringUtils.uriToString(files); + conf.set("mapred.cache.files", sfiles); + } + + /** + * Get cache archives set in the Configuration + * @param conf The configuration which contains the archives + * @return A URI array of the caches set in the Configuration + * @throws IOException + */ + public static URI[] getCacheArchives(Configuration conf) throws IOException { + return StringUtils.stringToURI(conf.getStrings("mapred.cache.archives")); + } + + /** + * Get cache files set in the Configuration + * @param conf The configuration which contains the files + * @return A URI array of the files set in the Configuration + * @throws IOException + */ + + public static URI[] getCacheFiles(Configuration conf) throws IOException { + return StringUtils.stringToURI(conf.getStrings("mapred.cache.files")); + } + + /** + * Return the path array of the localized caches + * @param conf Configuration that contains the localized archives + * @return A path array of localized caches + * @throws IOException + */ + public static Path[] getLocalCacheArchives(Configuration conf) + throws IOException { + return StringUtils.stringToPath(conf + .getStrings("mapred.cache.localArchives")); + } + + /** + * Return the path array of the localized files + * @param conf Configuration that contains the localized files + * @return A path array of localized files + * @throws IOException + */ + public static Path[] getLocalCacheFiles(Configuration conf) + throws IOException { + return 
StringUtils.stringToPath(conf.getStrings("mapred.cache.localFiles")); + } + + /** + * Get the timestamps of the archives + * @param conf The configuration which stored the timestamps + * @return a string array of timestamps + * @throws IOException + */ + public static String[] getArchiveTimestamps(Configuration conf) { + return conf.getStrings("mapred.cache.archives.timestamps"); + } + + + /** + * Get the timestamps of the files + * @param conf The configuration which stored the timestamps + * @return a string array of timestamps + * @throws IOException + */ + public static String[] getFileTimestamps(Configuration conf) { + return conf.getStrings("mapred.cache.files.timestamps"); + } + + /** + * This is to check the timestamp of the archives to be localized + * @param conf Configuration which stores the timestamp's + * @param timestamps comma separated list of timestamps of archives. + * The order should be the same as the order in which the archives are added. + */ + public static void setArchiveTimestamps(Configuration conf, String timestamps) { + conf.set("mapred.cache.archives.timestamps", timestamps); + } + + /** + * This is to check the timestamp of the files to be localized + * @param conf Configuration which stores the timestamp's + * @param timestamps comma separated list of timestamps of files. + * The order should be the same as the order in which the files are added. + */ + public static void setFileTimestamps(Configuration conf, String timestamps) { + conf.set("mapred.cache.files.timestamps", timestamps); + } + + /** + * Set the conf to contain the location for localized archives + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local archives + */ + public static void setLocalArchives(Configuration conf, String str) { + conf.set("mapred.cache.localArchives", str); + } + + /** + * Set the conf to contain the location for localized files + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local files + */ + public static void setLocalFiles(Configuration conf, String str) { + conf.set("mapred.cache.localFiles", str); + } + + /** + * Add a archives to be localized to the conf + * @param uri The uri of the cache to be localized + * @param conf Configuration to add the cache to + */ + public static void addCacheArchive(URI uri, Configuration conf) { + String archives = conf.get("mapred.cache.archives"); + conf.set("mapred.cache.archives", archives == null ? uri.toString() + : archives + "," + uri.toString()); + } + + /** + * Add a file to be localized to the conf + * @param uri The uri of the cache to be localized + * @param conf Configuration to add the cache to + */ + public static void addCacheFile(URI uri, Configuration conf) { + String files = conf.get("mapred.cache.files"); + conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + + uri.toString()); + } + + /** + * Add an file path to the current set of classpath entries It adds the file + * to cache as well. + * + * @param file Path of the file to be added + * @param conf Configuration that contains the classpath setting + */ + public static void addFileToClassPath(Path file, Configuration conf) + throws IOException { + String classpath = conf.get("mapred.job.classpath.files"); + conf.set("mapred.job.classpath.files", classpath == null ? 
file.toString() + : classpath + System.getProperty("path.separator") + file.toString()); + FileSystem fs = FileSystem.get(conf); + URI uri = fs.makeQualified(file).toUri(); + + addCacheFile(uri, conf); + } + + /** + * Get the file entries in classpath as an array of Path + * + * @param conf Configuration that contains the classpath setting + */ + public static Path[] getFileClassPaths(Configuration conf) { + String classpath = conf.get("mapred.job.classpath.files"); + if (classpath == null) + return null; + ArrayList list = Collections.list(new StringTokenizer(classpath, System + .getProperty("path.separator"))); + Path[] paths = new Path[list.size()]; + for (int i = 0; i < list.size(); i++) { + paths[i] = new Path((String) list.get(i)); + } + return paths; + } + + /** + * Add an archive path to the current set of classpath entries. It adds the + * archive to cache as well. + * + * @param archive Path of the archive to be added + * @param conf Configuration that contains the classpath setting + */ + public static void addArchiveToClassPath(Path archive, Configuration conf) + throws IOException { + String classpath = conf.get("mapred.job.classpath.archives"); + conf.set("mapred.job.classpath.archives", classpath == null ? archive + .toString() : classpath + System.getProperty("path.separator") + + archive.toString()); + FileSystem fs = FileSystem.get(conf); + URI uri = fs.makeQualified(archive).toUri(); + + addCacheArchive(uri, conf); + } + + /** + * Get the archive entries in classpath as an array of Path + * + * @param conf Configuration that contains the classpath setting + */ + public static Path[] getArchiveClassPaths(Configuration conf) { + String classpath = conf.get("mapred.job.classpath.archives"); + if (classpath == null) + return null; + ArrayList list = Collections.list(new StringTokenizer(classpath, System + .getProperty("path.separator"))); + Path[] paths = new Path[list.size()]; + for (int i = 0; i < list.size(); i++) { + paths[i] = new Path((String) list.get(i)); + } + return paths; + } + + /** + * This method allows you to create symlinks in the current working directory + * of the task to all the cache files/archives + * @param conf the jobconf + */ + public static void createSymlink(Configuration conf){ + conf.set("mapred.create.symlink", "yes"); + } + + /** + * This method checks to see if symlinks are to be create for the + * localized cache files in the current working directory + * @param conf the jobconf + * @return true if symlinks are to be created- else return false + */ + public static boolean getSymlink(Configuration conf){ + String result = conf.get("mapred.create.symlink"); + if ("yes".equals(result)){ + return true; + } + return false; + } + + /** + * This method checks if there is a conflict in the fragment names + * of the uris. Also makes sure that each uri has a fragment. It + * is only to be called if you want to create symlinks for + * the various archives and files. 
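[Editorial aside, illustrative only, not part of this patch] The cache-setup calls above (addCacheFile, addCacheArchive, createSymlink, checkURIs) are normally driven from job-submission code. A minimal sketch, using only methods shown in this file; the HDFS paths are hypothetical, and the URI fragments ("#lookup", "#dict") name the symlinks that checkURIs validates:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.filecache.DistributedCache;

    public class CacheSetupSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical HDFS paths; the fragment after '#' becomes the symlink name.
        URI file = new URI("/user/me/lookup.txt#lookup");
        URI archive = new URI("/user/me/dict.tar.gz#dict");
        DistributedCache.addCacheFile(file, conf);
        DistributedCache.addCacheArchive(archive, conf);
        DistributedCache.createSymlink(conf);
        // Every URI needs a distinct fragment once symlinks are requested.
        System.out.println("fragments ok: "
            + DistributedCache.checkURIs(new URI[] { file }, new URI[] { archive }));
      }
    }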
+ * @param uriFiles The uri array of urifiles + * @param uriArchives the uri array of uri archives + */ + public static boolean checkURIs(URI[] uriFiles, URI[] uriArchives){ + if ((uriFiles == null) && (uriArchives == null)){ + return true; + } + if (uriFiles != null){ + for (int i = 0; i < uriFiles.length; i++){ + String frag1 = uriFiles[i].getFragment(); + if (frag1 == null) + return false; + for (int j=i+1; j < uriFiles.length; j++){ + String frag2 = uriFiles[j].getFragment(); + if (frag2 == null) + return false; + if (frag1.equalsIgnoreCase(frag2)) + return false; + } + if (uriArchives != null){ + for (int j = 0; j < uriArchives.length; j++){ + String frag2 = uriArchives[j].getFragment(); + if (frag2 == null){ + return false; + } + if (frag1.equalsIgnoreCase(frag2)) + return false; + for (int k=j+1; k < uriArchives.length; k++){ + String frag3 = uriArchives[k].getFragment(); + if (frag3 == null) + return false; + if (frag2.equalsIgnoreCase(frag3)) + return false; + } + } + } + } + } + return true; + } + + private static class CacheStatus { + // false, not loaded yet, true is loaded + boolean currentStatus; + + // the local load path of this cache + Path localLoadPath; + + //the base dir where the cache lies + Path baseDir; + + //the size of this cache + long size; + + // number of instances using this cache + int refcount; + + // the cache-file modification time + long mtime; + + public CacheStatus(Path baseDir, Path localLoadPath) { + super(); + this.currentStatus = false; + this.localLoadPath = localLoadPath; + this.refcount = 0; + this.mtime = -1; + this.baseDir = baseDir; + this.size = 0; + } + } + + /** + * Clear the entire contents of the cache and delete the backing files. This + * should only be used when the server is reinitializing, because the users + * are going to lose their files. + */ + public static void purgeCache(Configuration conf) throws IOException { + synchronized (cachedArchives) { + FileSystem localFs = FileSystem.getLocal(conf); + for (Map.Entry f: cachedArchives.entrySet()) { + try { + localFs.delete(f.getValue().localLoadPath, true); + } catch (IOException ie) { + LOG.debug("Error cleaning up cache", ie); + } + } + cachedArchives.clear(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/BlockLocation.java b/src/java/org/apache/hadoop/fs/BlockLocation.java new file mode 100644 index 00000000000..8fb24a2fb30 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/BlockLocation.java @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.io.*; + +import java.io.*; + +/* + * A BlockLocation lists hosts, offset and length + * of block. 
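[Editorial aside, illustrative only, not part of this patch] A sketch of how BlockLocation instances are usually obtained, assuming the FileSystem#getFileBlockLocations(FileStatus, long, long) API that lives elsewhere in this patch; the input path is hypothetical:

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class BlockLocationSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus stat = fs.getFileStatus(new Path("/data/input.txt")); // hypothetical path
        for (BlockLocation b : fs.getFileBlockLocations(stat, 0, stat.getLen())) {
          System.out.println("offset=" + b.getOffset() + " length=" + b.getLength()
              + " hosts=" + Arrays.toString(b.getHosts()));
        }
      }
    }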
+ * + */ +public class BlockLocation implements Writable { + + static { // register a ctor + WritableFactories.setFactory + (BlockLocation.class, + new WritableFactory() { + public Writable newInstance() { return new BlockLocation(); } + }); + } + + private String[] hosts; //hostnames of datanodes + private String[] names; //hostname:portNumber of datanodes + private String[] topologyPaths; // full path name in network topology + private long offset; //offset of the of the block in the file + private long length; + + /** + * Default Constructor + */ + public BlockLocation() { + this(new String[0], new String[0], 0L, 0L); + } + + /** + * Constructor with host, name, offset and length + */ + public BlockLocation(String[] names, String[] hosts, long offset, + long length) { + if (names == null) { + this.names = new String[0]; + } else { + this.names = names; + } + if (hosts == null) { + this.hosts = new String[0]; + } else { + this.hosts = hosts; + } + this.offset = offset; + this.length = length; + this.topologyPaths = new String[0]; + } + + /** + * Constructor with host, name, network topology, offset and length + */ + public BlockLocation(String[] names, String[] hosts, String[] topologyPaths, + long offset, long length) { + this(names, hosts, offset, length); + if (topologyPaths == null) { + this.topologyPaths = new String[0]; + } else { + this.topologyPaths = topologyPaths; + } + } + + /** + * Get the list of hosts (hostname) hosting this block + */ + public String[] getHosts() throws IOException { + if ((hosts == null) || (hosts.length == 0)) { + return new String[0]; + } else { + return hosts; + } + } + + /** + * Get the list of names (hostname:port) hosting this block + */ + public String[] getNames() throws IOException { + if ((names == null) || (names.length == 0)) { + return new String[0]; + } else { + return this.names; + } + } + + /** + * Get the list of network topology paths for each of the hosts. + * The last component of the path is the host. 
+ */ + public String[] getTopologyPaths() throws IOException { + if ((topologyPaths == null) || (topologyPaths.length == 0)) { + return new String[0]; + } else { + return this.topologyPaths; + } + } + + /** + * Get the start offset of file associated with this block + */ + public long getOffset() { + return offset; + } + + /** + * Get the length of the block + */ + public long getLength() { + return length; + } + + /** + * Set the start offset of file associated with this block + */ + public void setOffset(long offset) { + this.offset = offset; + } + + /** + * Set the length of block + */ + public void setLength(long length) { + this.length = length; + } + + /** + * Set the hosts hosting this block + */ + public void setHosts(String[] hosts) throws IOException { + if (hosts == null) { + this.hosts = new String[0]; + } else { + this.hosts = hosts; + } + } + + /** + * Set the names (host:port) hosting this block + */ + public void setNames(String[] names) throws IOException { + if (names == null) { + this.names = new String[0]; + } else { + this.names = names; + } + } + + /** + * Set the network topology paths of the hosts + */ + public void setTopologyPaths(String[] topologyPaths) throws IOException { + if (topologyPaths == null) { + this.topologyPaths = new String[0]; + } else { + this.topologyPaths = topologyPaths; + } + } + + /** + * Implement write of Writable + */ + public void write(DataOutput out) throws IOException { + out.writeLong(offset); + out.writeLong(length); + out.writeInt(names.length); + for (int i=0; i < names.length; i++) { + Text name = new Text(names[i]); + name.write(out); + } + out.writeInt(hosts.length); + for (int i=0; i < hosts.length; i++) { + Text host = new Text(hosts[i]); + host.write(out); + } + out.writeInt(topologyPaths.length); + for (int i=0; i < topologyPaths.length; i++) { + Text host = new Text(topologyPaths[i]); + host.write(out); + } + } + + /** + * Implement readFields of Writable + */ + public void readFields(DataInput in) throws IOException { + this.offset = in.readLong(); + this.length = in.readLong(); + int numNames = in.readInt(); + this.names = new String[numNames]; + for (int i = 0; i < numNames; i++) { + Text name = new Text(); + name.readFields(in); + names[i] = name.toString(); + } + int numHosts = in.readInt(); + for (int i = 0; i < numHosts; i++) { + Text host = new Text(); + host.readFields(in); + hosts[i] = host.toString(); + } + int numTops = in.readInt(); + Text path = new Text(); + for (int i = 0; i < numTops; i++) { + path.readFields(in); + topologyPaths[i] = path.toString(); + } + } + + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(offset); + result.append(','); + result.append(length); + for(String h: hosts) { + result.append(','); + result.append(h); + } + return result.toString(); + } +} diff --git a/src/java/org/apache/hadoop/fs/BufferedFSInputStream.java b/src/java/org/apache/hadoop/fs/BufferedFSInputStream.java new file mode 100644 index 00000000000..f682d969e40 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/BufferedFSInputStream.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.BufferedInputStream; +import java.io.IOException; + + +/** + * A class optimizes reading from FSInputStream by bufferring + */ + + +public class BufferedFSInputStream extends BufferedInputStream +implements Seekable, PositionedReadable { + /** + * Creates a BufferedFSInputStream + * with the specified buffer size, + * and saves its argument, the input stream + * in, for later use. An internal + * buffer array of length size + * is created and stored in buf. + * + * @param in the underlying input stream. + * @param size the buffer size. + * @exception IllegalArgumentException if size <= 0. + */ + public BufferedFSInputStream(FSInputStream in, int size) { + super(in, size); + } + + public long getPos() throws IOException { + return ((FSInputStream)in).getPos()-(count-pos); + } + + public long skip(long n) throws IOException { + if (n <= 0) { + return 0; + } + + seek(getPos()+n); + return n; + } + + public void seek(long pos) throws IOException { + if( pos<0 ) { + return; + } + // optimize: check if the pos is in the buffer + long end = ((FSInputStream)in).getPos(); + long start = end - count; + if( pos>=start && pos 0) { + throw new ChecksumException("Checksum error: "+file+" at "+pos, pos); + } + return nread; + } + + /* Return the file length */ + private long getFileLength() throws IOException { + if( fileLen==-1L ) { + fileLen = fs.getContentSummary(file).getLength(); + } + return fileLen; + } + + /** + * Skips over and discards n bytes of data from the + * input stream. + * + *The skip method skips over some smaller number of bytes + * when reaching end of file before n bytes have been skipped. + * The actual number of bytes skipped is returned. If n is + * negative, no bytes are skipped. + * + * @param n the number of bytes to be skipped. + * @return the actual number of bytes skipped. + * @exception IOException if an I/O error occurs. + * ChecksumException if the chunk to skip to is corrupted + */ + public synchronized long skip(long n) throws IOException { + long curPos = getPos(); + long fileLength = getFileLength(); + if( n+curPos > fileLength ) { + n = fileLength - curPos; + } + return super.skip(n); + } + + /** + * Seek to the given position in the stream. + * The next read() will be from that position. + * + *

This method does not allow seek past the end of the file. + * This produces IOException. + * + * @param pos the postion to seek to. + * @exception IOException if an I/O error occurs or seeks after EOF + * ChecksumException if the chunk to seek to is corrupted + */ + + public synchronized void seek(long pos) throws IOException { + if(pos>getFileLength()) { + throw new IOException("Cannot seek after EOF"); + } + super.seek(pos); + } + + } + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return new FSDataInputStream( + new ChecksumFSInputChecker(this, f, bufferSize)); + } + + /** {@inheritDoc} */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + /** + * Calculated the length of the checksum file in bytes. + * @param size the length of the data file in bytes + * @param bytesPerSum the number of bytes in a checksum block + * @return the number of bytes in the checksum file + */ + public static long getChecksumLength(long size, int bytesPerSum) { + //the checksum length is equal to size passed divided by bytesPerSum + + //bytes written in the beginning of the checksum file. + return ((size + bytesPerSum - 1) / bytesPerSum) * 4 + + CHECKSUM_VERSION.length + 4; + } + + /** This class provides an output stream for a checksummed file. + * It generates checksums for data. */ + private static class ChecksumFSOutputSummer extends FSOutputSummer { + private FSDataOutputStream datas; + private FSDataOutputStream sums; + private static final float CHKSUM_AS_FRACTION = 0.01f; + + public ChecksumFSOutputSummer(ChecksumFileSystem fs, + Path file, + boolean overwrite, + short replication, + long blockSize, + Configuration conf) + throws IOException { + this(fs, file, overwrite, + conf.getInt("io.file.buffer.size", 4096), + replication, blockSize, null); + } + + public ChecksumFSOutputSummer(ChecksumFileSystem fs, + Path file, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) + throws IOException { + super(new CRC32(), fs.getBytesPerSum(), 4); + int bytesPerSum = fs.getBytesPerSum(); + this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize, + replication, blockSize, progress); + int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize); + this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true, + sumBufferSize, replication, + blockSize); + sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length); + sums.writeInt(bytesPerSum); + } + + public void close() throws IOException { + flushBuffer(); + sums.close(); + datas.close(); + } + + @Override + protected void writeChunk(byte[] b, int offset, int len, byte[] checksum) + throws IOException { + datas.write(b, offset, len); + sums.write(checksum); + } + } + + /** {@inheritDoc} */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + Path parent = f.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + final FSDataOutputStream out = new FSDataOutputStream( + new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication, + blockSize, progress), null); + if 
(permission != null) { + setPermission(f, permission); + } + return out; + } + + /** + * Set replication for an existing file. + * Implement the abstract setReplication of FileSystem + * @param src file name + * @param replication new replication + * @throws IOException + * @return true if successful; + * false if file does not exist or is a directory + */ + public boolean setReplication(Path src, short replication) throws IOException { + boolean value = fs.setReplication(src, replication); + if (!value) + return false; + + Path checkFile = getChecksumFile(src); + if (exists(checkFile)) + fs.setReplication(checkFile, replication); + + return true; + } + + /** + * Rename files/dirs + */ + public boolean rename(Path src, Path dst) throws IOException { + if (fs.isDirectory(src)) { + return fs.rename(src, dst); + } else { + + boolean value = fs.rename(src, dst); + if (!value) + return false; + + Path checkFile = getChecksumFile(src); + if (fs.exists(checkFile)) { //try to rename checksum + if (fs.isDirectory(dst)) { + value = fs.rename(checkFile, dst); + } else { + value = fs.rename(checkFile, getChecksumFile(dst)); + } + } + + return value; + } + } + + /** + * Implement the delete(Path, boolean) in checksum + * file system. + */ + public boolean delete(Path f, boolean recursive) throws IOException{ + FileStatus fstatus = null; + try { + fstatus = fs.getFileStatus(f); + } catch(FileNotFoundException e) { + return false; + } + if(fstatus.isDir()) { + //this works since the crcs are in the same + //directories and the files. so we just delete + //everything in the underlying filesystem + return fs.delete(f, recursive); + } else { + Path checkFile = getChecksumFile(f); + if (fs.exists(checkFile)) { + fs.delete(checkFile, true); + } + return fs.delete(f, true); + } + } + + final private static PathFilter DEFAULT_FILTER = new PathFilter() { + public boolean accept(Path file) { + return !isChecksumFile(file); + } + }; + + /** + * List the statuses of the files/directories in the given path if the path is + * a directory. + * + * @param f + * given path + * @return the statuses of the files/directories in the given patch + * @throws IOException + */ + @Override + public FileStatus[] listStatus(Path f) throws IOException { + return fs.listStatus(f, DEFAULT_FILTER); + } + + @Override + public boolean mkdirs(Path f) throws IOException { + return fs.mkdirs(f); + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + */ + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * If src and dst are directories, the copyCrc parameter + * determines whether to copy CRC files. 
+ */ + public void copyToLocalFile(Path src, Path dst, boolean copyCrc) + throws IOException { + if (!fs.isDirectory(src)) { // source is a file + fs.copyToLocalFile(src, dst); + FileSystem localFs = getLocal(getConf()).getRawFileSystem(); + if (localFs.isDirectory(dst)) { + dst = new Path(dst, src.getName()); + } + dst = getChecksumFile(dst); + if (localFs.exists(dst)) { //remove old local checksum file + localFs.delete(dst, true); + } + Path checksumFile = getChecksumFile(src); + if (copyCrc && fs.exists(checksumFile)) { //copy checksum file + fs.copyToLocalFile(checksumFile, dst); + } + } else { + FileStatus[] srcs = listStatus(src); + for (FileStatus srcFile : srcs) { + copyToLocalFile(srcFile.getPath(), + new Path(dst, srcFile.getPath().getName()), copyCrc); + } + } + } + + @Override + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return tmpLocalFile; + } + + @Override + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + moveFromLocalFile(tmpLocalFile, fsOutputFile); + } + + /** + * Report a checksum error to the file system. + * @param f the file name containing the error + * @param in the stream open on the file + * @param inPos the position of the beginning of the bad data in the file + * @param sums the stream open on the checksum file + * @param sumsPos the position of the beginning of the bad data in the checksum file + * @return if retry is neccessary + */ + public boolean reportChecksumFailure(Path f, FSDataInputStream in, + long inPos, FSDataInputStream sums, long sumsPos) { + return false; + } +} diff --git a/src/java/org/apache/hadoop/fs/ContentSummary.java b/src/java/org/apache/hadoop/fs/ContentSummary.java new file mode 100644 index 00000000000..2ec7959370d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ContentSummary.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; + +/** Store the summary of a content (a directory or a file). 
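[Editorial aside, illustrative only, not part of this patch] A minimal sketch of reading a ContentSummary, assuming FileSystem#getContentSummary(Path) elsewhere in this patch; the directory is hypothetical, and the printed layout roughly matches what the shell's count command produces:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class ContentSummarySketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        ContentSummary cs = fs.getContentSummary(new Path("/user/me")); // hypothetical path
        System.out.println(ContentSummary.getHeader(true));
        System.out.println(cs.toString(true) + "/user/me");
        System.out.println("bytes=" + cs.getLength() + " files=" + cs.getFileCount()
            + " dirs=" + cs.getDirectoryCount());
      }
    }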
*/ +public class ContentSummary implements Writable{ + private long length; + private long fileCount; + private long directoryCount; + private long quota; + private long spaceConsumed; + private long spaceQuota; + + + /** Constructor */ + public ContentSummary() {} + + /** Constructor */ + public ContentSummary(long length, long fileCount, long directoryCount) { + this(length, fileCount, directoryCount, -1L, length, -1L); + } + + /** Constructor */ + public ContentSummary( + long length, long fileCount, long directoryCount, long quota, + long spaceConsumed, long spaceQuota) { + this.length = length; + this.fileCount = fileCount; + this.directoryCount = directoryCount; + this.quota = quota; + this.spaceConsumed = spaceConsumed; + this.spaceQuota = spaceQuota; + } + + /** @return the length */ + public long getLength() {return length;} + + /** @return the directory count */ + public long getDirectoryCount() {return directoryCount;} + + /** @return the file count */ + public long getFileCount() {return fileCount;} + + /** Return the directory quota */ + public long getQuota() {return quota;} + + /** Retuns (disk) space consumed */ + public long getSpaceConsumed() {return spaceConsumed;} + + /** Returns (disk) space quota */ + public long getSpaceQuota() {return spaceQuota;} + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + out.writeLong(length); + out.writeLong(fileCount); + out.writeLong(directoryCount); + out.writeLong(quota); + out.writeLong(spaceConsumed); + out.writeLong(spaceQuota); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + this.length = in.readLong(); + this.fileCount = in.readLong(); + this.directoryCount = in.readLong(); + this.quota = in.readLong(); + this.spaceConsumed = in.readLong(); + this.spaceQuota = in.readLong(); + } + + /** + * Output format: + * <----12----> <----12----> <-------18-------> + * DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME + */ + private static final String STRING_FORMAT = "%12d %12d %18d "; + /** + * Output format: + * <----12----> <----15----> <----15----> <----15----> <----12----> <----12----> <-------18-------> + * QUOTA REMAINING_QUATA SPACE_QUOTA SPACE_QUOTA_REM DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME + */ + private static final String QUOTA_STRING_FORMAT = "%12s %15s "; + private static final String SPACE_QUOTA_STRING_FORMAT = "%15s %15s "; + + /** The header string */ + private static final String HEADER = String.format( + STRING_FORMAT.replace('d', 's'), "directories", "files", "bytes"); + + private static final String QUOTA_HEADER = String.format( + QUOTA_STRING_FORMAT + SPACE_QUOTA_STRING_FORMAT, + "quota", "remaining quota", "space quota", "reamaining quota") + + HEADER; + + /** Return the header of the output. + * if qOption is false, output directory count, file count, and content size; + * if qOption is true, output quota and remaining quota as well. + * + * @param qOption a flag indicating if quota needs to be printed or not + * @return the header of the output + */ + public static String getHeader(boolean qOption) { + return qOption ? QUOTA_HEADER : HEADER; + } + + /** {@inheritDoc} */ + public String toString() { + return toString(true); + } + + /** Return the string representation of the object in the output format. + * if qOption is false, output directory count, file count, and content size; + * if qOption is true, output quota and remaining quota as well. 
+ * + * @param qOption a flag indicating if quota needs to be printed or not + * @return the string representation of the object + */ + public String toString(boolean qOption) { + String prefix = ""; + if (qOption) { + String quotaStr = "none"; + String quotaRem = "inf"; + String spaceQuotaStr = "none"; + String spaceQuotaRem = "inf"; + + if (quota>0) { + quotaStr = Long.toString(quota); + quotaRem = Long.toString(quota-(directoryCount+fileCount)); + } + if (spaceQuota>0) { + spaceQuotaStr = Long.toString(spaceQuota); + spaceQuotaRem = Long.toString(spaceQuota - spaceConsumed); + } + + prefix = String.format(QUOTA_STRING_FORMAT + SPACE_QUOTA_STRING_FORMAT, + quotaStr, quotaRem, spaceQuotaStr, spaceQuotaRem); + } + + return prefix + String.format(STRING_FORMAT, directoryCount, + fileCount, length); + } +} diff --git a/src/java/org/apache/hadoop/fs/DF.java b/src/java/org/apache/hadoop/fs/DF.java new file mode 100644 index 00000000000..70cea9eb023 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/DF.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.File; +import java.io.IOException; +import java.io.BufferedReader; + +import java.util.EnumSet; +import java.util.StringTokenizer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; + +/** Filesystem disk space usage statistics. Uses the unix 'df' program. + * Tested on Linux, FreeBSD, Cygwin. 
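[Editorial aside, illustrative only, not part of this patch] DF shells out to the platform's df binary, so the sketch below only works on Unix-like systems; the directory is arbitrary and every accessor re-runs the command at most once per refresh interval:

    import java.io.File;
    import org.apache.hadoop.fs.DF;

    public class DfSketch {
      public static void main(String[] args) throws Exception {
        DF df = new DF(new File("/tmp"), DF.DF_INTERVAL_DEFAULT);
        System.out.println("mount=" + df.getMount()
            + " capacityBytes=" + df.getCapacity()
            + " availableBytes=" + df.getAvailable()
            + " used=" + df.getPercentUsed() + "%");
      }
    }

DU below follows the same Shell-based pattern for the du command, but caches the result in an AtomicLong and refreshes it from a background thread started by start().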
*/ +public class DF extends Shell { + public static final long DF_INTERVAL_DEFAULT = 3 * 1000; // default DF refresh interval + + private String dirPath; + private String filesystem; + private long capacity; + private long used; + private long available; + private int percentUsed; + private String mount; + + enum OSType { + OS_TYPE_UNIX("UNIX"), + OS_TYPE_WIN("Windows"), + OS_TYPE_SOLARIS("SunOS"), + OS_TYPE_MAC("Mac"), + OS_TYPE_AIX("AIX"); + + private String id; + OSType(String id) { + this.id = id; + } + public boolean match(String osStr) { + return osStr != null && osStr.indexOf(id) >= 0; + } + String getId() { + return id; + } + } + + private static final String OS_NAME = System.getProperty("os.name"); + private static final OSType OS_TYPE = getOSType(OS_NAME); + + protected static OSType getOSType(String osName) { + for (OSType ost : EnumSet.allOf(OSType.class)) { + if (ost.match(osName)) { + return ost; + } + } + return OSType.OS_TYPE_UNIX; + } + + public DF(File path, Configuration conf) throws IOException { + this(path, conf.getLong("dfs.df.interval", DF.DF_INTERVAL_DEFAULT)); + } + + public DF(File path, long dfInterval) throws IOException { + super(dfInterval); + this.dirPath = path.getCanonicalPath(); + } + + protected OSType getOSType() { + return OS_TYPE; + } + + /// ACCESSORS + + public String getDirPath() { + return dirPath; + } + + public String getFilesystem() throws IOException { + run(); + return filesystem; + } + + public long getCapacity() throws IOException { + run(); + return capacity; + } + + public long getUsed() throws IOException { + run(); + return used; + } + + public long getAvailable() throws IOException { + run(); + return available; + } + + public int getPercentUsed() throws IOException { + run(); + return percentUsed; + } + + public String getMount() throws IOException { + run(); + return mount; + } + + public String toString() { + return + "df -k " + mount +"\n" + + filesystem + "\t" + + capacity / 1024 + "\t" + + used / 1024 + "\t" + + available / 1024 + "\t" + + percentUsed + "%\t" + + mount; + } + + @Override + protected String[] getExecString() { + // ignoring the error since the exit code it enough + return new String[] {"bash","-c","exec 'df' '-k' '" + dirPath + + "' 2>/dev/null"}; + } + + @Override + protected void parseExecResult(BufferedReader lines) throws IOException { + lines.readLine(); // skip headings + + String line = lines.readLine(); + if (line == null) { + throw new IOException( "Expecting a line not the end of stream" ); + } + StringTokenizer tokens = + new StringTokenizer(line, " \t\n\r\f%"); + + this.filesystem = tokens.nextToken(); + if (!tokens.hasMoreTokens()) { // for long filesystem name + line = lines.readLine(); + if (line == null) { + throw new IOException( "Expecting a line not the end of stream" ); + } + tokens = new StringTokenizer(line, " \t\n\r\f%"); + } + + switch(getOSType()) { + case OS_TYPE_AIX: + this.capacity = Long.parseLong(tokens.nextToken()) * 1024; + this.available = Long.parseLong(tokens.nextToken()) * 1024; + this.percentUsed = Integer.parseInt(tokens.nextToken()); + tokens.nextToken(); + tokens.nextToken(); + this.mount = tokens.nextToken(); + this.used = this.capacity - this.available; + break; + + case OS_TYPE_WIN: + case OS_TYPE_SOLARIS: + case OS_TYPE_MAC: + case OS_TYPE_UNIX: + default: + this.capacity = Long.parseLong(tokens.nextToken()) * 1024; + this.used = Long.parseLong(tokens.nextToken()) * 1024; + this.available = Long.parseLong(tokens.nextToken()) * 1024; + this.percentUsed = 
Integer.parseInt(tokens.nextToken()); + this.mount = tokens.nextToken(); + break; + } + } + + public static void main(String[] args) throws Exception { + String path = "."; + if (args.length > 0) + path = args[0]; + + System.out.println(new DF(new File(path), DF_INTERVAL_DEFAULT).toString()); + } +} diff --git a/src/java/org/apache/hadoop/fs/DU.java b/src/java/org/apache/hadoop/fs/DU.java new file mode 100644 index 00000000000..2b65ae09875 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/DU.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +/** Filesystem disk space usage statistics. Uses the unix 'du' program*/ +public class DU extends Shell { + private String dirPath; + + private AtomicLong used = new AtomicLong(); + private volatile boolean shouldRun = true; + private Thread refreshUsed; + private IOException duException = null; + private long refreshInterval; + + /** + * Keeps track of disk usage. + * @param path the path to check disk usage in + * @param interval refresh the disk usage at this interval + * @throws IOException if we fail to refresh the disk usage + */ + public DU(File path, long interval) throws IOException { + super(0); + + //we set the Shell interval to 0 so it will always run our command + //and use this one to set the thread sleep interval + this.refreshInterval = interval; + this.dirPath = path.getCanonicalPath(); + + //populate the used variable + run(); + } + + /** + * Keeps track of disk usage. + * @param path the path to check disk usage in + * @param conf configuration object + * @throws IOException if we fail to refresh the disk usage + */ + public DU(File path, Configuration conf) throws IOException { + this(path, 600000L); + //10 minutes default refresh interval + } + + /** + * This thread refreshes the "used" variable. + * + * Future improvements could be to not permanently + * run this thread, instead run when getUsed is called. + **/ + class DURefreshThread implements Runnable { + + public void run() { + + while(shouldRun) { + + try { + Thread.sleep(refreshInterval); + + try { + //update the used variable + DU.this.run(); + } catch (IOException e) { + synchronized (DU.this) { + //save the latest exception so we can return it in getUsed() + duException = e; + } + + LOG.warn("Could not get disk usage information", e); + } + } catch (InterruptedException e) { + } + } + } + } + + /** + * Decrease how much disk space we use. 
+ * @param value decrease by this value + */ + public void decDfsUsed(long value) { + used.addAndGet(-value); + } + + /** + * Increase how much disk space we use. + * @param value increase by this value + */ + public void incDfsUsed(long value) { + used.addAndGet(value); + } + + /** + * @return disk space used + * @throws IOException if the shell command fails + */ + public long getUsed() throws IOException { + //if the updating thread isn't started, update on demand + if(refreshUsed == null) { + run(); + } else { + synchronized (DU.this) { + //if an exception was thrown in the last run, rethrow + if(duException != null) { + IOException tmp = duException; + duException = null; + throw tmp; + } + } + } + + return used.longValue(); + } + + /** + * @return the path of which we're keeping track of disk usage + */ + public String getDirPath() { + return dirPath; + } + + /** + * Start the disk usage checking thread. + */ + public void start() { + //only start the thread if the interval is sane + if(refreshInterval > 0) { + refreshUsed = new Thread(new DURefreshThread(), + "refreshUsed-"+dirPath); + refreshUsed.setDaemon(true); + refreshUsed.start(); + } + } + + /** + * Shut down the refreshing thread. + */ + public void shutdown() { + this.shouldRun = false; + + if(this.refreshUsed != null) { + this.refreshUsed.interrupt(); + } + } + + public String toString() { + return + "du -sk " + dirPath +"\n" + + used + "\t" + dirPath; + } + + protected String[] getExecString() { + return new String[] {"du", "-sk", dirPath}; + } + + protected void parseExecResult(BufferedReader lines) throws IOException { + String line = lines.readLine(); + if (line == null) { + throw new IOException("Expecting a line not the end of stream"); + } + String[] tokens = line.split("\t"); + if(tokens.length == 0) { + throw new IOException("Illegal du output"); + } + this.used.set(Long.parseLong(tokens[0])*1024); + } + + public static void main(String[] args) throws Exception { + String path = "."; + if (args.length > 0) { + path = args[0]; + } + + System.out.println(new DU(new File(path), new Configuration()).toString()); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSDataInputStream.java b/src/java/org/apache/hadoop/fs/FSDataInputStream.java new file mode 100644 index 00000000000..6c59b701f23 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSDataInputStream.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/** Utility that wraps a {@link FSInputStream} in a {@link DataInputStream} + * and buffers input through a {@link BufferedInputStream}. 
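[Editorial aside, illustrative only, not part of this patch] A sketch of the two read styles FSDataInputStream exposes: positioned reads, which by the PositionedReadable contract do not move the stream cursor, and seek-then-read. The path is hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class SeekReadSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(new Path("/data/input.bin")); // hypothetical path
        byte[] header = new byte[16];
        in.readFully(0L, header);   // positioned read; current position is untouched
        in.seek(1024L);             // move the cursor, then read sequentially
        int b = in.read();
        System.out.println("pos=" + in.getPos() + " byte=" + b);
        in.close();
      }
    }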
*/ +public class FSDataInputStream extends DataInputStream + implements Seekable, PositionedReadable { + + public FSDataInputStream(InputStream in) + throws IOException { + super(in); + if( !(in instanceof Seekable) || !(in instanceof PositionedReadable) ) { + throw new IllegalArgumentException( + "In is not an instance of Seekable or PositionedReadable"); + } + } + + public synchronized void seek(long desired) throws IOException { + ((Seekable)in).seek(desired); + } + + public long getPos() throws IOException { + return ((Seekable)in).getPos(); + } + + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + return ((PositionedReadable)in).read(position, buffer, offset, length); + } + + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException { + ((PositionedReadable)in).readFully(position, buffer, offset, length); + } + + public void readFully(long position, byte[] buffer) + throws IOException { + ((PositionedReadable)in).readFully(position, buffer, 0, buffer.length); + } + + public boolean seekToNewSource(long targetPos) throws IOException { + return ((Seekable)in).seekToNewSource(targetPos); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSDataOutputStream.java b/src/java/org/apache/hadoop/fs/FSDataOutputStream.java new file mode 100644 index 00000000000..ac13d74c3b2 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSDataOutputStream.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/** Utility that wraps a {@link OutputStream} in a {@link DataOutputStream}, + * buffers output through a {@link BufferedOutputStream} and creates a checksum + * file. 
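[Editorial aside, illustrative only, not part of this patch] A minimal write-side sketch, assuming FileSystem#create(Path) elsewhere in this patch; getPos() reports the byte count tracked by the internal PositionCache, and sync() only does work when the wrapped stream implements Syncable:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class CreateWriteSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(new Path("/tmp/out.bin")); // hypothetical path
        out.writeUTF("hello");
        System.out.println("bytes written: " + out.getPos());
        out.sync();   // forwarded to the wrapped stream only if it is Syncable
        out.close();
      }
    }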
*/ +public class FSDataOutputStream extends DataOutputStream implements Syncable { + private OutputStream wrappedStream; + + private static class PositionCache extends FilterOutputStream { + private FileSystem.Statistics statistics; + long position; + + public PositionCache(OutputStream out, + FileSystem.Statistics stats, + long pos) throws IOException { + super(out); + statistics = stats; + position = pos; + } + + public void write(int b) throws IOException { + out.write(b); + position++; + if (statistics != null) { + statistics.incrementBytesWritten(1); + } + } + + public void write(byte b[], int off, int len) throws IOException { + out.write(b, off, len); + position += len; // update position + if (statistics != null) { + statistics.incrementBytesWritten(len); + } + } + + public long getPos() throws IOException { + return position; // return cached position + } + + public void close() throws IOException { + out.close(); + } + } + + @Deprecated + public FSDataOutputStream(OutputStream out) throws IOException { + this(out, null); + } + + public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats) + throws IOException { + this(out, stats, 0); + } + + public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats, + long startPosition) throws IOException { + super(new PositionCache(out, stats, startPosition)); + wrappedStream = out; + } + + public long getPos() throws IOException { + return ((PositionCache)out).getPos(); + } + + public void close() throws IOException { + out.close(); // This invokes PositionCache.close() + } + + // Returns the underlying output stream. This is used by unit tests. + public OutputStream getWrappedStream() { + return wrappedStream; + } + + /** {@inheritDoc} */ + public void sync() throws IOException { + if (wrappedStream instanceof Syncable) { + ((Syncable)wrappedStream).sync(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FSError.java b/src/java/org/apache/hadoop/fs/FSError.java new file mode 100644 index 00000000000..8dd19125898 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSError.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +/** Thrown for unexpected filesystem errors, presumed to reflect disk errors + * in the native filesystem. 
*/ +public class FSError extends Error { + private static final long serialVersionUID = 1L; + + FSError(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSInputChecker.java b/src/java/org/apache/hadoop/fs/FSInputChecker.java new file mode 100644 index 00000000000..1d8e03ff935 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSInputChecker.java @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Checksum; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +/** + * This is a generic input stream for verifying checksums for + * data before it is read by a user. + */ + +abstract public class FSInputChecker extends FSInputStream { + public static final Log LOG + = LogFactory.getLog(FSInputChecker.class); + + /** The file name from which data is read from */ + protected Path file; + private Checksum sum; + private boolean verifyChecksum = true; + private byte[] buf; + private byte[] checksum; + private int pos; + private int count; + + private int numOfRetries; + + // cached file position + private long chunkPos = 0; + + /** Constructor + * + * @param file The name of the file to be read + * @param numOfRetries Number of read retries when ChecksumError occurs + */ + protected FSInputChecker( Path file, int numOfRetries) { + this.file = file; + this.numOfRetries = numOfRetries; + } + + /** Constructor + * + * @param file The name of the file to be read + * @param numOfRetries Number of read retries when ChecksumError occurs + * @param sum the type of Checksum engine + * @param chunkSize maximun chunk size + * @param checksumSize the number byte of each checksum + */ + protected FSInputChecker( Path file, int numOfRetries, + boolean verifyChecksum, Checksum sum, int chunkSize, int checksumSize ) { + this(file, numOfRetries); + set(verifyChecksum, sum, chunkSize, checksumSize); + } + + /** Reads in next checksum chunk data into buf at offset + * and checksum into checksum. + * The method is used for implementing read, therefore, it should be optimized + * for sequential reading + * @param pos chunkPos + * @param buf desitination buffer + * @param offset offset in buf at which to store data + * @param len maximun number of bytes to read + * @return number of bytes read + */ + abstract protected int readChunk(long pos, byte[] buf, int offset, int len, + byte[] checksum) throws IOException; + + /** Return position of beginning of chunk containing pos. 
+ * + * @param pos a postion in the file + * @return the starting position of the chunk which contains the byte + */ + abstract protected long getChunkPosition(long pos); + + /** Return true if there is a need for checksum verification */ + protected synchronized boolean needChecksum() { + return verifyChecksum && sum != null; + } + + /** + * Read one checksum-verified byte + * + * @return the next byte of data, or -1 if the end of the + * stream is reached. + * @exception IOException if an I/O error occurs. + */ + + public synchronized int read() throws IOException { + if (pos >= count) { + fill(); + if (pos >= count) { + return -1; + } + } + return buf[pos++] & 0xff; + } + + /** + * Read checksum verified bytes from this byte-input stream into + * the specified byte array, starting at the given offset. + * + *

This method implements the general contract of the corresponding + * {@link InputStream#read(byte[], int, int) read} method of + * the {@link InputStream} class. As an additional + * convenience, it attempts to read as many bytes as possible by repeatedly + * invoking the read method of the underlying stream. This + * iterated read continues until one of the following + * conditions becomes true:

+ * <ul>
+ *   <li> The specified number of bytes have been read,
+ *
+ *   <li> The read method of the underlying stream returns
+ *   -1, indicating end-of-file.
+ *
+ * </ul>
If the first read on the underlying stream returns + * -1 to indicate end-of-file then this method returns + * -1. Otherwise this method returns the number of bytes + * actually read. + * + * @param b destination buffer. + * @param off offset at which to start storing bytes. + * @param len maximum number of bytes to read. + * @return the number of bytes read, or -1 if the end of + * the stream has been reached. + * @exception IOException if an I/O error occurs. + * ChecksumException if any checksum error occurs + */ + public synchronized int read(byte[] b, int off, int len) throws IOException { + // parameter check + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + int n = 0; + for (;;) { + int nread = read1(b, off + n, len - n); + if (nread <= 0) + return (n == 0) ? nread : n; + n += nread; + if (n >= len) + return n; + } + } + + /** + * Fills the buffer with a chunk data. + * No mark is supported. + * This method assumes that all data in the buffer has already been read in, + * hence pos > count. + */ + private void fill( ) throws IOException { + assert(pos>=count); + // fill internal buffer + count = readChecksumChunk(buf, 0, buf.length); + } + + /* + * Read characters into a portion of an array, reading from the underlying + * stream at most once if necessary. + */ + private int read1(byte b[], int off, int len) + throws IOException { + int avail = count-pos; + if( avail <= 0 ) { + if(len>=buf.length) { + // read a chunk to user buffer directly; avoid one copy + int nread = readChecksumChunk(b, off, len); + return nread; + } else { + // read a chunk into the local buffer + fill(); + if( count <= 0 ) { + return -1; + } else { + avail = count; + } + } + } + + // copy content of the local buffer to the user buffer + int cnt = (avail < len) ? avail : len; + System.arraycopy(buf, pos, b, off, cnt); + pos += cnt; + return cnt; + } + + /* Read up one checksum chunk to array b at pos off + * It requires a checksum chunk boundary + * in between + * and it stops reading at the boundary or at the end of the stream; + * Otherwise an IllegalArgumentException is thrown. + * This makes sure that all data read are checksum verified. + * + * @param b the buffer into which the data is read. + * @param off the start offset in array b + * at which the data is written. + * @param len the maximum number of bytes to read. + * @return the total number of bytes read into the buffer, or + * -1 if there is no more data because the end of + * the stream has been reached. + * @throws IOException if an I/O error occurs. + */ + private int readChecksumChunk(byte b[], int off, int len) + throws IOException { + // invalidate buffer + count = pos = 0; + + int read = 0; + boolean retry = true; + int retriesLeft = numOfRetries; + do { + retriesLeft--; + + try { + read = readChunk(chunkPos, b, off, len, checksum); + if( read > 0 ) { + if( needChecksum() ) { + sum.update(b, off, read); + verifySum(chunkPos); + } + chunkPos += read; + } + retry = false; + } catch (ChecksumException ce) { + LOG.info("Found checksum error: b[" + off + ", " + (off+read) + "]=" + + StringUtils.byteToHexString(b, off, off + read), ce); + if (retriesLeft == 0) { + throw ce; + } + + // try a new replica + if (seekToNewSource(chunkPos)) { + // Since at least one of the sources is different, + // the read might succeed, so we'll retry. 
+ seek(chunkPos); + } else { + // Neither the data stream nor the checksum stream are being read + // from different sources, meaning we'll still get a checksum error + // if we try to do the read again. We throw an exception instead. + throw ce; + } + } + } while (retry); + return read; + } + + /* verify checksum for the chunk. + * @throws ChecksumException if there is a mismatch + */ + private void verifySum(long errPos) throws ChecksumException { + long crc = getChecksum(); + long sumValue = sum.getValue(); + sum.reset(); + if (crc != sumValue) { + throw new ChecksumException( + "Checksum error: "+file+" at "+errPos, errPos); + } + } + + /* calculate checksum value */ + private long getChecksum() { + return checksum2long(checksum); + } + + /** Convert a checksum byte array to a long */ + static public long checksum2long(byte[] checksum) { + long crc = 0L; + for(int i=0; in bytes of data from the + * input stream. + * + *

This method may skip more bytes than are remaining in the backing + * file. This produces no exception and the number of bytes skipped + * may include some number of bytes that were beyond the EOF of the + * backing file. Attempting to read from the stream after skipping past + * the end will result in -1 indicating the end of the file. + * + *
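For illustration only, a minimal caller-side sketch of the skip-past-EOF behaviour described above. It assumes an already-initialized FileSystem handle named fs whose open() returns a stream backed by this checker (as the checksummed filesystems in this patch do); the path and sizes are made up.

    // Hypothetical path and fs handle; skipping beyond the end of the file
    // is not an error; the next read simply returns -1 for end-of-file.
    FSDataInputStream in = fs.open(new Path("/tmp/tiny-file.txt"));
    long skipped = in.skip(10L * 1024 * 1024);  // may exceed the file length
    int eof = in.read();                        // -1 once positioned past EOF
    in.close();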

If n is negative, no bytes are skipped. + * + * @param n the number of bytes to be skipped. + * @return the actual number of bytes skipped. + * @exception IOException if an I/O error occurs. + * ChecksumException if the chunk to skip to is corrupted + */ + public synchronized long skip(long n) throws IOException { + if (n <= 0) { + return 0; + } + + seek(getPos()+n); + return n; + } + + /** + * Seek to the given position in the stream. + * The next read() will be from that position. + * + *

This method may seek past the end of the file. + * This produces no exception and an attempt to read from + * the stream will result in -1 indicating the end of the file. + * + * @param pos the postion to seek to. + * @exception IOException if an I/O error occurs. + * ChecksumException if the chunk to seek to is corrupted + */ + + public synchronized void seek(long pos) throws IOException { + if( pos<0 ) { + return; + } + // optimize: check if the pos is in the buffer + long start = chunkPos - this.count; + if( pos>=start && pos 0) { + readFully(this, new byte[delta], 0, delta); + } + } + + /** + * A utility function that tries to read up to len bytes from + * stm + * + * @param stm an input stream + * @param buf destiniation buffer + * @param offset offset at which to store data + * @param len number of bytes to read + * @return actual number of bytes read + * @throws IOException if there is any IO error + */ + protected static int readFully(InputStream stm, + byte[] buf, int offset, int len) throws IOException { + int n = 0; + for (;;) { + int nread = stm.read(buf, offset + n, len - n); + if (nread <= 0) + return (n == 0) ? nread : n; + n += nread; + if (n >= len) + return n; + } + } + + /** + * Set the checksum related parameters + * @param verifyChecksum whether to verify checksum + * @param sum which type of checksum to use + * @param maxChunkSize maximun chunk size + * @param checksumSize checksum size + */ + final protected synchronized void set(boolean verifyChecksum, + Checksum sum, int maxChunkSize, int checksumSize ) { + this.verifyChecksum = verifyChecksum; + this.sum = sum; + this.buf = new byte[maxChunkSize]; + this.checksum = new byte[checksumSize]; + this.count = 0; + this.pos = 0; + } + + final public boolean markSupported() { + return false; + } + + final public void mark(int readlimit) { + } + + final public void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + + + /* reset this FSInputChecker's state */ + private void resetState() { + // invalidate buffer + count = 0; + pos = 0; + // reset Checksum + if (sum != null) { + sum.reset(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FSInputStream.java b/src/java/org/apache/hadoop/fs/FSInputStream.java new file mode 100644 index 00000000000..91cac46cdc5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSInputStream.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/**************************************************************** + * FSInputStream is a generic old InputStream with a little bit + * of RAF-style seek ability. 
+ * + *****************************************************************/ +public abstract class FSInputStream extends InputStream + implements Seekable, PositionedReadable { + /** + * Seek to the given offset from the start of the file. + * The next read() will be from that location. Can't + * seek past the end of the file. + */ + public abstract void seek(long pos) throws IOException; + + /** + * Return the current offset from the start of the file + */ + public abstract long getPos() throws IOException; + + /** + * Seeks a different copy of the data. Returns true if + * found a new source, false otherwise. + */ + public abstract boolean seekToNewSource(long targetPos) throws IOException; + + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + synchronized (this) { + long oldPos = getPos(); + int nread = -1; + try { + seek(position); + nread = read(buffer, offset, length); + } finally { + seek(oldPos); + } + return nread; + } + } + + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException { + int nread = 0; + while (nread < length) { + int nbytes = read(position+nread, buffer, offset+nread, length-nread); + if (nbytes < 0) { + throw new EOFException("End of file reached before reading fully."); + } + nread += nbytes; + } + } + + public void readFully(long position, byte[] buffer) + throws IOException { + readFully(position, buffer, 0, buffer.length); + } +} diff --git a/src/java/org/apache/hadoop/fs/FSOutputSummer.java b/src/java/org/apache/hadoop/fs/FSOutputSummer.java new file mode 100644 index 00000000000..d730671f539 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FSOutputSummer.java @@ -0,0 +1,176 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.Checksum; + +/** + * This is a generic output stream for generating checksums for + * data before it is written to the underlying stream + */ + +abstract public class FSOutputSummer extends OutputStream { + // data checksum + private Checksum sum; + // internal buffer for storing data before it is checksumed + private byte buf[]; + // internal buffer for storing checksum + private byte checksum[]; + // The number of valid bytes in the buffer. 
+ private int count; + + protected FSOutputSummer(Checksum sum, int maxChunkSize, int checksumSize) { + this.sum = sum; + this.buf = new byte[maxChunkSize]; + this.checksum = new byte[checksumSize]; + this.count = 0; + } + + /* write the data chunk in b staring at offset with + * a length of len, and its checksum + */ + protected abstract void writeChunk(byte[] b, int offset, int len, byte[] checksum) + throws IOException; + + /** Write one byte */ + public synchronized void write(int b) throws IOException { + sum.update(b); + buf[count++] = (byte)b; + if(count == buf.length) { + flushBuffer(); + } + } + + /** + * Writes len bytes from the specified byte array + * starting at offset off and generate a checksum for + * each data chunk. + * + *
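Purely as an illustrative sketch (not part of this patch), a toy subclass shows what a concrete writeChunk implementation could look like: it writes each chunk's checksum followed by the chunk bytes to a wrapped stream. The class name is hypothetical, and the 512-byte chunk size and 4-byte CRC32 checksum are arbitrary example arguments to the protected constructor shown above; real implementations (for example in ChecksumFileSystem) are more involved.

    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.zip.CRC32;

    // Toy example only; not how the patch's filesystems lay out checksums.
    class CrcPrefixingOutputStream extends FSOutputSummer {
      private final OutputStream out;

      CrcPrefixingOutputStream(OutputStream out) {
        super(new CRC32(), 512, 4);   // 512-byte chunks, 4-byte checksums
        this.out = out;
      }

      @Override
      protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
          throws IOException {
        out.write(checksum);          // checksum of this chunk
        out.write(b, offset, len);    // then the chunk data itself
      }
    }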

This method stores bytes from the given array into this + * stream's buffer before it gets checksumed. The buffer gets checksumed + * and flushed to the underlying output stream when all data + * in a checksum chunk are in the buffer. If the buffer is empty and + * requested length is at least as large as the size of next checksum chunk + * size, this method will checksum and write the chunk directly + * to the underlying output stream. Thus it avoids uneccessary data copy. + * + * @param b the data. + * @param off the start offset in the data. + * @param len the number of bytes to write. + * @exception IOException if an I/O error occurs. + */ + public synchronized void write(byte b[], int off, int len) + throws IOException { + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + for (int n=0;n=buf.length) { + // local buffer is empty and user data has one chunk + // checksum and output data + final int length = buf.length; + sum.update(b, off, length); + writeChecksumChunk(b, off, length, false); + return length; + } + + // copy user data to local buffer + int bytesToCopy = buf.length-count; + bytesToCopy = (len>> 24) & 0xFF); + bytes[1] = (byte)((integer >>> 16) & 0xFF); + bytes[2] = (byte)((integer >>> 8) & 0xFF); + bytes[3] = (byte)((integer >>> 0) & 0xFF); + return bytes; + } + + /** + * Resets existing buffer with a new one of the specified size. + */ + protected synchronized void resetChecksumChunk(int size) { + sum.reset(); + this.buf = new byte[size]; + this.count = 0; + } +} diff --git a/src/java/org/apache/hadoop/fs/FileChecksum.java b/src/java/org/apache/hadoop/fs/FileChecksum.java new file mode 100644 index 00000000000..4fe66d0cd70 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileChecksum.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.util.Arrays; + +import org.apache.hadoop.io.Writable; + +/** An abstract class representing file checksums for files. */ +public abstract class FileChecksum implements Writable { + /** The checksum algorithm name */ + public abstract String getAlgorithmName(); + + /** The length of the checksum in bytes */ + public abstract int getLength(); + + /** The value of the checksum in bytes */ + public abstract byte[] getBytes(); + + /** Return true if both the algorithms and the values are the same. 
*/ + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || !(other instanceof FileChecksum)) { + return false; + } + + final FileChecksum that = (FileChecksum)other; + return this.getAlgorithmName().equals(that.getAlgorithmName()) + && Arrays.equals(this.getBytes(), that.getBytes()); + } + + /** {@inheritDoc} */ + public int hashCode() { + return getAlgorithmName().hashCode() ^ Arrays.hashCode(getBytes()); + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/fs/FileStatus.java b/src/java/org/apache/hadoop/fs/FileStatus.java new file mode 100644 index 00000000000..124984658cd --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileStatus.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +/** Interface that represents the client side information for a file. + */ +public class FileStatus implements Writable, Comparable { + + private Path path; + private long length; + private boolean isdir; + private short block_replication; + private long blocksize; + private long modification_time; + private long access_time; + private FsPermission permission; + private String owner; + private String group; + + public FileStatus() { this(0, false, 0, 0, 0, 0, null, null, null, null); } + + //We should deprecate this soon? + public FileStatus(long length, boolean isdir, int block_replication, + long blocksize, long modification_time, Path path) { + + this(length, isdir, block_replication, blocksize, modification_time, + 0, null, null, null, path); + } + + public FileStatus(long length, boolean isdir, int block_replication, + long blocksize, long modification_time, long access_time, + FsPermission permission, String owner, String group, + Path path) { + this.length = length; + this.isdir = isdir; + this.block_replication = (short)block_replication; + this.blocksize = blocksize; + this.modification_time = modification_time; + this.access_time = access_time; + this.permission = (permission == null) ? + FsPermission.getDefault() : permission; + this.owner = (owner == null) ? "" : owner; + this.group = (group == null) ? "" : group; + this.path = path; + } + + /* + * @return the length of this file, in blocks + */ + public long getLen() { + return length; + } + + /** + * Is this a directory? + * @return true if this is a directory + */ + public boolean isDir() { + return isdir; + } + + /** + * Get the block size of the file. 
+ * @return the number of bytes + */ + public long getBlockSize() { + return blocksize; + } + + /** + * Get the replication factor of a file. + * @return the replication factor of a file. + */ + public short getReplication() { + return block_replication; + } + + /** + * Get the modification time of the file. + * @return the modification time of file in milliseconds since January 1, 1970 UTC. + */ + public long getModificationTime() { + return modification_time; + } + + /** + * Get the access time of the file. + * @return the access time of file in milliseconds since January 1, 1970 UTC. + */ + public long getAccessTime() { + return access_time; + } + + /** + * Get FsPermission associated with the file. + * @return permssion. If a filesystem does not have a notion of permissions + * or if permissions could not be determined, then default + * permissions equivalent of "rwxrwxrwx" is returned. + */ + public FsPermission getPermission() { + return permission; + } + + /** + * Get the owner of the file. + * @return owner of the file. The string could be empty if there is no + * notion of owner of a file in a filesystem or if it could not + * be determined (rare). + */ + public String getOwner() { + return owner; + } + + /** + * Get the group associated with the file. + * @return group for the file. The string could be empty if there is no + * notion of group of a file in a filesystem or if it could not + * be determined (rare). + */ + public String getGroup() { + return group; + } + + public Path getPath() { + return path; + } + + /* These are provided so that these values could be loaded lazily + * by a filesystem (e.g. local file system). + */ + + /** + * Sets permission. + * @param permission if permission is null, default value is set + */ + protected void setPermission(FsPermission permission) { + this.permission = (permission == null) ? + FsPermission.getDefault() : permission; + } + + /** + * Sets owner. + * @param owner if it is null, default value is set + */ + protected void setOwner(String owner) { + this.owner = (owner == null) ? "" : owner; + } + + /** + * Sets group. + * @param group if it is null, default value is set + */ + protected void setGroup(String group) { + this.group = (group == null) ? "" : group; + } + + ////////////////////////////////////////////////// + // Writable + ////////////////////////////////////////////////// + public void write(DataOutput out) throws IOException { + Text.writeString(out, getPath().toString()); + out.writeLong(length); + out.writeBoolean(isdir); + out.writeShort(block_replication); + out.writeLong(blocksize); + out.writeLong(modification_time); + out.writeLong(access_time); + permission.write(out); + Text.writeString(out, owner); + Text.writeString(out, group); + } + + public void readFields(DataInput in) throws IOException { + String strPath = Text.readString(in); + this.path = new Path(strPath); + this.length = in.readLong(); + this.isdir = in.readBoolean(); + this.block_replication = in.readShort(); + blocksize = in.readLong(); + modification_time = in.readLong(); + access_time = in.readLong(); + permission.readFields(in); + owner = Text.readString(in); + group = Text.readString(in); + } + + /** + * Compare this object to another object + * + * @param o the object to be compared. + * @return a negative integer, zero, or a positive integer as this object + * is less than, equal to, or greater than the specified object. 
+ * + * @throws ClassCastException if the specified object's is not of + * type FileStatus + */ + public int compareTo(Object o) { + FileStatus other = (FileStatus)o; + return this.getPath().compareTo(other.getPath()); + } + + /** Compare if this object is equal to another object + * @param o the object to be compared. + * @return true if two file status has the same path name; false if not. + */ + public boolean equals(Object o) { + if (o == null) { + return false; + } + if (this == o) { + return true; + } + if (!(o instanceof FileStatus)) { + return false; + } + FileStatus other = (FileStatus)o; + return this.getPath().equals(other.getPath()); + } + + /** + * Returns a hash code value for the object, which is defined as + * the hash code of the path name. + * + * @return a hash code value for the path name. + */ + public int hashCode() { + return getPath().hashCode(); + } +} diff --git a/src/java/org/apache/hadoop/fs/FileSystem.java b/src/java/org/apache/hadoop/fs/FileSystem.java new file mode 100644 index 00000000000..fcc5817d27e --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileSystem.java @@ -0,0 +1,1648 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Pattern; + +import javax.security.auth.login.LoginException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.MultipleIOException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; + +/**************************************************************** + * An abstract base class for a fairly generic filesystem. It + * may be implemented as a distributed filesystem, or as a "local" + * one that reflects the locally-connected disk. The local version + * exists for small Hadoop instances and for testing. + * + *

+ * + * All user code that may potentially use the Hadoop Distributed + * File System should be written to use a FileSystem object. The + * Hadoop DFS is a multi-machine system that appears as a single + * disk. It's useful because of its fault tolerance and potentially + * very large capacity. + * + *

+ * The local implementation is {@link LocalFileSystem} and distributed + * implementation is DistributedFileSystem. + *****************************************************************/ +public abstract class FileSystem extends Configured implements Closeable { + private static final String FS_DEFAULT_NAME_KEY = "fs.default.name"; + + public static final Log LOG = LogFactory.getLog(FileSystem.class); + + /** FileSystem cache */ + private static final Cache CACHE = new Cache(); + + /** The key this instance is stored under in the cache. */ + private Cache.Key key; + + /** Recording statistics per a FileSystem class */ + private static final Map, Statistics> + statisticsTable = + new IdentityHashMap, Statistics>(); + + /** + * The statistics for this file system. + */ + protected Statistics statistics; + + /** + * A cache of files that should be deleted when filsystem is closed + * or the JVM is exited. + */ + private Set deleteOnExit = new TreeSet(); + + /** Returns the configured filesystem implementation.*/ + public static FileSystem get(Configuration conf) throws IOException { + return get(getDefaultUri(conf), conf); + } + + /** Get the default filesystem URI from a configuration. + * @param conf the configuration to access + * @return the uri of the default filesystem + */ + public static URI getDefaultUri(Configuration conf) { + return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, "file:///"))); + } + + /** Set the default filesystem URI in a configuration. + * @param conf the configuration to alter + * @param uri the new default filesystem uri + */ + public static void setDefaultUri(Configuration conf, URI uri) { + conf.set(FS_DEFAULT_NAME_KEY, uri.toString()); + } + + /** Set the default filesystem URI in a configuration. + * @param conf the configuration to alter + * @param uri the new default filesystem uri + */ + public static void setDefaultUri(Configuration conf, String uri) { + setDefaultUri(conf, URI.create(fixName(uri))); + } + + /** Called after a new FileSystem instance is constructed. + * @param name a uri whose authority section names the host, port, etc. + * for this FileSystem + * @param conf the configuration + */ + public void initialize(URI name, Configuration conf) throws IOException { + statistics = getStatistics(name.getScheme(), getClass()); + } + + /** Returns a URI whose scheme and authority identify this FileSystem.*/ + public abstract URI getUri(); + + /** Update old-format filesystem names, for back-compatibility. This should + * eventually be replaced with a checkName() method that throws an exception + * for old-format names. */ + private static String fixName(String name) { + // convert old-format name to new-format name + if (name.equals("local")) { // "local" is now "file:///". + LOG.warn("\"local\" is a deprecated filesystem name." + +" Use \"file:///\" instead."); + name = "file:///"; + } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://" + LOG.warn("\""+name+"\" is a deprecated filesystem name." + +" Use \"hdfs://"+name+"/\" instead."); + name = "hdfs://"+name; + } + return name; + } + + /** + * Get the local file syste + * @param conf the configuration to configure the file system with + * @return a LocalFileSystem + */ + public static LocalFileSystem getLocal(Configuration conf) + throws IOException { + return (LocalFileSystem)get(LocalFileSystem.NAME, conf); + } + + /** Returns the FileSystem for this URI's scheme and authority. 
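For illustration, a short sketch of obtaining filesystem handles through the configuration plumbing above; the namenode URI is a made-up example value for fs.default.name.

    // The URI below is hypothetical; any valid fs.default.name works here.
    Configuration conf = new Configuration();
    FileSystem.setDefaultUri(conf, "hdfs://namenode.example.com:9000");
    FileSystem fs = FileSystem.get(conf);                // the configured default
    LocalFileSystem localFs = FileSystem.getLocal(conf); // always file:///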
The scheme + * of the URI determines a configuration property name, + * fs.scheme.class whose value names the FileSystem class. + * The entire URI is passed to the FileSystem instance's initialize method. + */ + public static FileSystem get(URI uri, Configuration conf) throws IOException { + String scheme = uri.getScheme(); + String authority = uri.getAuthority(); + + if (scheme == null) { // no scheme: use default FS + return get(conf); + } + + if (authority == null) { // no authority + URI defaultUri = getDefaultUri(conf); + if (scheme.equals(defaultUri.getScheme()) // if scheme matches default + && defaultUri.getAuthority() != null) { // & default has authority + return get(defaultUri, conf); // return default + } + } + + return CACHE.get(uri, conf); + } + + /** Returns the FileSystem for this URI's scheme and authority. The scheme + * of the URI determines a configuration property name, + * fs.scheme.class whose value names the FileSystem class. + * The entire URI is passed to the FileSystem instance's initialize method. + * This always returns a new FileSystem object. + */ + public static FileSystem newInstance(URI uri, Configuration conf) throws IOException { + String scheme = uri.getScheme(); + String authority = uri.getAuthority(); + + if (scheme == null) { // no scheme: use default FS + return newInstance(conf); + } + + if (authority == null) { // no authority + URI defaultUri = getDefaultUri(conf); + if (scheme.equals(defaultUri.getScheme()) // if scheme matches default + && defaultUri.getAuthority() != null) { // & default has authority + return newInstance(defaultUri, conf); // return default + } + } + return CACHE.getUnique(uri, conf); + } + + /** Returns a unique configured filesystem implementation. + * This always returns a new FileSystem object. */ + public static FileSystem newInstance(Configuration conf) throws IOException { + return newInstance(getDefaultUri(conf), conf); + } + + /** + * Get a unique local file system object + * @param conf the configuration to configure the file system with + * @return a LocalFileSystem + * This always returns a new FileSystem object. + */ + public static LocalFileSystem newInstanceLocal(Configuration conf) + throws IOException { + return (LocalFileSystem)newInstance(LocalFileSystem.NAME, conf); + } + + private static class ClientFinalizer extends Thread { + public synchronized void run() { + try { + FileSystem.closeAll(); + } catch (IOException e) { + LOG.info("FileSystem.closeAll() threw an exception:\n" + e); + } + } + } + private static final ClientFinalizer clientFinalizer = new ClientFinalizer(); + + /** + * Close all cached filesystems. Be sure those filesystems are not + * used anymore. + * + * @throws IOException + */ + public static void closeAll() throws IOException { + CACHE.closeAll(); + } + + /** Make sure that a path specifies a FileSystem. */ + public Path makeQualified(Path path) { + checkPath(path); + return path.makeQualified(this); + } + + /** create a file with the provided permission + * The permission of the file is set to be the provided permission as in + * setPermission, not permission&~umask + * + * It is implemented using two RPCs. It is understood that it is inefficient, + * but the implementation is thread-safe. The other option is to change the + * value of umask in configuration to be 0, but it is not thread-safe. 
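A hedged usage sketch of the create-with-permission helper documented above (its definition follows just below); the fs handle and path are hypothetical, and the permission is assembled from FsAction values purely as an example.

    // Hypothetical path; rw-r----- is an arbitrary example permission.
    FsPermission perm =
        new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE);
    FSDataOutputStream out =
        FileSystem.create(fs, new Path("/user/alice/report.txt"), perm);
    out.writeBytes("hello\n");
    out.close();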
+ * + * @param fs file system handle + * @param file the name of the file to be created + * @param permission the permission of the file + * @return an output stream + * @throws IOException + */ + public static FSDataOutputStream create(FileSystem fs, + Path file, FsPermission permission) throws IOException { + // create the file with default permission + FSDataOutputStream out = fs.create(file); + // set its permission to the supplied one + fs.setPermission(file, permission); + return out; + } + + /** create a directory with the provided permission + * The permission of the directory is set to be the provided permission as in + * setPermission, not permission&~umask + * + * @see #create(FileSystem, Path, FsPermission) + * + * @param fs file system handle + * @param dir the name of the directory to be created + * @param permission the permission of the directory + * @return true if the directory creation succeeds; false otherwise + * @throws IOException + */ + public static boolean mkdirs(FileSystem fs, Path dir, FsPermission permission) + throws IOException { + // create the directory using the default permission + boolean result = fs.mkdirs(dir); + // set its permission to be the supplied one + fs.setPermission(dir, permission); + return result; + } + + /////////////////////////////////////////////////////////////// + // FileSystem + /////////////////////////////////////////////////////////////// + + protected FileSystem() { + super(null); + } + + /** Check that a Path belongs to this FileSystem. */ + protected void checkPath(Path path) { + URI uri = path.toUri(); + if (uri.getScheme() == null) // fs is relative + return; + String thisScheme = this.getUri().getScheme(); + String thatScheme = uri.getScheme(); + String thisAuthority = this.getUri().getAuthority(); + String thatAuthority = uri.getAuthority(); + //authority and scheme are not case sensitive + if (thisScheme.equalsIgnoreCase(thatScheme)) {// schemes match + if (thisAuthority == thatAuthority || // & authorities match + (thisAuthority != null && + thisAuthority.equalsIgnoreCase(thatAuthority))) + return; + + if (thatAuthority == null && // path's authority is null + thisAuthority != null) { // fs has an authority + URI defaultUri = getDefaultUri(getConf()); // & is the conf default + if (thisScheme.equalsIgnoreCase(defaultUri.getScheme()) && + thisAuthority.equalsIgnoreCase(defaultUri.getAuthority())) + return; + try { // or the default fs's uri + defaultUri = get(getConf()).getUri(); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (thisScheme.equalsIgnoreCase(defaultUri.getScheme()) && + thisAuthority.equalsIgnoreCase(defaultUri.getAuthority())) + return; + } + } + throw new IllegalArgumentException("Wrong FS: "+path+ + ", expected: "+this.getUri()); + } + + /** + * Return an array containing hostnames, offset and size of + * portions of the given file. For a nonexistent + * file or regions, null will be returned. + * + * This call is most helpful with DFS, where it returns + * hostnames of machines that contain the given file. + * + * The FileSystem will simply return an elt containing 'localhost'. 
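For illustration, a caller-side sketch of the block-location query whose contract is described above; the path is made up, and against this base class the result is the single 'localhost' entry mentioned in the javadoc.

    // Hypothetical file; DFS implementations return real datanode hosts.
    FileStatus stat = fs.getFileStatus(new Path("/data/events.log"));
    BlockLocation[] blocks = fs.getFileBlockLocations(stat, 0, stat.getLen());
    for (BlockLocation blk : blocks) {
      System.out.println(blk.getOffset() + "+" + blk.getLength()
          + " -> " + java.util.Arrays.toString(blk.getHosts()));
    }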
+ */ + public BlockLocation[] getFileBlockLocations(FileStatus file, + long start, long len) throws IOException { + if (file == null) { + return null; + } + + if ( (start<0) || (len < 0) ) { + throw new IllegalArgumentException("Invalid start or len parameter"); + } + + if (file.getLen() < start) { + return new BlockLocation[0]; + + } + String[] name = { "localhost:50010" }; + String[] host = { "localhost" }; + return new BlockLocation[] { new BlockLocation(name, host, 0, file.getLen()) }; + } + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + public abstract FSDataInputStream open(Path f, int bufferSize) + throws IOException; + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file to open + */ + public FSDataInputStream open(Path f) throws IOException { + return open(f, getConf().getInt("io.file.buffer.size", 4096)); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f) throws IOException { + return create(f, true); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + */ + public FSDataOutputStream create(Path f, boolean overwrite) + throws IOException { + return create(f, overwrite, + getConf().getInt("io.file.buffer.size", 4096), + getDefaultReplication(), + getDefaultBlockSize()); + } + + /** + * Create an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f, Progressable progress) throws IOException { + return create(f, true, + getConf().getInt("io.file.buffer.size", 4096), + getDefaultReplication(), + getDefaultBlockSize(), progress); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f, short replication) + throws IOException { + return create(f, true, + getConf().getInt("io.file.buffer.size", 4096), + replication, + getDefaultBlockSize()); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * Files are overwritten by default. + */ + public FSDataOutputStream create(Path f, short replication, Progressable progress) + throws IOException { + return create(f, true, + getConf().getInt("io.file.buffer.size", 4096), + replication, + getDefaultBlockSize(), progress); + } + + + /** + * Opens an FSDataOutputStream at the indicated Path. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize + ) throws IOException { + return create(f, overwrite, bufferSize, + getDefaultReplication(), + getDefaultBlockSize()); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. 
+ */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize, + Progressable progress + ) throws IOException { + return create(f, overwrite, bufferSize, + getDefaultReplication(), + getDefaultBlockSize(), progress); + } + + + /** + * Opens an FSDataOutputStream at the indicated Path. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize, + short replication, + long blockSize + ) throws IOException { + return create(f, overwrite, bufferSize, replication, blockSize, null); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + */ + public FSDataOutputStream create(Path f, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress + ) throws IOException { + return this.create(f, FsPermission.getDefault(), + overwrite, bufferSize, replication, blockSize, progress); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. + * @param f the file name to open + * @param permission + * @param overwrite if a file with this name already exists, then if true, + * the file will be overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + * @param blockSize + * @param progress + * @throws IOException + * @see #setPermission(Path, FsPermission) + */ + public abstract FSDataOutputStream create(Path f, + FsPermission permission, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException; + + /** + * Creates the given Path as a brand-new zero-length file. If + * create fails, or if it already existed, return false. + */ + public boolean createNewFile(Path f) throws IOException { + if (exists(f)) { + return false; + } else { + create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close(); + return true; + } + } + + /** + * Append to an existing file (optional operation). + * Same as append(f, getConf().getInt("io.file.buffer.size", 4096), null) + * @param f the existing file to be appended. + * @throws IOException + */ + public FSDataOutputStream append(Path f) throws IOException { + return append(f, getConf().getInt("io.file.buffer.size", 4096), null); + } + /** + * Append to an existing file (optional operation). + * Same as append(f, bufferSize, null). + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. + * @throws IOException + */ + public FSDataOutputStream append(Path f, int bufferSize) throws IOException { + return append(f, bufferSize, null); + } + + /** + * Append to an existing file (optional operation). + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. + * @param progress for reporting progress if it is not null. 
+ * @throws IOException + */ + public abstract FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException; + + /** + * Set replication for an existing file. + * + * @param src file name + * @param replication new replication + * @throws IOException + * @return true if successful; + * false if file does not exist or is a directory + */ + public boolean setReplication(Path src, short replication) + throws IOException { + return true; + } + + /** + * Renames Path src to Path dst. Can take place on local fs + * or remote DFS. + */ + public abstract boolean rename(Path src, Path dst) throws IOException; + + /** Delete a file. + * + * @param f the path to delete. + * @param recursive if path is a directory and set to + * true, the directory is deleted else throws an exception. In + * case of a file the recursive can be set to either true or false. + * @return true if delete is successful else false. + * @throws IOException + */ + public abstract boolean delete(Path f, boolean recursive) throws IOException; + + /** + * Mark a path to be deleted when FileSystem is closed. + * When the JVM shuts down, + * all FileSystem objects will be closed automatically. + * Then, + * the marked path will be deleted as a result of closing the FileSystem. + * + * The path has to exist in the file system. + * + * @param f the path to delete. + * @return true if deleteOnExit is successful, otherwise false. + * @throws IOException + */ + public boolean deleteOnExit(Path f) throws IOException { + if (!exists(f)) { + return false; + } + synchronized (deleteOnExit) { + deleteOnExit.add(f); + } + return true; + } + + /** + * Delete all files that were marked as delete-on-exit. This recursively + * deletes all files in the specified paths. + */ + protected void processDeleteOnExit() { + synchronized (deleteOnExit) { + for (Iterator iter = deleteOnExit.iterator(); iter.hasNext();) { + Path path = iter.next(); + try { + delete(path, true); + } + catch (IOException e) { + LOG.info("Ignoring failure to deleteOnExit for path " + path); + } + iter.remove(); + } + } + } + + /** Check if exists. + * @param f source file + */ + public boolean exists(Path f) throws IOException { + try { + return getFileStatus(f) != null; + } catch (FileNotFoundException e) { + return false; + } + } + + /** True iff the named path is a directory. + * Note: Avoid using this method. Instead reuse the FileStatus + * returned by getFileStatus() or listStatus() methods. + */ + public boolean isDirectory(Path f) throws IOException { + try { + return getFileStatus(f).isDir(); + } catch (FileNotFoundException e) { + return false; // f does not exist + } + } + + /** True iff the named path is a regular file. + * Note: Avoid using this method. Instead reuse the FileStatus + * returned by getFileStatus() or listStatus() methods. + */ + public boolean isFile(Path f) throws IOException { + try { + return !getFileStatus(f).isDir(); + } catch (FileNotFoundException e) { + return false; // f does not exist + } + } + + /** Return the {@link ContentSummary} of a given {@link Path}. */ + public ContentSummary getContentSummary(Path f) throws IOException { + FileStatus status = getFileStatus(f); + if (!status.isDir()) { + // f is a file + return new ContentSummary(status.getLen(), 1, 0); + } + // f is a directory + long[] summary = {0, 0, 1}; + for(FileStatus s : listStatus(f)) { + ContentSummary c = s.isDir() ? 
getContentSummary(s.getPath()) : + new ContentSummary(s.getLen(), 1, 0); + summary[0] += c.getLength(); + summary[1] += c.getFileCount(); + summary[2] += c.getDirectoryCount(); + } + return new ContentSummary(summary[0], summary[1], summary[2]); + } + + final private static PathFilter DEFAULT_FILTER = new PathFilter() { + public boolean accept(Path file) { + return true; + } + }; + + /** + * List the statuses of the files/directories in the given path if the path is + * a directory. + * + * @param f + * given path + * @return the statuses of the files/directories in the given patch + * @throws IOException + */ + public abstract FileStatus[] listStatus(Path f) throws IOException; + + /* + * Filter files/directories in the given path using the user-supplied path + * filter. Results are added to the given array results. + */ + private void listStatus(ArrayList results, Path f, + PathFilter filter) throws IOException { + FileStatus listing[] = listStatus(f); + if (listing != null) { + for (int i = 0; i < listing.length; i++) { + if (filter.accept(listing[i].getPath())) { + results.add(listing[i]); + } + } + } + } + + /** + * Filter files/directories in the given path using the user-supplied path + * filter. + * + * @param f + * a path name + * @param filter + * the user-supplied path filter + * @return an array of FileStatus objects for the files under the given path + * after applying the filter + * @throws IOException + * if encounter any problem while fetching the status + */ + public FileStatus[] listStatus(Path f, PathFilter filter) throws IOException { + ArrayList results = new ArrayList(); + listStatus(results, f, filter); + return results.toArray(new FileStatus[results.size()]); + } + + /** + * Filter files/directories in the given list of paths using default + * path filter. + * + * @param files + * a list of paths + * @return a list of statuses for the files under the given paths after + * applying the filter default Path filter + * @exception IOException + */ + public FileStatus[] listStatus(Path[] files) + throws IOException { + return listStatus(files, DEFAULT_FILTER); + } + + /** + * Filter files/directories in the given list of paths using user-supplied + * path filter. + * + * @param files + * a list of paths + * @param filter + * the user-supplied path filter + * @return a list of statuses for the files under the given paths after + * applying the filter + * @exception IOException + */ + public FileStatus[] listStatus(Path[] files, PathFilter filter) + throws IOException { + ArrayList results = new ArrayList(); + for (int i = 0; i < files.length; i++) { + listStatus(results, files[i], filter); + } + return results.toArray(new FileStatus[results.size()]); + } + + /** + *

+ * Return all the files that match filePattern and are not checksum
+ * files. Results are sorted by their names.
+ *
+ * A filename pattern is composed of regular characters and
+ * special pattern matching characters, which are:
+ *
+ *    ?
+ *      Matches any single character.
+ *
+ *    *
+ *      Matches zero or more characters.
+ *
+ *    [abc]
+ *      Matches a single character from character set {a,b,c}.
+ *
+ *    [a-b]
+ *      Matches a single character from the character range {a...b}.
+ *      Note that character a must be lexicographically less than or
+ *      equal to character b.
+ *
+ *    [^a]
+ *      Matches a single character that is not from character set or
+ *      range {a}. Note that the ^ character must occur immediately
+ *      to the right of the opening bracket.
+ *
+ *    \c
+ *      Removes (escapes) any special meaning of character c.
+ *
+ *    {ab,cd}
+ *      Matches a string from the string set {ab, cd}
+ *
+ *    {ab,c{de,fh}}
+ *      Matches a string from the string set {ab, cde, cfh}
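Purely as an illustration of the pattern syntax listed above, a sketch of a glob query over a made-up directory layout (globStatus itself is defined just below).

    // Hypothetical layout: monthly log directories with part files.
    FileStatus[] matches =
        fs.globStatus(new Path("/logs/2009-0[1-6]/part-*.{log,txt}"));
    for (FileStatus m : matches) {
      System.out.println(m.getPath());
    }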
+ * + * @param pathPattern a regular expression specifying a pth pattern + + * @return an array of paths that match the path pattern + * @throws IOException + */ + public FileStatus[] globStatus(Path pathPattern) throws IOException { + return globStatus(pathPattern, DEFAULT_FILTER); + } + + /** + * Return an array of FileStatus objects whose path names match pathPattern + * and is accepted by the user-supplied path filter. Results are sorted by + * their path names. + * Return null if pathPattern has no glob and the path does not exist. + * Return an empty array if pathPattern has a glob and no path matches it. + * + * @param pathPattern + * a regular expression specifying the path pattern + * @param filter + * a user-supplied path filter + * @return an array of FileStatus objects + * @throws IOException if any I/O error occurs when fetching file status + */ + public FileStatus[] globStatus(Path pathPattern, PathFilter filter) + throws IOException { + String filename = pathPattern.toUri().getPath(); + List filePatterns = GlobExpander.expand(filename); + if (filePatterns.size() == 1) { + return globStatusInternal(pathPattern, filter); + } else { + List results = new ArrayList(); + for (String filePattern : filePatterns) { + FileStatus[] files = globStatusInternal(new Path(filePattern), filter); + for (FileStatus file : files) { + results.add(file); + } + } + return results.toArray(new FileStatus[results.size()]); + } + } + + private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter) + throws IOException { + Path[] parents = new Path[1]; + int level = 0; + String filename = pathPattern.toUri().getPath(); + + // path has only zero component + if ("".equals(filename) || Path.SEPARATOR.equals(filename)) { + return getFileStatus(new Path[]{pathPattern}); + } + + // path has at least one component + String[] components = filename.split(Path.SEPARATOR); + // get the first component + if (pathPattern.isAbsolute()) { + parents[0] = new Path(Path.SEPARATOR); + level = 1; + } else { + parents[0] = new Path(Path.CUR_DIR); + } + + // glob the paths that match the parent path, i.e., [0, components.length-1] + boolean[] hasGlob = new boolean[]{false}; + Path[] parentPaths = globPathsLevel(parents, components, level, hasGlob); + FileStatus[] results; + if (parentPaths == null || parentPaths.length == 0) { + results = null; + } else { + // Now work on the last component of the path + GlobFilter fp = new GlobFilter(components[components.length - 1], filter); + if (fp.hasPattern()) { // last component has a pattern + // list parent directories and then glob the results + results = listStatus(parentPaths, fp); + hasGlob[0] = true; + } else { // last component does not have a pattern + // get all the path names + ArrayList filteredPaths = new ArrayList(parentPaths.length); + for (int i = 0; i < parentPaths.length; i++) { + parentPaths[i] = new Path(parentPaths[i], + components[components.length - 1]); + if (fp.accept(parentPaths[i])) { + filteredPaths.add(parentPaths[i]); + } + } + // get all their statuses + results = getFileStatus( + filteredPaths.toArray(new Path[filteredPaths.size()])); + } + } + + // Decide if the pathPattern contains a glob or not + if (results == null) { + if (hasGlob[0]) { + results = new FileStatus[0]; + } + } else { + if (results.length == 0 ) { + if (!hasGlob[0]) { + results = null; + } + } else { + Arrays.sort(results); + } + } + return results; + } + + /* + * For a path of N components, return a list of paths that match the + * components [level, N-1]. 
+ */ + private Path[] globPathsLevel(Path[] parents, String[] filePattern, + int level, boolean[] hasGlob) throws IOException { + if (level == filePattern.length - 1) + return parents; + if (parents == null || parents.length == 0) { + return null; + } + GlobFilter fp = new GlobFilter(filePattern[level]); + if (fp.hasPattern()) { + parents = FileUtil.stat2Paths(listStatus(parents, fp)); + hasGlob[0] = true; + } else { + for (int i = 0; i < parents.length; i++) { + parents[i] = new Path(parents[i], filePattern[level]); + } + } + return globPathsLevel(parents, filePattern, level + 1, hasGlob); + } + + /* A class that could decide if a string matches the glob or not */ + private static class GlobFilter implements PathFilter { + private PathFilter userFilter = DEFAULT_FILTER; + private Pattern regex; + private boolean hasPattern = false; + + /** Default pattern character: Escape any special meaning. */ + private static final char PAT_ESCAPE = '\\'; + /** Default pattern character: Any single character. */ + private static final char PAT_ANY = '.'; + /** Default pattern character: Character set close. */ + private static final char PAT_SET_CLOSE = ']'; + + GlobFilter() { + } + + GlobFilter(String filePattern) throws IOException { + setRegex(filePattern); + } + + GlobFilter(String filePattern, PathFilter filter) throws IOException { + userFilter = filter; + setRegex(filePattern); + } + + private boolean isJavaRegexSpecialChar(char pChar) { + return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' || + pChar == '|' || pChar == '+'; + } + void setRegex(String filePattern) throws IOException { + int len; + int setOpen; + int curlyOpen; + boolean setRange; + + StringBuilder fileRegex = new StringBuilder(); + + // Validate the pattern + len = filePattern.length(); + if (len == 0) + return; + + setOpen = 0; + setRange = false; + curlyOpen = 0; + + for (int i = 0; i < len; i++) { + char pCh; + + // Examine a single pattern character + pCh = filePattern.charAt(i); + if (pCh == PAT_ESCAPE) { + fileRegex.append(pCh); + i++; + if (i >= len) + error("An escaped character does not present", filePattern, i); + pCh = filePattern.charAt(i); + } else if (isJavaRegexSpecialChar(pCh)) { + fileRegex.append(PAT_ESCAPE); + } else if (pCh == '*') { + fileRegex.append(PAT_ANY); + hasPattern = true; + } else if (pCh == '?') { + pCh = PAT_ANY; + hasPattern = true; + } else if (pCh == '{') { + fileRegex.append('('); + pCh = '('; + curlyOpen++; + hasPattern = true; + } else if (pCh == ',' && curlyOpen > 0) { + fileRegex.append(")|"); + pCh = '('; + } else if (pCh == '}' && curlyOpen > 0) { + // End of a group + curlyOpen--; + fileRegex.append(")"); + pCh = ')'; + } else if (pCh == '[' && setOpen == 0) { + setOpen++; + hasPattern = true; + } else if (pCh == '^' && setOpen > 0) { + } else if (pCh == '-' && setOpen > 0) { + // Character set range + setRange = true; + } else if (pCh == PAT_SET_CLOSE && setRange) { + // Incomplete character set range + error("Incomplete character set range", filePattern, i); + } else if (pCh == PAT_SET_CLOSE && setOpen > 0) { + // End of a character set + if (setOpen < 2) + error("Unexpected end of set", filePattern, i); + setOpen = 0; + } else if (setOpen > 0) { + // Normal character, or the end of a character set range + setOpen++; + setRange = false; + } + fileRegex.append(pCh); + } + + // Check for a well-formed pattern + if (setOpen > 0 || setRange || curlyOpen > 0) { + // Incomplete character set or character range + error("Expecting set closure character or end of range, or 
}", + filePattern, len); + } + regex = Pattern.compile(fileRegex.toString()); + } + + boolean hasPattern() { + return hasPattern; + } + + public boolean accept(Path path) { + return regex.matcher(path.getName()).matches() && userFilter.accept(path); + } + + private void error(String s, String pattern, int pos) throws IOException { + throw new IOException("Illegal file pattern: " + +s+ " for glob "+ pattern + " at " + pos); + } + } + + /** Return the current user's home directory in this filesystem. + * The default implementation returns "/user/$USER/". + */ + public Path getHomeDirectory() { + return new Path("/user/"+System.getProperty("user.name")) + .makeQualified(this); + } + + + /** + * Set the current working directory for the given file system. All relative + * paths will be resolved relative to it. + * + * @param new_dir + */ + public abstract void setWorkingDirectory(Path new_dir); + + /** + * Get the current working directory for the given file system + * @return the directory pathname + */ + public abstract Path getWorkingDirectory(); + + /** + * Call {@link #mkdirs(Path, FsPermission)} with default permission. + */ + public boolean mkdirs(Path f) throws IOException { + return mkdirs(f, FsPermission.getDefault()); + } + + /** + * Make the given file and all non-existent parents into + * directories. Has the semantics of Unix 'mkdir -p'. + * Existence of the directory hierarchy is not an error. + */ + public abstract boolean mkdirs(Path f, FsPermission permission + ) throws IOException; + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name and the source is kept intact afterwards + */ + public void copyFromLocalFile(Path src, Path dst) + throws IOException { + copyFromLocalFile(false, src, dst); + } + + /** + * The src files is on the local disk. Add it to FS at + * the given dst name, removing the source afterwards. + */ + public void moveFromLocalFile(Path[] srcs, Path dst) + throws IOException { + copyFromLocalFile(true, true, srcs, dst); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name, removing the source afterwards. + */ + public void moveFromLocalFile(Path src, Path dst) + throws IOException { + copyFromLocalFile(true, src, dst); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + copyFromLocalFile(delSrc, true, src, dst); + } + + /** + * The src files are on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, boolean overwrite, + Path[] srcs, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(getLocal(conf), srcs, this, dst, delSrc, overwrite, conf); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, boolean overwrite, + Path src, Path dst) + throws IOException { + Configuration conf = getConf(); + FileUtil.copy(getLocal(conf), src, this, dst, delSrc, overwrite, conf); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. 
+ */ + public void copyToLocalFile(Path src, Path dst) throws IOException { + copyToLocalFile(false, src, dst); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * Remove the source afterwards + */ + public void moveToLocalFile(Path src, Path dst) throws IOException { + copyToLocalFile(true, src, dst); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * delSrc indicates if the src will be removed or not. + */ + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, getLocal(getConf()), dst, delSrc, getConf()); + } + + /** + * Returns a local File that the user can write output to. The caller + * provides both the eventual FS target name and the local working + * file. If the FS is local, we write directly into the target. If + * the FS is remote, we write into the tmp local area. + */ + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return tmpLocalFile; + } + + /** + * Called when we're all done writing to the target. A local FS will + * do nothing, because we've written to exactly the right place. A remote + * FS will copy the contents of tmpLocalFile to the correct target at + * fsOutputFile. + */ + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + moveFromLocalFile(tmpLocalFile, fsOutputFile); + } + + /** + * No more filesystem operations are needed. Will + * release any held locks. + */ + public void close() throws IOException { + // delete all files that were marked as delete-on-exit. + processDeleteOnExit(); + CACHE.remove(this.key, this); + } + + /** Return the total size of all files in the filesystem.*/ + public long getUsed() throws IOException{ + long used = 0; + FileStatus[] files = listStatus(new Path("/")); + for(FileStatus file:files){ + used += file.getLen(); + } + return used; + } + + /** Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. */ + public long getDefaultBlockSize() { + // default to 32MB: large enough to minimize the impact of seeks + return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024); + } + + /** + * Get the default replication. + */ + public short getDefaultReplication() { return 1; } + + /** + * Return a file status object that represents the path. + * @param f The path we want information from + * @return a FileStatus object + * @throws FileNotFoundException when the path does not exist; + * IOException see specific implementation + */ + public abstract FileStatus getFileStatus(Path f) throws IOException; + + /** + * Get the checksum of a file. + * + * @param f The file path + * @return The file checksum. The default return value is null, + * which indicates that no checksum algorithm is implemented + * in the corresponding FileSystem. + */ + public FileChecksum getFileChecksum(Path f) throws IOException { + return null; + } + + /** + * Set the verify checksum flag. This is only applicable if the + * corresponding FileSystem supports checksum. By default doesn't do anything. + * @param verifyChecksum + */ + public void setVerifyChecksum(boolean verifyChecksum) { + //doesn't do anything + } + + /** + * Return a list of file status objects that corresponds to the list of paths + * excluding those non-existent paths. 
+ * + * @param paths + * the list of paths we want information from + * @return a list of FileStatus objects + * @throws IOException + * see specific implementation + */ + private FileStatus[] getFileStatus(Path[] paths) throws IOException { + if (paths == null) { + return null; + } + ArrayList results = new ArrayList(paths.length); + for (int i = 0; i < paths.length; i++) { + try { + results.add(getFileStatus(paths[i])); + } catch (FileNotFoundException e) { // do nothing + } + } + return results.toArray(new FileStatus[results.size()]); + } + + /** + * Returns a status object describing the use and capacity of the + * file system. If the file system has multiple partitions, the + * use and capacity of the root partition is reflected. + * + * @return a FsStatus object + * @throws IOException + * see specific implementation + */ + public FsStatus getStatus() throws IOException { + return getStatus(null); + } + + /** + * Returns a status object describing the use and capacity of the + * file system. If the file system has multiple partitions, the + * use and capacity of the partition pointed to by the specified + * path is reflected. + * @param p Path for which status should be obtained. null means + * the default partition. + * @return a FsStatus object + * @throws IOException + * see specific implementation + */ + public FsStatus getStatus(Path p) throws IOException { + return new FsStatus(Long.MAX_VALUE, 0, Long.MAX_VALUE); + } + + /** + * Set permission of a path. + * @param p + * @param permission + */ + public void setPermission(Path p, FsPermission permission + ) throws IOException { + } + + /** + * Set owner of a path (i.e. a file or a directory). + * The parameters username and groupname cannot both be null. + * @param p The path + * @param username If it is null, the original username remains unchanged. + * @param groupname If it is null, the original groupname remains unchanged. + */ + public void setOwner(Path p, String username, String groupname + ) throws IOException { + } + + /** + * Set access time of a file + * @param p The path + * @param mtime Set the modification time of this file. + * The number of milliseconds since Jan 1, 1970. + * A value of -1 means that this call should not set modification time. + * @param atime Set the access time of this file. + * The number of milliseconds since Jan 1, 1970. + * A value of -1 means that this call should not set access time. + */ + public void setTimes(Path p, long mtime, long atime + ) throws IOException { + } + + private static FileSystem createFileSystem(URI uri, Configuration conf + ) throws IOException { + Class clazz = conf.getClass("fs." 
+ uri.getScheme() + ".impl", null); + if (clazz == null) { + throw new IOException("No FileSystem for scheme: " + uri.getScheme()); + } + FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); + fs.initialize(uri, conf); + return fs; + } + + /** Caching FileSystem objects */ + static class Cache { + private final Map map = new HashMap(); + + /** A variable that makes all objects in the cache unique */ + private static AtomicLong unique = new AtomicLong(1); + + synchronized FileSystem get(URI uri, Configuration conf) throws IOException{ + Key key = new Key(uri, conf); + return getInternal(uri, conf, key); + } + + /** The objects inserted into the cache using this method are all unique */ + synchronized FileSystem getUnique(URI uri, Configuration conf) throws IOException{ + Key key = new Key(uri, conf, unique.getAndIncrement()); + return getInternal(uri, conf, key); + } + + private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ + FileSystem fs = map.get(key); + if (fs == null) { + fs = createFileSystem(uri, conf); + if (map.isEmpty() && !clientFinalizer.isAlive()) { + Runtime.getRuntime().addShutdownHook(clientFinalizer); + } + fs.key = key; + map.put(key, fs); + } + return fs; + } + + synchronized void remove(Key key, FileSystem fs) { + if (map.containsKey(key) && fs == map.get(key)) { + map.remove(key); + if (map.isEmpty() && !clientFinalizer.isAlive()) { + if (!Runtime.getRuntime().removeShutdownHook(clientFinalizer)) { + LOG.info("Could not cancel cleanup thread, though no " + + "FileSystems are open"); + } + } + } + } + + synchronized void closeAll() throws IOException { + List exceptions = new ArrayList(); + for(; !map.isEmpty(); ) { + Map.Entry e = map.entrySet().iterator().next(); + final Key key = e.getKey(); + final FileSystem fs = e.getValue(); + + //remove from cache + remove(key, fs); + + if (fs != null) { + try { + fs.close(); + } + catch(IOException ioe) { + exceptions.add(ioe); + } + } + } + + if (!exceptions.isEmpty()) { + throw MultipleIOException.createIOException(exceptions); + } + } + + /** FileSystem.Cache.Key */ + static class Key { + final String scheme; + final String authority; + final String username; + final long unique; // an artificial way to make a key unique + + Key(URI uri, Configuration conf) throws IOException { + this(uri, conf, 0); + } + + Key(URI uri, Configuration conf, long unique) throws IOException { + scheme = uri.getScheme()==null?"":uri.getScheme().toLowerCase(); + authority = uri.getAuthority()==null?"":uri.getAuthority().toLowerCase(); + this.unique = unique; + UserGroupInformation ugi = UserGroupInformation.readFrom(conf); + if (ugi == null) { + try { + ugi = UserGroupInformation.login(conf); + } catch(LoginException e) { + LOG.warn("uri=" + uri, e); + } + } + username = ugi == null? 
null: ugi.getUserName(); + } + + /** {@inheritDoc} */ + public int hashCode() { + return (scheme + authority + username).hashCode() + (int)unique; + } + + static boolean isEqual(Object a, Object b) { + return a == b || (a != null && a.equals(b)); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj != null && obj instanceof Key) { + Key that = (Key)obj; + return isEqual(this.scheme, that.scheme) + && isEqual(this.authority, that.authority) + && isEqual(this.username, that.username) + && (this.unique == that.unique); + } + return false; + } + + /** {@inheritDoc} */ + public String toString() { + return username + "@" + scheme + "://" + authority; + } + } + } + + public static final class Statistics { + private final String scheme; + private AtomicLong bytesRead = new AtomicLong(); + private AtomicLong bytesWritten = new AtomicLong(); + + public Statistics(String scheme) { + this.scheme = scheme; + } + + /** + * Increment the bytes read in the statistics + * @param newBytes the additional bytes read + */ + public void incrementBytesRead(long newBytes) { + bytesRead.getAndAdd(newBytes); + } + + /** + * Increment the bytes written in the statistics + * @param newBytes the additional bytes written + */ + public void incrementBytesWritten(long newBytes) { + bytesWritten.getAndAdd(newBytes); + } + + /** + * Get the total number of bytes read + * @return the number of bytes + */ + public long getBytesRead() { + return bytesRead.get(); + } + + /** + * Get the total number of bytes written + * @return the number of bytes + */ + public long getBytesWritten() { + return bytesWritten.get(); + } + + public String toString() { + return bytesRead + " bytes read and " + bytesWritten + + " bytes written"; + } + + /** + * Reset the counts of bytes to 0. + */ + public void reset() { + bytesWritten.set(0); + bytesRead.set(0); + } + + /** + * Get the uri scheme associated with this statistics object. + * @return the schema associated with this set of statistics + */ + public String getScheme() { + return scheme; + } + } + + /** + * Get the Map of Statistics object indexed by URI Scheme. 
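A short sketch of reading the per-scheme counters that the Statistics class above accumulates; the println format is just for the example:

    for (FileSystem.Statistics stat : FileSystem.getAllStatistics()) {
      System.out.println(stat.getScheme() + ": " + stat.getBytesRead() + " bytes read, "
          + stat.getBytesWritten() + " bytes written");
    }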
+ * @return a Map having a key as URI scheme and value as Statistics object + * @deprecated use {@link #getAllStatistics} instead + */ + @Deprecated + public static synchronized Map getStatistics() { + Map result = new HashMap(); + for(Statistics stat: statisticsTable.values()) { + result.put(stat.getScheme(), stat); + } + return result; + } + + /** + * Return the FileSystem classes that have Statistics + */ + public static synchronized List getAllStatistics() { + return new ArrayList(statisticsTable.values()); + } + + /** + * Get the statistics for a particular file system + * @param cls the class to lookup + * @return a statistics object + */ + public static synchronized + Statistics getStatistics(String scheme, Class cls) { + Statistics result = statisticsTable.get(cls); + if (result == null) { + result = new Statistics(scheme); + statisticsTable.put(cls, result); + } + return result; + } + + public static synchronized void clearStatistics() { + for(Statistics stat: statisticsTable.values()) { + stat.reset(); + } + } + + public static synchronized + void printStatistics() throws IOException { + for (Map.Entry, Statistics> pair: + statisticsTable.entrySet()) { + System.out.println(" FileSystem " + pair.getKey().getName() + + ": " + pair.getValue()); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FileUtil.java b/src/java/org/apache/hadoop/fs/FileUtil.java new file mode 100644 index 00000000000..d1b1d0b89f8 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FileUtil.java @@ -0,0 +1,794 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.util.Enumeration; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.mortbay.log.Log; + +/** + * A collection of file-processing util methods + */ +public class FileUtil { + /** + * convert an array of FileStatus to an array of Path + * + * @param stats + * an array of FileStatus objects + * @return an array of paths corresponding to the input + */ + public static Path[] stat2Paths(FileStatus[] stats) { + if (stats == null) + return null; + Path[] ret = new Path[stats.length]; + for (int i = 0; i < stats.length; ++i) { + ret[i] = stats[i].getPath(); + } + return ret; + } + + /** + * convert an array of FileStatus to an array of Path. 
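An illustrative combination with globbing (the pattern is invented): if globStatus() comes back null, the original pattern path is returned instead of a null array.

    FileSystem fs = FileSystem.get(new Configuration());
    Path pattern = new Path("/user/alice/logs/*.gz");            // example glob
    Path[] inputs = FileUtil.stat2Paths(fs.globStatus(pattern), pattern);

This is the same idiom FsShell uses later in this patch to expand its command-line arguments.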
+ * If stats if null, return path + * @param stats + * an array of FileStatus objects + * @param path + * default path to return in stats is null + * @return an array of paths corresponding to the input + */ + public static Path[] stat2Paths(FileStatus[] stats, Path path) { + if (stats == null) + return new Path[]{path}; + else + return stat2Paths(stats); + } + + /** + * Delete a directory and all its contents. If + * we return false, the directory may be partially-deleted. + */ + public static boolean fullyDelete(File dir) throws IOException { + File contents[] = dir.listFiles(); + if (contents != null) { + for (int i = 0; i < contents.length; i++) { + if (contents[i].isFile()) { + if (!contents[i].delete()) { + return false; + } + } else { + //try deleting the directory + // this might be a symlink + boolean b = false; + b = contents[i].delete(); + if (b){ + //this was indeed a symlink or an empty directory + continue; + } + // if not an empty directory or symlink let + // fullydelete handle it. + if (!fullyDelete(contents[i])) { + return false; + } + } + } + } + return dir.delete(); + } + + /** + * Recursively delete a directory. + * + * @param fs {@link FileSystem} on which the path is present + * @param dir directory to recursively delete + * @throws IOException + * @deprecated Use {@link FileSystem#delete(Path, boolean)} + */ + @Deprecated + public static void fullyDelete(FileSystem fs, Path dir) + throws IOException { + fs.delete(dir, true); + } + + // + // If the destination is a subdirectory of the source, then + // generate exception + // + private static void checkDependencies(FileSystem srcFS, + Path src, + FileSystem dstFS, + Path dst) + throws IOException { + if (srcFS == dstFS) { + String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR; + String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR; + if (dstq.startsWith(srcq)) { + if (srcq.length() == dstq.length()) { + throw new IOException("Cannot copy " + src + " to itself."); + } else { + throw new IOException("Cannot copy " + src + " to its subdirectory " + + dst); + } + } + } + } + + /** Copy files between FileSystems. */ + public static boolean copy(FileSystem srcFS, Path src, + FileSystem dstFS, Path dst, + boolean deleteSource, + Configuration conf) throws IOException { + return copy(srcFS, src, dstFS, dst, deleteSource, true, conf); + } + + public static boolean copy(FileSystem srcFS, Path[] srcs, + FileSystem dstFS, Path dst, + boolean deleteSource, + boolean overwrite, Configuration conf) + throws IOException { + boolean gotException = false; + boolean returnVal = true; + StringBuffer exceptions = new StringBuffer(); + + if (srcs.length == 1) + return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf); + + // Check if dest is directory + if (!dstFS.exists(dst)) { + throw new IOException("`" + dst +"': specified destination directory " + + "doest not exist"); + } else { + FileStatus sdst = dstFS.getFileStatus(dst); + if (!sdst.isDir()) + throw new IOException("copying multiple files, but last argument `" + + dst + "' is not a directory"); + } + + for (Path src : srcs) { + try { + if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf)) + returnVal = false; + } catch (IOException e) { + gotException = true; + exceptions.append(e.getMessage()); + exceptions.append("\n"); + } + } + if (gotException) { + throw new IOException(exceptions.toString()); + } + return returnVal; + } + + /** Copy files between FileSystems. 
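A hedged sketch of a single-file copy between two file systems using the overload below; the URIs are placeholders:

    Configuration conf = new Configuration();
    Path src = new Path("hdfs://nn1:8020/data/events.log");      // example source
    Path dst = new Path("hdfs://nn2:8020/backup/events.log");    // example destination
    boolean copied = FileUtil.copy(src.getFileSystem(conf), src,
                                   dst.getFileSystem(conf), dst,
                                   false /* deleteSource */, true /* overwrite */, conf);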
*/ + public static boolean copy(FileSystem srcFS, Path src, + FileSystem dstFS, Path dst, + boolean deleteSource, + boolean overwrite, + Configuration conf) throws IOException { + FileStatus fileStatus = srcFS.getFileStatus(src); + return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf); + } + + /** Copy files between FileSystems. */ + private static boolean copy(FileSystem srcFS, FileStatus srcStatus, + FileSystem dstFS, Path dst, + boolean deleteSource, + boolean overwrite, + Configuration conf) throws IOException { + Path src = srcStatus.getPath(); + dst = checkDest(src.getName(), dstFS, dst, overwrite); + if (srcStatus.isDir()) { + checkDependencies(srcFS, src, dstFS, dst); + if (!dstFS.mkdirs(dst)) { + return false; + } + FileStatus contents[] = srcFS.listStatus(src); + for (int i = 0; i < contents.length; i++) { + copy(srcFS, contents[i], dstFS, + new Path(dst, contents[i].getPath().getName()), + deleteSource, overwrite, conf); + } + } else { + InputStream in=null; + OutputStream out = null; + try { + in = srcFS.open(src); + out = dstFS.create(dst, overwrite); + IOUtils.copyBytes(in, out, conf, true); + } catch (IOException e) { + IOUtils.closeStream(out); + IOUtils.closeStream(in); + throw e; + } + } + if (deleteSource) { + return srcFS.delete(src, true); + } else { + return true; + } + + } + + /** Copy all files in a directory to one output file (merge). */ + public static boolean copyMerge(FileSystem srcFS, Path srcDir, + FileSystem dstFS, Path dstFile, + boolean deleteSource, + Configuration conf, String addString) throws IOException { + dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false); + + if (!srcFS.getFileStatus(srcDir).isDir()) + return false; + + OutputStream out = dstFS.create(dstFile); + + try { + FileStatus contents[] = srcFS.listStatus(srcDir); + for (int i = 0; i < contents.length; i++) { + if (!contents[i].isDir()) { + InputStream in = srcFS.open(contents[i].getPath()); + try { + IOUtils.copyBytes(in, out, conf, false); + if (addString!=null) + out.write(addString.getBytes("UTF-8")); + + } finally { + in.close(); + } + } + } + } finally { + out.close(); + } + + + if (deleteSource) { + return srcFS.delete(srcDir, true); + } else { + return true; + } + } + + /** Copy local files to a FileSystem. */ + public static boolean copy(File src, + FileSystem dstFS, Path dst, + boolean deleteSource, + Configuration conf) throws IOException { + dst = checkDest(src.getName(), dstFS, dst, false); + + if (src.isDirectory()) { + if (!dstFS.mkdirs(dst)) { + return false; + } + File contents[] = src.listFiles(); + for (int i = 0; i < contents.length; i++) { + copy(contents[i], dstFS, new Path(dst, contents[i].getName()), + deleteSource, conf); + } + } else if (src.isFile()) { + InputStream in = null; + OutputStream out =null; + try { + in = new FileInputStream(src); + out = dstFS.create(dst); + IOUtils.copyBytes(in, out, conf); + } catch (IOException e) { + IOUtils.closeStream( out ); + IOUtils.closeStream( in ); + throw e; + } + } else { + throw new IOException(src.toString() + + ": No such file or directory"); + } + if (deleteSource) { + return FileUtil.fullyDelete(src); + } else { + return true; + } + } + + /** Copy FileSystem files to local files. */ + public static boolean copy(FileSystem srcFS, Path src, + File dst, boolean deleteSource, + Configuration conf) throws IOException { + FileStatus filestatus = srcFS.getFileStatus(src); + return copy(srcFS, filestatus, dst, deleteSource, conf); + } + + /** Copy FileSystem files to local files. 
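For the public variant just above, a one-call download onto the local disk might look like this (paths invented, java.io.File assumed imported):

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileUtil.copy(fs, new Path("/user/alice/part-00000"),
                  new File("/tmp/part-00000"), false /* keep the source */, conf);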
*/ + private static boolean copy(FileSystem srcFS, FileStatus srcStatus, + File dst, boolean deleteSource, + Configuration conf) throws IOException { + Path src = srcStatus.getPath(); + if (srcStatus.isDir()) { + if (!dst.mkdirs()) { + return false; + } + FileStatus contents[] = srcFS.listStatus(src); + for (int i = 0; i < contents.length; i++) { + copy(srcFS, contents[i], + new File(dst, contents[i].getPath().getName()), + deleteSource, conf); + } + } else { + InputStream in = srcFS.open(src); + IOUtils.copyBytes(in, new FileOutputStream(dst), conf); + } + if (deleteSource) { + return srcFS.delete(src, true); + } else { + return true; + } + } + + private static Path checkDest(String srcName, FileSystem dstFS, Path dst, + boolean overwrite) throws IOException { + if (dstFS.exists(dst)) { + FileStatus sdst = dstFS.getFileStatus(dst); + if (sdst.isDir()) { + if (null == srcName) { + throw new IOException("Target " + dst + " is a directory"); + } + return checkDest(null, dstFS, new Path(dst, srcName), overwrite); + } else if (!overwrite) { + throw new IOException("Target " + dst + " already exists"); + } + } + return dst; + } + + /** + * This class is only used on windows to invoke the cygpath command. + */ + private static class CygPathCommand extends Shell { + String[] command; + String result; + CygPathCommand(String path) throws IOException { + command = new String[]{"cygpath", "-u", path}; + run(); + } + String getResult() throws IOException { + return result; + } + protected String[] getExecString() { + return command; + } + protected void parseExecResult(BufferedReader lines) throws IOException { + String line = lines.readLine(); + if (line == null) { + throw new IOException("Can't convert '" + command[2] + + " to a cygwin path"); + } + result = line; + } + } + + /** + * Convert a os-native filename to a path that works for the shell. + * @param filename The filename to convert + * @return The unix pathname + * @throws IOException on windows, there can be problems with the subprocess + */ + public static String makeShellPath(String filename) throws IOException { + if (Path.WINDOWS) { + return new CygPathCommand(filename).getResult(); + } else { + return filename; + } + } + + /** + * Convert a os-native filename to a path that works for the shell. + * @param file The filename to convert + * @return The unix pathname + * @throws IOException on windows, there can be problems with the subprocess + */ + public static String makeShellPath(File file) throws IOException { + return makeShellPath(file, false); + } + + /** + * Convert a os-native filename to a path that works for the shell. + * @param file The filename to convert + * @param makeCanonicalPath + * Whether to make canonical path for the file passed + * @return The unix pathname + * @throws IOException on windows, there can be problems with the subprocess + */ + public static String makeShellPath(File file, boolean makeCanonicalPath) + throws IOException { + if (makeCanonicalPath) { + return makeShellPath(file.getCanonicalPath()); + } else { + return makeShellPath(file.toString()); + } + } + + /** + * Takes an input dir and returns the du on that local directory. Very basic + * implementation. 
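For example (the directory name is made up), sizing a local cache directory:

    File localCache = new File("/tmp/hadoop-alice/mapred/local");
    System.out.println(localCache + " currently holds " + FileUtil.getDU(localCache) + " bytes");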
+ * + * @param dir + * The input dir to get the disk space of this local dir + * @return The total disk space of the input local directory + */ + public static long getDU(File dir) { + long size = 0; + if (!dir.exists()) + return 0; + if (!dir.isDirectory()) { + return dir.length(); + } else { + size = dir.length(); + File[] allFiles = dir.listFiles(); + for (int i = 0; i < allFiles.length; i++) { + size = size + getDU(allFiles[i]); + } + return size; + } + } + + /** + * Given a File input it will unzip the file in a the unzip directory + * passed as the second parameter + * @param inFile The zip file as input + * @param unzipDir The unzip directory where to unzip the zip file. + * @throws IOException + */ + public static void unZip(File inFile, File unzipDir) throws IOException { + Enumeration entries; + ZipFile zipFile = new ZipFile(inFile); + + try { + entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + if (!entry.isDirectory()) { + InputStream in = zipFile.getInputStream(entry); + try { + File file = new File(unzipDir, entry.getName()); + if (!file.getParentFile().mkdirs()) { + if (!file.getParentFile().isDirectory()) { + throw new IOException("Mkdirs failed to create " + + file.getParentFile().toString()); + } + } + OutputStream out = new FileOutputStream(file); + try { + byte[] buffer = new byte[8192]; + int i; + while ((i = in.read(buffer)) != -1) { + out.write(buffer, 0, i); + } + } finally { + out.close(); + } + } finally { + in.close(); + } + } + } + } finally { + zipFile.close(); + } + } + + /** + * Given a Tar File as input it will untar the file in a the untar directory + * passed as the second parameter + * + * This utility will untar ".tar" files and ".tar.gz","tgz" files. + * + * @param inFile The tar file as input. + * @param untarDir The untar directory where to untar the tar file. + * @throws IOException + */ + public static void unTar(File inFile, File untarDir) throws IOException { + if (!untarDir.mkdirs()) { + if (!untarDir.isDirectory()) { + throw new IOException("Mkdirs failed to create " + untarDir); + } + } + + StringBuffer untarCommand = new StringBuffer(); + boolean gzipped = inFile.toString().endsWith("gz"); + if (gzipped) { + untarCommand.append(" gzip -dc '"); + untarCommand.append(FileUtil.makeShellPath(inFile)); + untarCommand.append("' | ("); + } + untarCommand.append("cd '"); + untarCommand.append(FileUtil.makeShellPath(untarDir)); + untarCommand.append("' ; "); + untarCommand.append("tar -xf "); + + if (gzipped) { + untarCommand.append(" -)"); + } else { + untarCommand.append(FileUtil.makeShellPath(inFile)); + } + String[] shellCmd = { "bash", "-c", untarCommand.toString() }; + ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd); + shexec.execute(); + int exitcode = shexec.getExitCode(); + if (exitcode != 0) { + throw new IOException("Error untarring file " + inFile + + ". Tar process exited with exit code " + exitcode); + } + } + + /** + * Class for creating hardlinks. + * Supports Unix, Cygwin, WindXP. 
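A brief sketch of the helper defined below; both file names are invented, and the call shells out to ln or fsutil depending on the platform:

    File existing = new File("/data/current/blk_1234");   // example block file
    File alias    = new File("/data/detach/blk_1234");    // second name for the same data
    FileUtil.HardLink.createHardLink(existing, alias);    // throws IOException if the command fails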
+ * + */ + public static class HardLink { + enum OSType { + OS_TYPE_UNIX, + OS_TYPE_WINXP, + OS_TYPE_SOLARIS, + OS_TYPE_MAC; + } + + private static String[] hardLinkCommand; + private static String[] getLinkCountCommand; + private static OSType osType; + + static { + osType = getOSType(); + switch(osType) { + case OS_TYPE_WINXP: + hardLinkCommand = new String[] {"fsutil","hardlink","create", null, null}; + getLinkCountCommand = new String[] {"stat","-c%h"}; + break; + case OS_TYPE_SOLARIS: + hardLinkCommand = new String[] {"ln", null, null}; + getLinkCountCommand = new String[] {"ls","-l"}; + break; + case OS_TYPE_MAC: + hardLinkCommand = new String[] {"ln", null, null}; + getLinkCountCommand = new String[] {"stat","-f%l"}; + break; + case OS_TYPE_UNIX: + default: + hardLinkCommand = new String[] {"ln", null, null}; + getLinkCountCommand = new String[] {"stat","-c%h"}; + } + } + + static private OSType getOSType() { + String osName = System.getProperty("os.name"); + if (osName.indexOf("Windows") >= 0 && + (osName.indexOf("XP") >= 0 || osName.indexOf("2003") >= 0 || osName.indexOf("Vista") >= 0)) + return OSType.OS_TYPE_WINXP; + else if (osName.indexOf("SunOS") >= 0) + return OSType.OS_TYPE_SOLARIS; + else if (osName.indexOf("Mac") >= 0) + return OSType.OS_TYPE_MAC; + else + return OSType.OS_TYPE_UNIX; + } + + /** + * Creates a hardlink + */ + public static void createHardLink(File target, + File linkName) throws IOException { + int len = hardLinkCommand.length; + if (osType == OSType.OS_TYPE_WINXP) { + hardLinkCommand[len-1] = target.getCanonicalPath(); + hardLinkCommand[len-2] = linkName.getCanonicalPath(); + } else { + hardLinkCommand[len-2] = makeShellPath(target, true); + hardLinkCommand[len-1] = makeShellPath(linkName, true); + } + // execute shell command + Process process = Runtime.getRuntime().exec(hardLinkCommand); + try { + if (process.waitFor() != 0) { + String errMsg = new BufferedReader(new InputStreamReader( + process.getInputStream())).readLine(); + if (errMsg == null) errMsg = ""; + String inpMsg = new BufferedReader(new InputStreamReader( + process.getErrorStream())).readLine(); + if (inpMsg == null) inpMsg = ""; + throw new IOException(errMsg + inpMsg); + } + } catch (InterruptedException e) { + throw new IOException(StringUtils.stringifyException(e)); + } finally { + process.destroy(); + } + } + + /** + * Retrieves the number of links to the specified file. 
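Continuing the sketch above, either name now reports a link count of 2 (assuming no other links to the file):

    int links = FileUtil.HardLink.getLinkCount(new File("/data/current/blk_1234"));
    // links == 2 after the createHardLink() call sketched earlier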
+ */ + public static int getLinkCount(File fileName) throws IOException { + int len = getLinkCountCommand.length; + String[] cmd = new String[len + 1]; + for (int i = 0; i < len; i++) { + cmd[i] = getLinkCountCommand[i]; + } + cmd[len] = fileName.toString(); + String inpMsg = ""; + String errMsg = ""; + int exitValue = -1; + BufferedReader in = null; + BufferedReader err = null; + + // execute shell command + Process process = Runtime.getRuntime().exec(cmd); + try { + exitValue = process.waitFor(); + in = new BufferedReader(new InputStreamReader( + process.getInputStream())); + inpMsg = in.readLine(); + if (inpMsg == null) inpMsg = ""; + + err = new BufferedReader(new InputStreamReader( + process.getErrorStream())); + errMsg = err.readLine(); + if (errMsg == null) errMsg = ""; + if (exitValue != 0) { + throw new IOException(inpMsg + errMsg); + } + if (getOSType() == OSType.OS_TYPE_SOLARIS) { + String[] result = inpMsg.split("\\s+"); + return Integer.parseInt(result[1]); + } else { + return Integer.parseInt(inpMsg); + } + } catch (NumberFormatException e) { + throw new IOException(StringUtils.stringifyException(e) + + inpMsg + errMsg + + " on file:" + fileName); + } catch (InterruptedException e) { + throw new IOException(StringUtils.stringifyException(e) + + inpMsg + errMsg + + " on file:" + fileName); + } finally { + process.destroy(); + if (in != null) in.close(); + if (err != null) err.close(); + } + } + } + + /** + * Create a soft link between a src and destination + * only on a local disk. HDFS does not support this + * @param target the target for symlink + * @param linkname the symlink + * @return value returned by the command + */ + public static int symLink(String target, String linkname) throws IOException{ + String cmd = "ln -s " + target + " " + linkname; + Process p = Runtime.getRuntime().exec(cmd, null); + int returnVal = -1; + try{ + returnVal = p.waitFor(); + } catch(InterruptedException e){ + //do nothing as of yet + } + return returnVal; + } + + /** + * Change the permissions on a filename. + * @param filename the name of the file to change + * @param perm the permission string + * @return the exit code from the command + * @throws IOException + * @throws InterruptedException + */ + public static int chmod(String filename, String perm + ) throws IOException, InterruptedException { + return chmod(filename, perm, false); + } + + /** + * Change the permissions on a file / directory, recursively, if + * needed. + * @param filename name of the file whose permissions are to change + * @param perm permission string + * @param recursive true, if permissions should be changed recursively + * @return the exit code from the command. + * @throws IOException + * @throws InterruptedException + */ + public static int chmod(String filename, String perm, boolean recursive) + throws IOException, InterruptedException { + StringBuffer cmdBuf = new StringBuffer(); + cmdBuf.append("chmod "); + if (recursive) { + cmdBuf.append("-R "); + } + cmdBuf.append(perm).append(" "); + cmdBuf.append(filename); + String[] shellCmd = {"bash", "-c" ,cmdBuf.toString()}; + ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd); + try { + shExec.execute(); + }catch(Exception e) { + if(Log.isDebugEnabled()) { + Log.debug("Error while changing permission : " + filename + +" Exception: " + StringUtils.stringifyException(e)); + } + } + return shExec.getExitCode(); + } + + /** + * Create a tmp file for a base file. 
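A small sketch of the temp-then-publish idiom this helper is written for; the output name is an example, and replaceFile() is the helper defined a little further down in this class:

    File output = new File("/data/output/map_0.out");                 // example target
    File tmp = FileUtil.createLocalTempFile(output, "_tmp_", true);   // cleaned up on JVM exit if left behind
    // ... write tmp to completion ...
    FileUtil.replaceFile(tmp, output);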
+ * @param basefile the base file of the tmp + * @param prefix file name prefix of tmp + * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits + * @return a newly created tmp file + * @exception IOException If a tmp file cannot created + * @see java.io.File#createTempFile(String, String, File) + * @see java.io.File#deleteOnExit() + */ + public static final File createLocalTempFile(final File basefile, + final String prefix, + final boolean isDeleteOnExit) + throws IOException { + File tmp = File.createTempFile(prefix + basefile.getName(), + "", basefile.getParentFile()); + if (isDeleteOnExit) { + tmp.deleteOnExit(); + } + return tmp; + } + + /** + * Move the src file to the name specified by target. + * @param src the source file + * @param target the target file + * @exception IOException If this operation fails + */ + public static void replaceFile(File src, File target) throws IOException { + /* renameTo() has two limitations on Windows platform. + * src.renameTo(target) fails if + * 1) If target already exists OR + * 2) If target is already open for reading/writing. + */ + if (!src.renameTo(target)) { + int retries = 5; + while (target.exists() && !target.delete() && retries-- >= 0) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + throw new IOException("replaceFile interrupted."); + } + } + if (!src.renameTo(target)) { + throw new IOException("Unable to rename " + src + + " to " + target); + } + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FilterFileSystem.java b/src/java/org/apache/hadoop/fs/FilterFileSystem.java new file mode 100644 index 00000000000..2a2aa619afc --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +/**************************************************************** + * A FilterFileSystem contains + * some other file system, which it uses as + * its basic file system, possibly transforming + * the data along the way or providing additional + * functionality. The class FilterFileSystem + * itself simply overrides all methods of + * FileSystem with versions that + * pass all requests to the contained file + * system. Subclasses of FilterFileSystem + * may further override some of these methods + * and may also provide additional methods + * and fields. 
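As an illustration of the pattern described here, a subclass only has to hand the wrapped instance to the constructor and override whatever it wants to intercept; the class name and the logging are invented for the example, assuming the usual org.apache.hadoop.fs imports:

    public class LoggingFileSystem extends FilterFileSystem {
      public LoggingFileSystem(FileSystem wrapped) {
        super(wrapped);                              // everything not overridden is delegated
      }
      @Override
      public FSDataInputStream open(Path f, int bufferSize) throws IOException {
        System.out.println("open " + f);             // the added behaviour
        return super.open(f, bufferSize);
      }
    }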
+ * + *****************************************************************/ +public class FilterFileSystem extends FileSystem { + + protected FileSystem fs; + + /* + * so that extending classes can define it + */ + public FilterFileSystem() { + } + + public FilterFileSystem(FileSystem fs) { + this.fs = fs; + this.statistics = fs.statistics; + } + + /** Called after a new FileSystem instance is constructed. + * @param name a uri whose authority section names the host, port, etc. + * for this FileSystem + * @param conf the configuration + */ + public void initialize(URI name, Configuration conf) throws IOException { + fs.initialize(name, conf); + } + + /** Returns a URI whose scheme and authority identify this FileSystem.*/ + public URI getUri() { + return fs.getUri(); + } + + /** Make sure that a path specifies a FileSystem. */ + public Path makeQualified(Path path) { + return fs.makeQualified(path); + } + + /////////////////////////////////////////////////////////////// + // FileSystem + /////////////////////////////////////////////////////////////// + + /** Check that a Path belongs to this FileSystem. */ + protected void checkPath(Path path) { + fs.checkPath(path); + } + + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + return fs.getFileBlockLocations(file, start, len); + } + + /** + * Opens an FSDataInputStream at the indicated Path. + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return fs.open(f, bufferSize); + } + + /** {@inheritDoc} */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + return fs.append(f, bufferSize, progress); + } + + /** {@inheritDoc} */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + return fs.create(f, permission, + overwrite, bufferSize, replication, blockSize, progress); + } + + /** + * Set replication for an existing file. + * + * @param src file name + * @param replication new replication + * @throws IOException + * @return true if successful; + * false if file does not exist or is a directory + */ + public boolean setReplication(Path src, short replication) throws IOException { + return fs.setReplication(src, replication); + } + + /** + * Renames Path src to Path dst. Can take place on local fs + * or remote DFS. + */ + public boolean rename(Path src, Path dst) throws IOException { + return fs.rename(src, dst); + } + + /** Delete a file */ + public boolean delete(Path f, boolean recursive) throws IOException { + return fs.delete(f, recursive); + } + + /** List files in a directory. */ + public FileStatus[] listStatus(Path f) throws IOException { + return fs.listStatus(f); + } + + public Path getHomeDirectory() { + return fs.getHomeDirectory(); + } + + + /** + * Set the current working directory for the given file system. All relative + * paths will be resolved relative to it. 
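For instance (directory names are examples), after the call below a relative Path qualifies against the new working directory:

    fs.setWorkingDirectory(new Path("/user/alice"));
    Path qualified = fs.makeQualified(new Path("reports/2009"));
    // qualified now points at /user/alice/reports/2009 on this file system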
+ * + * @param newDir + */ + public void setWorkingDirectory(Path newDir) { + fs.setWorkingDirectory(newDir); + } + + /** + * Get the current working directory for the given file system + * + * @return the directory pathname + */ + public Path getWorkingDirectory() { + return fs.getWorkingDirectory(); + } + + /** {@inheritDoc} */ + @Override + public FsStatus getStatus(Path p) throws IOException { + return fs.getStatus(p); + } + + /** {@inheritDoc} */ + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + return fs.mkdirs(f, permission); + } + + /** + * The src file is on the local disk. Add it to FS at + * the given dst name. + * delSrc indicates if the source should be removed + */ + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + fs.copyFromLocalFile(delSrc, src, dst); + } + + /** + * The src file is under FS, and the dst is on the local disk. + * Copy it from FS control to the local dst name. + * delSrc indicates if the src will be removed or not. + */ + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + fs.copyToLocalFile(delSrc, src, dst); + } + + /** + * Returns a local File that the user can write output to. The caller + * provides both the eventual FS target name and the local working + * file. If the FS is local, we write directly into the target. If + * the FS is remote, we write into the tmp local area. + */ + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return fs.startLocalOutput(fsOutputFile, tmpLocalFile); + } + + /** + * Called when we're all done writing to the target. A local FS will + * do nothing, because we've written to exactly the right place. A remote + * FS will copy the contents of tmpLocalFile to the correct target at + * fsOutputFile. + */ + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + fs.completeLocalOutput(fsOutputFile, tmpLocalFile); + } + + /** Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. */ + public long getDefaultBlockSize() { + return fs.getDefaultBlockSize(); + } + + /** + * Get the default replication. + */ + public short getDefaultReplication() { + return fs.getDefaultReplication(); + } + + /** + * Get file status. 
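A quick sketch of reading the returned status, assuming a FileSystem fs as elsewhere in this class; the path is a placeholder:

    FileStatus st = fs.getFileStatus(new Path("/user/alice/data.seq"));
    System.out.println(st.getPath() + ": " + st.getLen() + " bytes, "
        + (st.isDir() ? "directory" : "replication " + st.getReplication()));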
+ */ + public FileStatus getFileStatus(Path f) throws IOException { + return fs.getFileStatus(f); + } + + /** {@inheritDoc} */ + public FileChecksum getFileChecksum(Path f) throws IOException { + return fs.getFileChecksum(f); + } + + /** {@inheritDoc} */ + public void setVerifyChecksum(boolean verifyChecksum) { + fs.setVerifyChecksum(verifyChecksum); + } + + @Override + public Configuration getConf() { + return fs.getConf(); + } + + @Override + public void close() throws IOException { + super.close(); + fs.close(); + } + + /** {@inheritDoc} */ + @Override + public void setOwner(Path p, String username, String groupname + ) throws IOException { + fs.setOwner(p, username, groupname); + } + + /** {@inheritDoc} */ + @Override + public void setPermission(Path p, FsPermission permission + ) throws IOException { + fs.setPermission(p, permission); + } +} diff --git a/src/java/org/apache/hadoop/fs/FsShell.java b/src/java/org/apache/hadoop/fs/FsShell.java new file mode 100644 index 00000000000..987b4999668 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsShell.java @@ -0,0 +1,1925 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.zip.GZIPInputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.shell.CommandFormat; +import org.apache.hadoop.fs.shell.Count; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.util.StringUtils; + +/** Provide command line access to a FileSystem. 
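Because FsShell is a Tool, it can be driven programmatically through ToolRunner as well as from the hadoop fs command line; a minimal sketch, with an argument list that is only an example:

    public static void main(String[] args) throws Exception {
      int exitCode = ToolRunner.run(new FsShell(), new String[] {"-ls", "/user/alice"});
      System.exit(exitCode);
    }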
*/ +public class FsShell extends Configured implements Tool { + + protected FileSystem fs; + private Trash trash; + public static final SimpleDateFormat dateForm = + new SimpleDateFormat("yyyy-MM-dd HH:mm"); + protected static final SimpleDateFormat modifFmt = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + static final int BORDER = 2; + static { + modifFmt.setTimeZone(TimeZone.getTimeZone("UTC")); + } + static final String SETREP_SHORT_USAGE="-setrep [-R] [-w] "; + static final String GET_SHORT_USAGE = "-get [-ignoreCrc] [-crc] "; + static final String COPYTOLOCAL_SHORT_USAGE = GET_SHORT_USAGE.replace( + "-get", "-copyToLocal"); + static final String TAIL_USAGE="-tail [-f] "; + + /** + */ + public FsShell() { + this(null); + } + + public FsShell(Configuration conf) { + super(conf); + fs = null; + trash = null; + } + + protected void init() throws IOException { + getConf().setQuietMode(true); + if (this.fs == null) { + this.fs = FileSystem.get(getConf()); + } + if (this.trash == null) { + this.trash = new Trash(getConf()); + } + } + + + /** + * Copies from stdin to the indicated file. + */ + private void copyFromStdin(Path dst, FileSystem dstFs) throws IOException { + if (dstFs.isDirectory(dst)) { + throw new IOException("When source is stdin, destination must be a file."); + } + if (dstFs.exists(dst)) { + throw new IOException("Target " + dst.toString() + " already exists."); + } + FSDataOutputStream out = dstFs.create(dst); + try { + IOUtils.copyBytes(System.in, out, getConf(), false); + } + finally { + out.close(); + } + } + + /** + * Print from src to stdout. + */ + private void printToStdout(InputStream in) throws IOException { + try { + IOUtils.copyBytes(in, System.out, getConf(), false); + } finally { + in.close(); + } + } + + + /** + * Add local files to the indicated FileSystem name. src is kept. + */ + void copyFromLocal(Path[] srcs, String dstf) throws IOException { + Path dstPath = new Path(dstf); + FileSystem dstFs = dstPath.getFileSystem(getConf()); + if (srcs.length == 1 && srcs[0].toString().equals("-")) + copyFromStdin(dstPath, dstFs); + else + dstFs.copyFromLocalFile(false, false, srcs, dstPath); + } + + /** + * Add local files to the indicated FileSystem name. src is removed. + */ + void moveFromLocal(Path[] srcs, String dstf) throws IOException { + Path dstPath = new Path(dstf); + FileSystem dstFs = dstPath.getFileSystem(getConf()); + dstFs.moveFromLocalFile(srcs, dstPath); + } + + /** + * Add a local file to the indicated FileSystem name. src is removed. + */ + void moveFromLocal(Path src, String dstf) throws IOException { + moveFromLocal((new Path[]{src}), dstf); + } + + /** + * Obtain the indicated files that match the file pattern srcf + * and copy them to the local name. srcf is kept. + * When copying multiple files, the destination must be a directory. + * Otherwise, IOException is thrown. 
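To make the directory requirement concrete, a driver along the lines of the sketch near the top of the class might run (paths invented):

    // several matches, so the last argument must name an existing local directory
    ToolRunner.run(new FsShell(),
        new String[] {"-get", "/user/alice/output/part-*", "/tmp/output"});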
+ * @param argv: arguments + * @param pos: Ignore everything before argv[pos] + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void copyToLocal(String[]argv, int pos) throws IOException { + CommandFormat cf = new CommandFormat("copyToLocal", 2,2,"crc","ignoreCrc"); + + String srcstr = null; + String dststr = null; + try { + List parameters = cf.parse(argv, pos); + srcstr = parameters.get(0); + dststr = parameters.get(1); + } + catch(IllegalArgumentException iae) { + System.err.println("Usage: java FsShell " + GET_SHORT_USAGE); + throw iae; + } + boolean copyCrc = cf.getOpt("crc"); + final boolean verifyChecksum = !cf.getOpt("ignoreCrc"); + + if (dststr.equals("-")) { + if (copyCrc) { + System.err.println("-crc option is not valid when destination is stdout."); + } + cat(srcstr, verifyChecksum); + } else { + File dst = new File(dststr); + Path srcpath = new Path(srcstr); + FileSystem srcFS = getSrcFileSystem(srcpath, verifyChecksum); + if (copyCrc && !(srcFS instanceof ChecksumFileSystem)) { + System.err.println("-crc option is not valid when source file system " + + "does not have crc files. Automatically turn the option off."); + copyCrc = false; + } + FileStatus[] srcs = srcFS.globStatus(srcpath); + boolean dstIsDir = dst.isDirectory(); + if (srcs.length > 1 && !dstIsDir) { + throw new IOException("When copying multiple files, " + + "destination should be a directory."); + } + for (FileStatus status : srcs) { + Path p = status.getPath(); + File f = dstIsDir? new File(dst, p.getName()): dst; + copyToLocal(srcFS, status, f, copyCrc); + } + } + } + + /** + * Return the {@link FileSystem} specified by src and the conf. + * It the {@link FileSystem} supports checksum, set verifyChecksum. + */ + private FileSystem getSrcFileSystem(Path src, boolean verifyChecksum + ) throws IOException { + FileSystem srcFs = src.getFileSystem(getConf()); + srcFs.setVerifyChecksum(verifyChecksum); + return srcFs; + } + + /** + * The prefix for the tmp file used in copyToLocal. + * It must be at least three characters long, required by + * {@link java.io.File#createTempFile(String, String, File)}. + */ + static final String COPYTOLOCAL_PREFIX = "_copyToLocal_"; + + /** + * Copy a source file from a given file system to local destination. + * @param srcFS source file system + * @param src source path + * @param dst destination + * @param copyCrc copy CRC files? + * @exception IOException If some IO failed + */ + private void copyToLocal(final FileSystem srcFS, final FileStatus srcStatus, + final File dst, final boolean copyCrc) + throws IOException { + /* Keep the structure similar to ChecksumFileSystem.copyToLocal(). + * Ideal these two should just invoke FileUtil.copy() and not repeat + * recursion here. Of course, copy() should support two more options : + * copyCrc and useTmpFile (may be useTmpFile need not be an option). 
+ */ + + Path src = srcStatus.getPath(); + if (!srcStatus.isDir()) { + if (dst.exists()) { + // match the error message in FileUtil.checkDest(): + throw new IOException("Target " + dst + " already exists"); + } + + // use absolute name so that tmp file is always created under dest dir + File tmp = FileUtil.createLocalTempFile(dst.getAbsoluteFile(), + COPYTOLOCAL_PREFIX, true); + if (!FileUtil.copy(srcFS, src, tmp, false, srcFS.getConf())) { + throw new IOException("Failed to copy " + src + " to " + dst); + } + + if (!tmp.renameTo(dst)) { + throw new IOException("Failed to rename tmp file " + tmp + + " to local destination \"" + dst + "\"."); + } + + if (copyCrc) { + if (!(srcFS instanceof ChecksumFileSystem)) { + throw new IOException("Source file system does not have crc files"); + } + + ChecksumFileSystem csfs = (ChecksumFileSystem) srcFS; + File dstcs = FileSystem.getLocal(srcFS.getConf()) + .pathToFile(csfs.getChecksumFile(new Path(dst.getCanonicalPath()))); + FileSystem fs = csfs.getRawFileSystem(); + FileStatus status = csfs.getFileStatus(csfs.getChecksumFile(src)); + copyToLocal(fs, status, dstcs, false); + } + } else { + // once FileUtil.copy() supports tmp file, we don't need to mkdirs(). + if (!dst.mkdirs()) { + throw new IOException("Failed to create local destination \"" + + dst + "\"."); + } + for(FileStatus status : srcFS.listStatus(src)) { + copyToLocal(srcFS, status, + new File(dst, status.getPath().getName()), copyCrc); + } + } + } + + /** + * Get all the files in the directories that match the source file + * pattern and merge and sort them to only one file on local fs + * srcf is kept. + * @param srcf: a file pattern specifying source files + * @param dstf: a destination local file/directory + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void copyMergeToLocal(String srcf, Path dst) throws IOException { + copyMergeToLocal(srcf, dst, false); + } + + + /** + * Get all the files in the directories that match the source file pattern + * and merge and sort them to only one file on local fs + * srcf is kept. + * + * Also adds a string between the files (useful for adding \n + * to a text file) + * @param srcf: a file pattern specifying source files + * @param dstf: a destination local file/directory + * @param endline: if an end of line character is added to a text file + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void copyMergeToLocal(String srcf, Path dst, boolean endline) throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path [] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), + srcPath); + for(int i=0; isrcf and display + * their content on stdout. 
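For instance (paths invented), the equivalent of hadoop fs -cat streams each argument to stdout; as the Linux-style note just below records, a file that cannot be read is reported while the remaining arguments are still printed:

    ToolRunner.run(new FsShell(),
        new String[] {"-cat", "/user/alice/results/part-00000", "/user/alice/results/part-00001"});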
+ * @param srcf: a file pattern specifying source files + * @exception: IOException + * @see org.apache.hadoop.fs.FileSystem.globStatus + */ + void cat(String src, boolean verifyChecksum) throws IOException { + //cat behavior in Linux + // [~/1207]$ ls ?.txt + // x.txt z.txt + // [~/1207]$ cat x.txt y.txt z.txt + // xxx + // cat: y.txt: No such file or directory + // zzz + + Path srcPattern = new Path(src); + new DelayedExceptionThrowing() { + @Override + void process(Path p, FileSystem srcFs) throws IOException { + if (srcFs.getFileStatus(p).isDir()) { + throw new IOException("Source must be a file."); + } + printToStdout(srcFs.open(p)); + } + }.globAndProcess(srcPattern, getSrcFileSystem(srcPattern, verifyChecksum)); + } + + private class TextRecordInputStream extends InputStream { + SequenceFile.Reader r; + WritableComparable key; + Writable val; + + DataInputBuffer inbuf; + DataOutputBuffer outbuf; + + public TextRecordInputStream(FileStatus f) throws IOException { + r = new SequenceFile.Reader(fs, f.getPath(), getConf()); + key = ReflectionUtils.newInstance(r.getKeyClass().asSubclass(WritableComparable.class), + getConf()); + val = ReflectionUtils.newInstance(r.getValueClass().asSubclass(Writable.class), + getConf()); + inbuf = new DataInputBuffer(); + outbuf = new DataOutputBuffer(); + } + + public int read() throws IOException { + int ret; + if (null == inbuf || -1 == (ret = inbuf.read())) { + if (!r.next(key, val)) { + return -1; + } + byte[] tmp = key.toString().getBytes(); + outbuf.write(tmp, 0, tmp.length); + outbuf.write('\t'); + tmp = val.toString().getBytes(); + outbuf.write(tmp, 0, tmp.length); + outbuf.write('\n'); + inbuf.reset(outbuf.getData(), outbuf.getLength()); + outbuf.reset(); + ret = inbuf.read(); + } + return ret; + } + } + + private InputStream forMagic(Path p, FileSystem srcFs) throws IOException { + FSDataInputStream i = srcFs.open(p); + switch(i.readShort()) { + case 0x1f8b: // RFC 1952 + i.seek(0); + return new GZIPInputStream(i); + case 0x5345: // 'S' 'E' + if (i.readByte() == 'Q') { + i.close(); + return new TextRecordInputStream(srcFs.getFileStatus(p)); + } + break; + } + i.seek(0); + return i; + } + + void text(String srcf) throws IOException { + Path srcPattern = new Path(srcf); + new DelayedExceptionThrowing() { + @Override + void process(Path p, FileSystem srcFs) throws IOException { + if (srcFs.isDirectory(p)) { + throw new IOException("Source must be a file."); + } + printToStdout(forMagic(p, srcFs)); + } + }.globAndProcess(srcPattern, srcPattern.getFileSystem(getConf())); + } + + /** + * Parse the incoming command string + * @param cmd + * @param pos ignore anything before this pos in cmd + * @throws IOException + */ + private void setReplication(String[] cmd, int pos) throws IOException { + CommandFormat c = new CommandFormat("setrep", 2, 2, "R", "w"); + String dst = null; + short rep = 0; + + try { + List parameters = c.parse(cmd, pos); + rep = Short.parseShort(parameters.get(0)); + dst = parameters.get(1); + } catch (NumberFormatException nfe) { + System.err.println("Illegal replication, a positive integer expected"); + throw nfe; + } + catch(IllegalArgumentException iae) { + System.err.println("Usage: java FsShell " + SETREP_SHORT_USAGE); + throw iae; + } + + if (rep < 1) { + System.err.println("Cannot set replication to: " + rep); + throw new IllegalArgumentException("replication must be >= 1"); + } + + List waitList = c.getOpt("w")? 
new ArrayList(): null; + setReplication(rep, dst, c.getOpt("R"), waitList); + + if (waitList != null) { + waitForReplication(waitList, rep); + } + } + + /** + * Wait for all files in waitList to have replication number equal to rep. + * @param waitList The files are waited for. + * @param rep The new replication number. + * @throws IOException IOException + */ + void waitForReplication(List waitList, int rep) throws IOException { + for(Path f : waitList) { + System.out.print("Waiting for " + f + " ..."); + System.out.flush(); + + boolean printWarning = false; + FileStatus status = fs.getFileStatus(f); + long len = status.getLen(); + + for(boolean done = false; !done; ) { + BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len); + int i = 0; + for(; i < locations.length && + locations[i].getHosts().length == rep; i++) + if (!printWarning && locations[i].getHosts().length > rep) { + System.out.println("\nWARNING: the waiting time may be long for " + + "DECREASING the number of replication."); + printWarning = true; + } + done = i == locations.length; + + if (!done) { + System.out.print("."); + System.out.flush(); + try {Thread.sleep(10000);} catch (InterruptedException e) {} + } + } + + System.out.println(" done"); + } + } + + /** + * Set the replication for files that match file pattern srcf + * if it's a directory and recursive is true, + * set replication for all the subdirs and those files too. + * @param newRep new replication factor + * @param srcf a file pattern specifying source files + * @param recursive if need to set replication factor for files in subdirs + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void setReplication(short newRep, String srcf, boolean recursive, + List waitingList) + throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path[] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), + srcPath); + for(int i=0; i waitingList) + throws IOException { + if (!srcFs.getFileStatus(src).isDir()) { + setFileReplication(src, srcFs, newRep, waitingList); + return; + } + FileStatus items[] = srcFs.listStatus(src); + if (items == null) { + throw new IOException("Could not get listing for " + src); + } else { + + for (int i = 0; i < items.length; i++) { + if (!items[i].isDir()) { + setFileReplication(items[i].getPath(), srcFs, newRep, waitingList); + } else if (recursive) { + setReplication(newRep, srcFs, items[i].getPath(), recursive, + waitingList); + } + } + } + } + + /** + * Actually set the replication for this file + * If it fails either throw IOException or print an error msg + * @param file: a file/directory + * @param newRep: new replication factor + * @throws IOException + */ + private void setFileReplication(Path file, FileSystem srcFs, short newRep, List waitList) + throws IOException { + if (srcFs.setReplication(file, newRep)) { + if (waitList != null) { + waitList.add(file); + } + System.out.println("Replication " + newRep + " set: " + file); + } else { + System.err.println("Could not set replication for: " + file); + } + } + + + /** + * Get a listing of all files in that match the file pattern srcf. 
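// A standalone sketch of the setrep-and-wait logic above (the path and the
// replication factor are hypothetical): request a new replication factor,
// then poll block locations until every block reports the requested number
// of replicas, as waitForReplication does.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SetRepSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/data/part-00000");   // hypothetical file
    short rep = 2;
    if (!fs.setReplication(file, rep)) {
      System.err.println("Could not set replication for: " + file);
      return;
    }
    FileStatus status = fs.getFileStatus(file);
    boolean done = false;
    while (!done) {
      BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
      done = true;
      for (BlockLocation b : blocks) {
        if (b.getHosts().length != rep) { done = false; break; }
      }
      if (!done) Thread.sleep(10000);            // same 10s poll used above
    }
    System.out.println("Replication " + rep + " reached for " + file);
  }
}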
+ * @param srcf a file pattern specifying source files + * @param recursive if need to list files in subdirs + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + private int ls(String srcf, boolean recursive) throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(this.getConf()); + FileStatus[] srcs = srcFs.globStatus(srcPath); + if (srcs==null || srcs.length==0) { + throw new FileNotFoundException("Cannot access " + srcf + + ": No such file or directory."); + } + + boolean printHeader = (srcs.length == 1) ? true: false; + int numOfErrors = 0; + for(int i=0; isrc + * ideally we should provide "-l" option, that lists like "ls -l". + */ + private int ls(FileStatus src, FileSystem srcFs, boolean recursive, + boolean printHeader) throws IOException { + final String cmd = recursive? "lsr": "ls"; + final FileStatus[] items = shellListStatus(cmd, srcFs, src); + if (items == null) { + return 1; + } else { + int numOfErrors = 0; + if (!recursive && printHeader) { + if (items.length != 0) { + System.out.println("Found " + items.length + " items"); + } + } + + int maxReplication = 3, maxLen = 10, maxOwner = 0,maxGroup = 0; + + for(int i = 0; i < items.length; i++) { + FileStatus stat = items[i]; + int replication = String.valueOf(stat.getReplication()).length(); + int len = String.valueOf(stat.getLen()).length(); + int owner = String.valueOf(stat.getOwner()).length(); + int group = String.valueOf(stat.getGroup()).length(); + + if (replication > maxReplication) maxReplication = replication; + if (len > maxLen) maxLen = len; + if (owner > maxOwner) maxOwner = owner; + if (group > maxGroup) maxGroup = group; + } + + for (int i = 0; i < items.length; i++) { + FileStatus stat = items[i]; + Path cur = stat.getPath(); + String mdate = dateForm.format(new Date(stat.getModificationTime())); + + System.out.print((stat.isDir() ? "d" : "-") + + stat.getPermission() + " "); + System.out.printf("%"+ maxReplication + + "s ", (!stat.isDir() ? stat.getReplication() : "-")); + if (maxOwner > 0) + System.out.printf("%-"+ maxOwner + "s ", stat.getOwner()); + if (maxGroup > 0) + System.out.printf("%-"+ maxGroup + "s ", stat.getGroup()); + System.out.printf("%"+ maxLen + "d ", stat.getLen()); + System.out.print(mdate + " "); + System.out.println(cur.toUri().getPath()); + if (recursive && stat.isDir()) { + numOfErrors += ls(stat,srcFs, recursive, printHeader); + } + } + return numOfErrors; + } + } + + /** + * Show the size of a partition in the filesystem that contains + * the specified path. + * @param path a path specifying the source partition. null means /. + * @throws IOException + */ + void df(String path) throws IOException { + if (path == null) path = "/"; + final Path srcPath = new Path(path); + final FileSystem srcFs = srcPath.getFileSystem(getConf()); + if (! 
srcFs.exists(srcPath)) { + throw new FileNotFoundException("Cannot access "+srcPath.toString()); + } + final FsStatus stats = srcFs.getStatus(srcPath); + final int PercentUsed = (int)(100.0f * (float)stats.getUsed() / (float)stats.getCapacity()); + System.out.println("Filesystem\t\tSize\tUsed\tAvail\tUse%"); + System.out.printf("%s\t\t%d\t%d\t%d\t%d%%\n", + path, + stats.getCapacity(), stats.getUsed(), stats.getRemaining(), + PercentUsed); + } + + /** + * Show the size of all files that match the file pattern src + * @param src a file pattern specifying source files + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void du(String src) throws IOException { + Path srcPath = new Path(src); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path[] pathItems = FileUtil.stat2Paths(srcFs.globStatus(srcPath), + srcPath); + FileStatus items[] = srcFs.listStatus(pathItems); + if ((items == null) || ((items.length == 0) && + (!srcFs.exists(srcPath)))){ + throw new FileNotFoundException("Cannot access " + src + + ": No such file or directory."); + } else { + System.out.println("Found " + items.length + " items"); + int maxLength = 10; + + long length[] = new long[items.length]; + for (int i = 0; i < items.length; i++) { + length[i] = items[i].isDir() ? + srcFs.getContentSummary(items[i].getPath()).getLength() : + items[i].getLen(); + int len = String.valueOf(length[i]).length(); + if (len > maxLength) maxLength = len; + } + for(int i = 0; i < items.length; i++) { + System.out.printf("%-"+ (maxLength + BORDER) +"d", length[i]); + System.out.println(items[i].getPath()); + } + } + } + + /** + * Show the summary disk usage of each dir/file + * that matches the file pattern src + * @param src a file pattern specifying source files + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void dus(String src) throws IOException { + Path srcPath = new Path(src); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + FileStatus status[] = srcFs.globStatus(new Path(src)); + if (status==null || status.length==0) { + throw new FileNotFoundException("Cannot access " + src + + ": No such file or directory."); + } + for(int i=0; i 2) + throw new IOException("Not a flag: " + argv[i]); + char flag = argv[i].toCharArray()[1]; + Path f = new Path(argv[++i]); + FileSystem srcFs = f.getFileSystem(getConf()); + switch(flag) { + case 'e': + return srcFs.exists(f) ? 0 : 1; + case 'z': + return srcFs.getFileStatus(f).getLen() == 0 ? 0 : 1; + case 'd': + return srcFs.getFileStatus(f).isDir() ? 0 : 1; + default: + throw new IOException("Unknown flag: " + flag); + } + } + + /** + * Print statistics about path in specified format. + * Format sequences: + * %b: Size of file in blocks + * %n: Filename + * %o: Block size + * %r: replication + * %y: UTC date as "yyyy-MM-dd HH:mm:ss" + * %Y: Milliseconds since January 1, 1970 UTC + */ + void stat(char[] fmt, String src) throws IOException { + Path srcPath = new Path(src); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + FileStatus glob[] = srcFs.globStatus(srcPath); + if (null == glob) + throw new IOException("cannot stat `" + src + "': No such file or directory"); + for (FileStatus f : glob) { + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < fmt.length; ++i) { + if (fmt[i] != '%') { + buf.append(fmt[i]); + } else { + if (i + 1 == fmt.length) break; + switch(fmt[++i]) { + case 'b': + buf.append(f.getLen()); + break; + case 'F': + buf.append(f.isDir() ? 
"directory" : "regular file"); + break; + case 'n': + buf.append(f.getPath().getName()); + break; + case 'o': + buf.append(f.getBlockSize()); + break; + case 'r': + buf.append(f.getReplication()); + break; + case 'y': + buf.append(modifFmt.format(new Date(f.getModificationTime()))); + break; + case 'Y': + buf.append(f.getModificationTime()); + break; + default: + buf.append(fmt[i]); + break; + } + } + } + System.out.println(buf.toString()); + } + } + + /** + * Move files that match the file pattern srcf + * to a destination file. + * When moving mutiple files, the destination must be a directory. + * Otherwise, IOException is thrown. + * @param srcf a file pattern specifying source files + * @param dstf a destination local file/directory + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void rename(String srcf, String dstf) throws IOException { + Path srcPath = new Path(srcf); + Path dstPath = new Path(dstf); + FileSystem fs = srcPath.getFileSystem(getConf()); + URI srcURI = fs.getUri(); + URI dstURI = dstPath.getFileSystem(getConf()).getUri(); + if (srcURI.compareTo(dstURI) != 0) { + throw new IOException("src and destination filesystems do not match."); + } + Path[] srcs = FileUtil.stat2Paths(fs.globStatus(srcPath), srcPath); + Path dst = new Path(dstf); + if (srcs.length > 1 && !fs.isDirectory(dst)) { + throw new IOException("When moving multiple files, " + + "destination should be a directory."); + } + for(int i=0; i 3) { + Path dst = new Path(dest); + FileSystem dstFs = dst.getFileSystem(getConf()); + if (!dstFs.isDirectory(dst)) { + throw new IOException("When moving multiple files, " + + "destination " + dest + " should be a directory."); + } + } + // + // for each source file, issue the rename + // + for (; i < argv.length - 1; i++) { + try { + // + // issue the rename to the fs + // + rename(argv[i], dest); + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error mesage. + // + exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + + e.getLocalizedMessage()); + } + } + return exitCode; + } + + /** + * Copy files that match the file pattern srcf + * to a destination file. + * When copying mutiple files, the destination must be a directory. + * Otherwise, IOException is thrown. 
+ * @param srcf a file pattern specifying source files + * @param dstf a destination local file/directory + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void copy(String srcf, String dstf, Configuration conf) throws IOException { + Path srcPath = new Path(srcf); + FileSystem srcFs = srcPath.getFileSystem(getConf()); + Path dstPath = new Path(dstf); + FileSystem dstFs = dstPath.getFileSystem(getConf()); + Path [] srcs = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath); + if (srcs.length > 1 && !dstFs.isDirectory(dstPath)) { + throw new IOException("When copying multiple files, " + + "destination should be a directory."); + } + for(int i=0; i 3) { + Path dst = new Path(dest); + if (!fs.isDirectory(dst)) { + throw new IOException("When copying multiple files, " + + "destination " + dest + " should be a directory."); + } + } + // + // for each source file, issue the copy + // + for (; i < argv.length - 1; i++) { + try { + // + // issue the copy to the fs + // + copy(argv[i], dest, conf); + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error mesage. + // + exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + + e.getLocalizedMessage()); + } + } + return exitCode; + } + + /** + * Delete all files that match the file pattern srcf. + * @param srcf a file pattern specifying source files + * @param recursive if need to delete subdirs + * @throws IOException + * @see org.apache.hadoop.fs.FileSystem#globStatus(Path) + */ + void delete(String srcf, final boolean recursive) throws IOException { + //rm behavior in Linux + // [~/1207]$ ls ?.txt + // x.txt z.txt + // [~/1207]$ rm x.txt y.txt z.txt + // rm: cannot remove `y.txt': No such file or directory + + Path srcPattern = new Path(srcf); + new DelayedExceptionThrowing() { + @Override + void process(Path p, FileSystem srcFs) throws IOException { + delete(p, srcFs, recursive); + } + }.globAndProcess(srcPattern, srcPattern.getFileSystem(getConf())); + } + + /* delete a file */ + private void delete(Path src, FileSystem srcFs, boolean recursive) throws IOException { + if (srcFs.isDirectory(src) && !recursive) { + throw new IOException("Cannot remove directory \"" + src + + "\", use -rmr instead"); + } + Trash trashTmp = new Trash(srcFs, getConf()); + if (trashTmp.moveToTrash(src)) { + System.out.println("Moved to trash: " + src); + return; + } + if (srcFs.delete(src, true)) { + System.out.println("Deleted " + src); + } else { + if (!srcFs.exists(src)) { + throw new FileNotFoundException("cannot remove " + + src + ": No such file or directory."); + } + throw new IOException("Delete failed " + src); + } + } + + private void expunge() throws IOException { + trash.expunge(); + trash.checkpoint(); + } + + /** + * Returns the Trash object associated with this shell. 
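// A standalone sketch of the delete-with-trash flow shown above (the path is
// hypothetical): try to move the target into the trash first, and only fall
// back to a real, recursive delete when that is not possible.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class TrashSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path victim = new Path("/tmp/scratch.dat");   // hypothetical path
    Trash trash = new Trash(fs, conf);
    if (trash.moveToTrash(victim)) {
      System.out.println("Moved to trash: " + victim);
    } else if (fs.delete(victim, true)) {         // true = recursive
      System.out.println("Deleted " + victim);
    }
    // The -expunge command reduces to:
    // trash.expunge();
    // trash.checkpoint();
  }
}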
+ */ + public Path getCurrentTrashDir() { + return trash.getCurrentTrashDir(); + } + + /** + * Parse the incoming command string + * @param cmd + * @param pos ignore anything before this pos in cmd + * @throws IOException + */ + private void tail(String[] cmd, int pos) throws IOException { + CommandFormat c = new CommandFormat("tail", 1, 1, "f"); + String src = null; + Path path = null; + + try { + List parameters = c.parse(cmd, pos); + src = parameters.get(0); + } catch(IllegalArgumentException iae) { + System.err.println("Usage: java FsShell " + TAIL_USAGE); + throw iae; + } + boolean foption = c.getOpt("f") ? true: false; + path = new Path(src); + FileSystem srcFs = path.getFileSystem(getConf()); + FileStatus fileStatus = srcFs.getFileStatus(path); + if (fileStatus.isDir()) { + throw new IOException("Source must be a file."); + } + + long fileSize = fileStatus.getLen(); + long offset = (fileSize > 1024) ? fileSize - 1024: 0; + + while (true) { + FSDataInputStream in = srcFs.open(path); + in.seek(offset); + IOUtils.copyBytes(in, System.out, 1024, false); + offset = in.getPos(); + in.close(); + if (!foption) { + break; + } + fileSize = srcFs.getFileStatus(path).getLen(); + offset = (fileSize > offset) ? offset: fileSize; + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + break; + } + } + } + + /** + * This class runs a command on a given FileStatus. This can be used for + * running various commands like chmod, chown etc. + */ + static abstract class CmdHandler { + + protected int errorCode = 0; + protected boolean okToContinue = true; + protected String cmdName; + + int getErrorCode() { return errorCode; } + boolean okToContinue() { return okToContinue; } + String getName() { return cmdName; } + + protected CmdHandler(String cmdName, FileSystem fs) { + this.cmdName = cmdName; + } + + public abstract void run(FileStatus file, FileSystem fs) throws IOException; + } + + /** helper returns listStatus() */ + private static FileStatus[] shellListStatus(String cmd, + FileSystem srcFs, + FileStatus src) { + if (!src.isDir()) { + FileStatus[] files = { src }; + return files; + } + Path path = src.getPath(); + try { + FileStatus[] files = srcFs.listStatus(path); + if ( files == null ) { + System.err.println(cmd + + ": could not get listing for '" + path + "'"); + } + return files; + } catch (IOException e) { + System.err.println(cmd + + ": could not get get listing for '" + path + "' : " + + e.getMessage().split("\n")[0]); + } + return null; + } + + + /** + * Runs the command on a given file with the command handler. + * If recursive is set, command is run recursively. + */ + private static int runCmdHandler(CmdHandler handler, FileStatus stat, + FileSystem srcFs, + boolean recursive) throws IOException { + int errors = 0; + handler.run(stat, srcFs); + if (recursive && stat.isDir() && handler.okToContinue()) { + FileStatus[] files = shellListStatus(handler.getName(), srcFs, stat); + if (files == null) { + return 1; + } + for(FileStatus file : files ) { + errors += runCmdHandler(handler, file, srcFs, recursive); + } + } + return errors; + } + + ///top level runCmdHandler + int runCmdHandler(CmdHandler handler, String[] args, + int startIndex, boolean recursive) + throws IOException { + int errors = 0; + + for (int i=startIndex; i 0 || handler.getErrorCode() != 0) ? 1 : 0; + } + + /** + * Return an abbreviated English-language desc of the byte length + * @deprecated Consider using {@link org.apache.hadoop.util.StringUtils#byteDesc} instead. 
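// A standalone, simplified sketch of the -tail -f loop above (the file name
// is hypothetical): re-open the file, seek to the last printed offset, stream
// any new bytes to stdout, then sleep and repeat.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class TailSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/logs/app.log");        // hypothetical file
    FileSystem fs = path.getFileSystem(conf);
    long len = fs.getFileStatus(path).getLen();
    long offset = len > 1024 ? len - 1024 : 0;    // start with the last 1KB
    while (true) {
      FSDataInputStream in = fs.open(path);
      in.seek(offset);
      IOUtils.copyBytes(in, System.out, 1024, false);  // false = keep stdout open
      offset = in.getPos();
      in.close();
      Thread.sleep(5000);                         // same 5s interval as above
    }
  }
}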
+ */ + @Deprecated + public static String byteDesc(long len) { + return StringUtils.byteDesc(len); + } + + /** + * @deprecated Consider using {@link org.apache.hadoop.util.StringUtils#limitDecimalTo2} instead. + */ + @Deprecated + public static synchronized String limitDecimalTo2(double d) { + return StringUtils.limitDecimalTo2(d); + } + + private void printHelp(String cmd) { + String summary = "hadoop fs is the command to execute fs commands. " + + "The full syntax is: \n\n" + + "hadoop fs [-fs ] [-conf ]\n\t" + + "[-D ] [-ls ] [-lsr ] [-df []] [-du ]\n\t" + + "[-dus ] [-mv ] [-cp ] [-rm ]\n\t" + + "[-rmr ] [-put ... ] [-copyFromLocal ... ]\n\t" + + "[-moveFromLocal ... ] [" + + GET_SHORT_USAGE + "\n\t" + + "[-getmerge [addnl]] [-cat ]\n\t" + + "[" + COPYTOLOCAL_SHORT_USAGE + "] [-moveToLocal ]\n\t" + + "[-mkdir ] [-report] [" + SETREP_SHORT_USAGE + "]\n\t" + + "[-touchz ] [-test -[ezd] ] [-stat [format] ]\n\t" + + "[-tail [-f] ] [-text ]\n\t" + + "[" + FsShellPermissions.CHMOD_USAGE + "]\n\t" + + "[" + FsShellPermissions.CHOWN_USAGE + "]\n\t" + + "[" + FsShellPermissions.CHGRP_USAGE + "]\n\t" + + "[" + Count.USAGE + "]\n\t" + + "[-help [cmd]]\n"; + + String conf ="-conf : Specify an application configuration file."; + + String D = "-D : Use value for given property."; + + String fs = "-fs [local | ]: \tSpecify the file system to use.\n" + + "\t\tIf not specified, the current configuration is used, \n" + + "\t\ttaken from the following, in increasing precedence: \n" + + "\t\t\tcore-default.xml inside the hadoop jar file \n" + + "\t\t\tcore-site.xml in $HADOOP_CONF_DIR \n" + + "\t\t'local' means use the local file system as your DFS. \n" + + "\t\t specifies a particular file system to \n" + + "\t\tcontact. This argument is optional but if used must appear\n" + + "\t\tappear first on the command line. Exactly one additional\n" + + "\t\targument must be specified. \n"; + + + String ls = "-ls : \tList the contents that match the specified file pattern. If\n" + + "\t\tpath is not specified, the contents of /user/\n" + + "\t\twill be listed. Directory entries are of the form \n" + + "\t\t\tdirName (full path) \n" + + "\t\tand file entries are of the form \n" + + "\t\t\tfileName(full path) size \n" + + "\t\twhere n is the number of replicas specified for the file \n" + + "\t\tand size is the size of the file, in bytes.\n"; + + String lsr = "-lsr : \tRecursively list the contents that match the specified\n" + + "\t\tfile pattern. Behaves very similarly to hadoop fs -ls,\n" + + "\t\texcept that the data is shown for all the entries in the\n" + + "\t\tsubtree.\n"; + + String df = "-df []: \tShows the capacity, free and used space of the filesystem.\n"+ + "\t\tIf the filesystem has multiple partitions, and no path to a particular partition\n"+ + "\t\tis specified, then the status of the root partitions will be shown.\n"; + + String du = "-du : \tShow the amount of space, in bytes, used by the files that \n" + + "\t\tmatch the specified file pattern. Equivalent to the unix\n" + + "\t\tcommand \"du -sb /*\" in case of a directory, \n" + + "\t\tand to \"du -b \" in case of a file.\n" + + "\t\tThe output is in the form \n" + + "\t\t\tname(full path) size (in bytes)\n"; + + String dus = "-dus : \tShow the amount of space, in bytes, used by the files that \n" + + "\t\tmatch the specified file pattern. 
Equivalent to the unix\n" + + "\t\tcommand \"du -sb\". The output is in the form \n" + + "\t\t\tname(full path) size (in bytes)\n"; + + String mv = "-mv <src> <dst>: Move files that match the specified file pattern <src>\n" + + "\t\tto a destination <dst>. When moving multiple files, the \n" + + "\t\tdestination must be a directory. \n"; + + String cp = "-cp <src> <dst>: Copy files that match the file pattern <src> to a \n" + + "\t\tdestination. When copying multiple files, the destination\n" + + "\t\tmust be a directory. \n"; + + String rm = "-rm <src>: \tDelete all files that match the specified file pattern.\n" + + "\t\tEquivalent to the Unix command \"rm <src>\"\n"; + + String rmr = "-rmr <src>: \tRemove all directories which match the specified file \n" + + "\t\tpattern. Equivalent to the Unix command \"rm -rf <src>\"\n"; + + String put = "-put <localsrc> ... <dst>: \tCopy files " + + "from the local file system \n\t\tinto fs. \n"; + + String copyFromLocal = "-copyFromLocal <localsrc> ... <dst>:" + + " Identical to the -put command.\n"; + + String moveFromLocal = "-moveFromLocal <localsrc> ... <dst>:" + + " Same as -put, except that the source is\n\t\tdeleted after it's copied.\n"; + + String get = GET_SHORT_USAGE + + ": Copy files that match the file pattern <src> \n" + + "\t\tto the local name. <src> is kept. When copying multiple \n" + + "\t\tfiles, the destination must be a directory. \n"; + + String getmerge = "-getmerge <src> <localdst>: Get all the files in the directories that \n" + + "\t\tmatch the source file pattern and merge and sort them to only\n" + + "\t\tone file on local fs. <src> is kept.\n"; + + String cat = "-cat <src>: \tFetch all files that match the file pattern <src> \n" + + "\t\tand display their content on stdout.\n"; + + + String text = "-text <src>: \tTakes a source file and outputs the file in text format.\n" + + "\t\tThe allowed formats are zip and TextRecordInputStream.\n"; + + + String copyToLocal = COPYTOLOCAL_SHORT_USAGE + + ": Identical to the -get command.\n"; + + String moveToLocal = "-moveToLocal <src> <localdst>: Not implemented yet \n"; + + String mkdir = "-mkdir <path>: \tCreate a directory in specified location. \n"; + + String setrep = SETREP_SHORT_USAGE + + ": Set the replication level of a file. \n" + + "\t\tThe -R flag requests a recursive change of replication level \n" + + "\t\tfor an entire tree.\n"; + + String touchz = "-touchz <path>: Write a timestamp in yyyy-MM-dd HH:mm:ss format\n" + + "\t\tin a file at <path>. An error is returned if the file exists with non-zero length\n"; + + String test = "-test -[ezd] <path>: If file { exists, has zero length, is a directory\n" + + "\t\tthen return 0, else return 1.\n"; + + String stat = "-stat [format] <path>: Print statistics about the file/directory at <path>\n" + + "\t\tin the specified format. Format accepts filesize in blocks (%b), filename (%n),\n" + + "\t\tblock size (%o), replication (%r), modification date (%y, %Y)\n"; + + String tail = TAIL_USAGE + + ": Show the last 1KB of the file. \n" + + "\t\tThe -f option shows appended data as the file grows. \n"; + + String chmod = FsShellPermissions.CHMOD_USAGE + "\n" + + "\t\tChanges permissions of a file.\n" + + "\t\tThis works similar to shell's chmod with a few exceptions.\n\n" + + "\t-R\tmodifies the files recursively. This is the only option\n" + + "\t\tcurrently supported.\n\n" + + "\tMODE\tMode is same as mode used for chmod shell command.\n" + + "\t\tOnly letters recognized are 'rwxXt'. E.g. +t,a+r,g-w,+rwx,o=r\n\n" + + "\tOCTALMODE Mode specified in 3 or 4 digits. If 4 digits, the first may\n" + + "\tbe 1 or 0 to turn the sticky bit on or off, respectively.
Unlike " + + "\tshell command, it is not possible to specify only part of the mode\n" + + "\t\tE.g. 754 is same as u=rwx,g=rx,o=r\n\n" + + "\t\tIf none of 'augo' is specified, 'a' is assumed and unlike\n" + + "\t\tshell command, no umask is applied.\n"; + + String chown = FsShellPermissions.CHOWN_USAGE + "\n" + + "\t\tChanges owner and group of a file.\n" + + "\t\tThis is similar to shell's chown with a few exceptions.\n\n" + + "\t-R\tmodifies the files recursively. This is the only option\n" + + "\t\tcurrently supported.\n\n" + + "\t\tIf only owner or group is specified then only owner or\n" + + "\t\tgroup is modified.\n\n" + + "\t\tThe owner and group names may only cosists of digits, alphabet,\n"+ + "\t\tand any of '-_.@/' i.e. [-_.@/a-zA-Z0-9]. The names are case\n" + + "\t\tsensitive.\n\n" + + "\t\tWARNING: Avoid using '.' to separate user name and group though\n" + + "\t\tLinux allows it. If user names have dots in them and you are\n" + + "\t\tusing local file system, you might see surprising results since\n" + + "\t\tshell command 'chown' is used for local files.\n"; + + String chgrp = FsShellPermissions.CHGRP_USAGE + "\n" + + "\t\tThis is equivalent to -chown ... :GROUP ...\n"; + + String help = "-help [cmd]: \tDisplays help for given command or all commands if none\n" + + "\t\tis specified.\n"; + + if ("fs".equals(cmd)) { + System.out.println(fs); + } else if ("conf".equals(cmd)) { + System.out.println(conf); + } else if ("D".equals(cmd)) { + System.out.println(D); + } else if ("ls".equals(cmd)) { + System.out.println(ls); + } else if ("lsr".equals(cmd)) { + System.out.println(lsr); + } else if ("df".equals(cmd)) { + System.out.println(df); + } else if ("du".equals(cmd)) { + System.out.println(du); + } else if ("dus".equals(cmd)) { + System.out.println(dus); + } else if ("rm".equals(cmd)) { + System.out.println(rm); + } else if ("rmr".equals(cmd)) { + System.out.println(rmr); + } else if ("mkdir".equals(cmd)) { + System.out.println(mkdir); + } else if ("mv".equals(cmd)) { + System.out.println(mv); + } else if ("cp".equals(cmd)) { + System.out.println(cp); + } else if ("put".equals(cmd)) { + System.out.println(put); + } else if ("copyFromLocal".equals(cmd)) { + System.out.println(copyFromLocal); + } else if ("moveFromLocal".equals(cmd)) { + System.out.println(moveFromLocal); + } else if ("get".equals(cmd)) { + System.out.println(get); + } else if ("getmerge".equals(cmd)) { + System.out.println(getmerge); + } else if ("copyToLocal".equals(cmd)) { + System.out.println(copyToLocal); + } else if ("moveToLocal".equals(cmd)) { + System.out.println(moveToLocal); + } else if ("cat".equals(cmd)) { + System.out.println(cat); + } else if ("get".equals(cmd)) { + System.out.println(get); + } else if ("setrep".equals(cmd)) { + System.out.println(setrep); + } else if ("touchz".equals(cmd)) { + System.out.println(touchz); + } else if ("test".equals(cmd)) { + System.out.println(test); + } else if ("text".equals(cmd)) { + System.out.println(text); + } else if ("stat".equals(cmd)) { + System.out.println(stat); + } else if ("tail".equals(cmd)) { + System.out.println(tail); + } else if ("chmod".equals(cmd)) { + System.out.println(chmod); + } else if ("chown".equals(cmd)) { + System.out.println(chown); + } else if ("chgrp".equals(cmd)) { + System.out.println(chgrp); + } else if (Count.matches(cmd)) { + System.out.println(Count.DESCRIPTION); + } else if ("help".equals(cmd)) { + System.out.println(help); + } else { + System.out.println(summary); + System.out.println(fs); + System.out.println(ls); + 
System.out.println(lsr); + System.out.println(df); + System.out.println(du); + System.out.println(dus); + System.out.println(mv); + System.out.println(cp); + System.out.println(rm); + System.out.println(rmr); + System.out.println(put); + System.out.println(copyFromLocal); + System.out.println(moveFromLocal); + System.out.println(get); + System.out.println(getmerge); + System.out.println(cat); + System.out.println(copyToLocal); + System.out.println(moveToLocal); + System.out.println(mkdir); + System.out.println(setrep); + System.out.println(tail); + System.out.println(touchz); + System.out.println(test); + System.out.println(text); + System.out.println(stat); + System.out.println(chmod); + System.out.println(chown); + System.out.println(chgrp); + System.out.println(Count.DESCRIPTION); + System.out.println(help); + } + + + } + + /** + * Apply operation specified by 'cmd' on all parameters + * starting from argv[startindex]. + */ + private int doall(String cmd, String argv[], int startindex) { + int exitCode = 0; + int i = startindex; + // + // for each source file, issue the command + // + for (; i < argv.length; i++) { + try { + // + // issue the command to the fs + // + if ("-cat".equals(cmd)) { + cat(argv[i], true); + } else if ("-mkdir".equals(cmd)) { + mkdir(argv[i]); + } else if ("-rm".equals(cmd)) { + delete(argv[i], false); + } else if ("-rmr".equals(cmd)) { + delete(argv[i], true); + } else if ("-df".equals(cmd)) { + df(argv[i]); + } else if ("-du".equals(cmd)) { + du(argv[i]); + } else if ("-dus".equals(cmd)) { + dus(argv[i]); + } else if (Count.matches(cmd)) { + new Count(argv, i, getConf()).runAll(); + } else if ("-ls".equals(cmd)) { + exitCode = ls(argv[i], false); + } else if ("-lsr".equals(cmd)) { + exitCode = ls(argv[i], true); + } else if ("-touchz".equals(cmd)) { + touchz(argv[i]); + } else if ("-text".equals(cmd)) { + text(argv[i]); + } + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error message. + // + exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + String content = e.getLocalizedMessage(); + if (content != null) { + content = content.split("\n")[0]; + } + System.err.println(cmd.substring(1) + ": " + + content); + } + } + return exitCode; + } + + /** + * Displays format of commands. 
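// A standalone sketch of driving FsShell programmatically (it mirrors the
// main() method further below): ToolRunner strips the generic -fs/-conf/-D
// options before handing the remaining arguments to run(). The path in the
// argument array is hypothetical.
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.util.ToolRunner;

public class FsShellDriver {
  public static void main(String[] args) throws Exception {
    FsShell shell = new FsShell();
    int rc;
    try {
      // equivalent to: hadoop fs -ls /user
      rc = ToolRunner.run(shell, new String[] { "-ls", "/user" });
    } finally {
      shell.close();
    }
    System.exit(rc);
  }
}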
+ * + */ + private static void printUsage(String cmd) { + String prefix = "Usage: java " + FsShell.class.getSimpleName(); + if ("-fs".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-fs ]"); + } else if ("-conf".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-conf ]"); + } else if ("-D".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-D <[property=value>]"); + } else if ("-ls".equals(cmd) || "-lsr".equals(cmd) || + "-du".equals(cmd) || "-dus".equals(cmd) || + "-rm".equals(cmd) || "-rmr".equals(cmd) || + "-touchz".equals(cmd) || "-mkdir".equals(cmd) || + "-text".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ]"); + } else if ("-df".equals(cmd) ) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " []]"); + } else if (Count.matches(cmd)) { + System.err.println(prefix + " [" + Count.USAGE + "]"); + } else if ("-mv".equals(cmd) || "-cp".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ]"); + } else if ("-put".equals(cmd) || "-copyFromLocal".equals(cmd) || + "-moveFromLocal".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ... ]"); + } else if ("-get".equals(cmd)) { + System.err.println("Usage: java FsShell [" + GET_SHORT_USAGE + "]"); + } else if ("-copyToLocal".equals(cmd)) { + System.err.println("Usage: java FsShell [" + COPYTOLOCAL_SHORT_USAGE+ "]"); + } else if ("-moveToLocal".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " [-crc] ]"); + } else if ("-cat".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [" + cmd + " ]"); + } else if ("-setrep".equals(cmd)) { + System.err.println("Usage: java FsShell [" + SETREP_SHORT_USAGE + "]"); + } else if ("-test".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-test -[ezd] ]"); + } else if ("-stat".equals(cmd)) { + System.err.println("Usage: java FsShell" + + " [-stat [format] ]"); + } else if ("-tail".equals(cmd)) { + System.err.println("Usage: java FsShell [" + TAIL_USAGE + "]"); + } else { + System.err.println("Usage: java FsShell"); + System.err.println(" [-ls ]"); + System.err.println(" [-lsr ]"); + System.err.println(" [-df []]"); + System.err.println(" [-du ]"); + System.err.println(" [-dus ]"); + System.err.println(" [" + Count.USAGE + "]"); + System.err.println(" [-mv ]"); + System.err.println(" [-cp ]"); + System.err.println(" [-rm ]"); + System.err.println(" [-rmr ]"); + System.err.println(" [-expunge]"); + System.err.println(" [-put ... ]"); + System.err.println(" [-copyFromLocal ... ]"); + System.err.println(" [-moveFromLocal ... 
]"); + System.err.println(" [" + GET_SHORT_USAGE + "]"); + System.err.println(" [-getmerge [addnl]]"); + System.err.println(" [-cat ]"); + System.err.println(" [-text ]"); + System.err.println(" [" + COPYTOLOCAL_SHORT_USAGE + "]"); + System.err.println(" [-moveToLocal [-crc] ]"); + System.err.println(" [-mkdir ]"); + System.err.println(" [" + SETREP_SHORT_USAGE + "]"); + System.err.println(" [-touchz ]"); + System.err.println(" [-test -[ezd] ]"); + System.err.println(" [-stat [format] ]"); + System.err.println(" [" + TAIL_USAGE + "]"); + System.err.println(" [" + FsShellPermissions.CHMOD_USAGE + "]"); + System.err.println(" [" + FsShellPermissions.CHOWN_USAGE + "]"); + System.err.println(" [" + FsShellPermissions.CHGRP_USAGE + "]"); + System.err.println(" [-help [cmd]]"); + System.err.println(); + ToolRunner.printGenericCommandUsage(System.err); + } + } + + /** + * run + */ + public int run(String argv[]) throws Exception { + + if (argv.length < 1) { + printUsage(""); + return -1; + } + + int exitCode = -1; + int i = 0; + String cmd = argv[i++]; + // + // verify that we have enough command line parameters + // + if ("-put".equals(cmd) || "-test".equals(cmd) || + "-copyFromLocal".equals(cmd) || "-moveFromLocal".equals(cmd)) { + if (argv.length < 3) { + printUsage(cmd); + return exitCode; + } + } else if ("-get".equals(cmd) || + "-copyToLocal".equals(cmd) || "-moveToLocal".equals(cmd)) { + if (argv.length < 3) { + printUsage(cmd); + return exitCode; + } + } else if ("-mv".equals(cmd) || "-cp".equals(cmd)) { + if (argv.length < 3) { + printUsage(cmd); + return exitCode; + } + } else if ("-rm".equals(cmd) || "-rmr".equals(cmd) || + "-cat".equals(cmd) || "-mkdir".equals(cmd) || + "-touchz".equals(cmd) || "-stat".equals(cmd) || + "-text".equals(cmd)) { + if (argv.length < 2) { + printUsage(cmd); + return exitCode; + } + } + // initialize FsShell + try { + init(); + } catch (RPC.VersionMismatch v) { + System.err.println("Version Mismatch between client and server" + + "... command aborted."); + return exitCode; + } catch (IOException e) { + System.err.println("Bad connection to FS. 
command aborted."); + return exitCode; + } + + exitCode = 0; + try { + if ("-put".equals(cmd) || "-copyFromLocal".equals(cmd)) { + Path[] srcs = new Path[argv.length-2]; + for (int j=0 ; i < argv.length-1 ;) + srcs[j++] = new Path(argv[i++]); + copyFromLocal(srcs, argv[i++]); + } else if ("-moveFromLocal".equals(cmd)) { + Path[] srcs = new Path[argv.length-2]; + for (int j=0 ; i < argv.length-1 ;) + srcs[j++] = new Path(argv[i++]); + moveFromLocal(srcs, argv[i++]); + } else if ("-get".equals(cmd) || "-copyToLocal".equals(cmd)) { + copyToLocal(argv, i); + } else if ("-getmerge".equals(cmd)) { + if (argv.length>i+2) + copyMergeToLocal(argv[i++], new Path(argv[i++]), Boolean.parseBoolean(argv[i++])); + else + copyMergeToLocal(argv[i++], new Path(argv[i++])); + } else if ("-cat".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-text".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-moveToLocal".equals(cmd)) { + moveToLocal(argv[i++], new Path(argv[i++])); + } else if ("-setrep".equals(cmd)) { + setReplication(argv, i); + } else if ("-chmod".equals(cmd) || + "-chown".equals(cmd) || + "-chgrp".equals(cmd)) { + FsShellPermissions.changePermissions(fs, cmd, argv, i, this); + } else if ("-ls".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + exitCode = ls(Path.CUR_DIR, false); + } + } else if ("-lsr".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + exitCode = ls(Path.CUR_DIR, true); + } + } else if ("-mv".equals(cmd)) { + exitCode = rename(argv, getConf()); + } else if ("-cp".equals(cmd)) { + exitCode = copy(argv, getConf()); + } else if ("-rm".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-rmr".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-expunge".equals(cmd)) { + expunge(); + } else if ("-df".equals(cmd)) { + if (argv.length-1 > 0) { + exitCode = doall(cmd, argv, i); + } else { + df(null); + } + } else if ("-du".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + du("."); + } + } else if ("-dus".equals(cmd)) { + if (i < argv.length) { + exitCode = doall(cmd, argv, i); + } else { + dus("."); + } + } else if (Count.matches(cmd)) { + exitCode = new Count(argv, i, getConf()).runAll(); + } else if ("-mkdir".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-touchz".equals(cmd)) { + exitCode = doall(cmd, argv, i); + } else if ("-test".equals(cmd)) { + exitCode = test(argv, i); + } else if ("-stat".equals(cmd)) { + if (i + 1 < argv.length) { + stat(argv[i++].toCharArray(), argv[i++]); + } else { + stat("%y".toCharArray(), argv[i]); + } + } else if ("-help".equals(cmd)) { + if (i < argv.length) { + printHelp(argv[i]); + } else { + printHelp(""); + } + } else if ("-tail".equals(cmd)) { + tail(argv, i); + } else { + exitCode = -1; + System.err.println(cmd.substring(1) + ": Unknown command"); + printUsage(""); + } + } catch (IllegalArgumentException arge) { + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + arge.getLocalizedMessage()); + printUsage(cmd); + } catch (RemoteException e) { + // + // This is a error returned by hadoop server. Print + // out the first line of the error mesage, ignore the stack trace. 
+ exitCode = -1; + try { + String[] content; + content = e.getLocalizedMessage().split("\n"); + System.err.println(cmd.substring(1) + ": " + + content[0]); + } catch (Exception ex) { + System.err.println(cmd.substring(1) + ": " + + ex.getLocalizedMessage()); + } + } catch (IOException e) { + // + // IO exception encountered locally. + // + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + + e.getLocalizedMessage()); + } catch (Exception re) { + exitCode = -1; + System.err.println(cmd.substring(1) + ": " + re.getLocalizedMessage()); + } finally { + } + return exitCode; + } + + public void close() throws IOException { + if (fs != null) { + fs.close(); + fs = null; + } + } + + /** + * main() has some simple utility methods + */ + public static void main(String argv[]) throws Exception { + FsShell shell = new FsShell(); + int res; + try { + res = ToolRunner.run(shell, argv); + } finally { + shell.close(); + } + System.exit(res); + } + + /** + * Accumulate exceptions if there is any. Throw them at last. + */ + private abstract class DelayedExceptionThrowing { + abstract void process(Path p, FileSystem srcFs) throws IOException; + + final void globAndProcess(Path srcPattern, FileSystem srcFs + ) throws IOException { + List exceptions = new ArrayList(); + for(Path p : FileUtil.stat2Paths(srcFs.globStatus(srcPattern), + srcPattern)) + try { process(p, srcFs); } + catch(IOException ioe) { exceptions.add(ioe); } + + if (!exceptions.isEmpty()) + if (exceptions.size() == 1) + throw exceptions.get(0); + else + throw new IOException("Multiple IOExceptions: " + exceptions); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/FsShellPermissions.java b/src/java/org/apache/hadoop/fs/FsShellPermissions.java new file mode 100644 index 00000000000..27997c7e7a8 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsShellPermissions.java @@ -0,0 +1,315 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.fs.FsShell.CmdHandler; +import org.apache.hadoop.fs.permission.FsPermission; + + +/** + * This class is the home for file permissions related commands. + * Moved to this separate class since FsShell is getting too large. + */ +class FsShellPermissions { + + /*========== chmod ==========*/ + + /* + * The pattern is almost as flexible as mode allowed by chmod shell command. + * The main restriction is that we recognize only rwxXt. To reduce errors we + * also enforce octal mode specifications of either 3 digits without a sticky + * bit setting or four digits with a sticky bit setting. 
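// A standalone illustration of what the two mode patterns declared just below
// accept: symbolic modes such as "u+rwx,g-w" and octal modes such as "644" or
// "1755" (an optional leading 0/1 selects the sticky bit). The class name and
// the sample mode strings are arbitrary.
import java.util.regex.Pattern;

public class ChmodPatternDemo {
  public static void main(String[] args) {
    Pattern normal = Pattern.compile("\\G\\s*([ugoa]*)([+=-]+)([rwxXt]+)([,\\s]*)\\s*");
    Pattern octal  = Pattern.compile("^\\s*[+]?([01]?)([0-7]{3})\\s*$");
    // expected: the first four modes are accepted, "999" is rejected
    for (String mode : new String[] { "u+rwx,g-w", "a=r", "644", "1755", "999" }) {
      boolean ok = normal.matcher(mode).find() || octal.matcher(mode).matches();
      System.out.println(mode + " -> " + (ok ? "accepted" : "rejected"));
    }
  }
}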
+ */ + private static Pattern chmodNormalPattern = + Pattern.compile("\\G\\s*([ugoa]*)([+=-]+)([rwxXt]+)([,\\s]*)\\s*"); + private static Pattern chmodOctalPattern = + Pattern.compile("^\\s*[+]?([01]?)([0-7]{3})\\s*$"); + + static String CHMOD_USAGE = + "-chmod [-R] PATH..."; + + private static class ChmodHandler extends CmdHandler { + + private short userMode; + private short groupMode; + private short othersMode; + private short stickyMode; + private char userType = '+'; + private char groupType = '+'; + private char othersType = '+'; + private char stickyBitType = '+'; + + private void applyNormalPattern(String modeStr, Matcher matcher) + throws IOException { + // Are there multiple permissions stored in one chmod? + boolean commaSeperated = false; + + for(int i=0; i < 1 || matcher.end() < modeStr.length(); i++) { + if (i>0 && (!commaSeperated || !matcher.find())) { + patternError(modeStr); + } + + /* groups : 1 : [ugoa]* + * 2 : [+-=] + * 3 : [rwxXt]+ + * 4 : [,\s]* + */ + + String str = matcher.group(2); + char type = str.charAt(str.length() - 1); + + boolean user, group, others, stickyBit; + user = group = others = stickyBit = false; + + for(char c : matcher.group(1).toCharArray()) { + switch (c) { + case 'u' : user = true; break; + case 'g' : group = true; break; + case 'o' : others = true; break; + case 'a' : break; + default : throw new RuntimeException("Unexpected"); + } + } + + if (!(user || group || others)) { // same as specifying 'a' + user = group = others = true; + } + + short mode = 0; + + for(char c : matcher.group(3).toCharArray()) { + switch (c) { + case 'r' : mode |= 4; break; + case 'w' : mode |= 2; break; + case 'x' : mode |= 1; break; + case 'X' : mode |= 8; break; + case 't' : stickyBit = true; break; + default : throw new RuntimeException("Unexpected"); + } + } + + if ( user ) { + userMode = mode; + userType = type; + } + + if ( group ) { + groupMode = mode; + groupType = type; + } + + if ( others ) { + othersMode = mode; + othersType = type; + + stickyMode = (short) (stickyBit ? 
1 : 0); + stickyBitType = type; + } + + commaSeperated = matcher.group(4).contains(","); + } + } + + private void applyOctalPattern(String modeStr, Matcher matcher) { + userType = groupType = othersType = '='; + + // Check if sticky bit is specified + String sb = matcher.group(1); + if(!sb.isEmpty()) { + stickyMode = Short.valueOf(sb.substring(0, 1)); + stickyBitType = '='; + } + + String str = matcher.group(2); + userMode = Short.valueOf(str.substring(0, 1)); + groupMode = Short.valueOf(str.substring(1, 2)); + othersMode = Short.valueOf(str.substring(2, 3)); + } + + private void patternError(String mode) throws IOException { + throw new IOException("chmod : mode '" + mode + + "' does not match the expected pattern."); + } + + ChmodHandler(FileSystem fs, String modeStr) throws IOException { + super("chmod", fs); + Matcher matcher = null; + + if ((matcher = chmodNormalPattern.matcher(modeStr)).find()) { + applyNormalPattern(modeStr, matcher); + } else if ((matcher = chmodOctalPattern.matcher(modeStr)).matches()) { + applyOctalPattern(modeStr, matcher); + } else { + patternError(modeStr); + } + } + + private int applyChmod(char type, int mode, int existing, boolean exeOk) { + boolean capX = false; + + if ((mode&8) != 0) { // convert X to x; + capX = true; + mode &= ~8; + mode |= 1; + } + + switch (type) { + case '+' : mode = mode | existing; break; + case '-' : mode = (~mode) & existing; break; + case '=' : break; + default : throw new RuntimeException("Unexpected"); + } + + // if X is specified add 'x' only if exeOk or x was already set. + if (capX && !exeOk && (mode&1) != 0 && (existing&1) == 0) { + mode &= ~1; // remove x + } + + return mode; + } + + @Override + public void run(FileStatus file, FileSystem srcFs) throws IOException { + FsPermission perms = file.getPermission(); + int existing = perms.toShort(); + boolean exeOk = file.isDir() || (existing & 0111) != 0; + int newperms = ( applyChmod(stickyBitType, stickyMode, + (existing>>>9), false) << 9 | + applyChmod(userType, userMode, + (existing>>>6)&7, exeOk) << 6 | + applyChmod(groupType, groupMode, + (existing>>>3)&7, exeOk) << 3 | + applyChmod(othersType, othersMode, existing&7, exeOk)); + + if (existing != newperms) { + try { + srcFs.setPermission(file.getPath(), + new FsPermission((short)newperms)); + } catch (IOException e) { + System.err.println(getName() + ": changing permissions of '" + + file.getPath() + "':" + e.getMessage()); + } + } + } + } + + /*========== chown ==========*/ + + static private String allowedChars = "[-_./@a-zA-Z0-9]"; + ///allows only "allowedChars" above in names for owner and group + static private Pattern chownPattern = + Pattern.compile("^\\s*(" + allowedChars + "+)?" 
+ + "([:](" + allowedChars + "*))?\\s*$"); + static private Pattern chgrpPattern = + Pattern.compile("^\\s*(" + allowedChars + "+)\\s*$"); + + static String CHOWN_USAGE = "-chown [-R] [OWNER][:[GROUP]] PATH..."; + static String CHGRP_USAGE = "-chgrp [-R] GROUP PATH..."; + + private static class ChownHandler extends CmdHandler { + protected String owner = null; + protected String group = null; + + protected ChownHandler(String cmd, FileSystem fs) { //for chgrp + super(cmd, fs); + } + + ChownHandler(FileSystem fs, String ownerStr) throws IOException { + super("chown", fs); + Matcher matcher = chownPattern.matcher(ownerStr); + if (!matcher.matches()) { + throw new IOException("'" + ownerStr + "' does not match " + + "expected pattern for [owner][:group]."); + } + owner = matcher.group(1); + group = matcher.group(3); + if (group != null && group.length() == 0) { + group = null; + } + if (owner == null && group == null) { + throw new IOException("'" + ownerStr + "' does not specify " + + " onwer or group."); + } + } + + @Override + public void run(FileStatus file, FileSystem srcFs) throws IOException { + //Should we do case insensitive match? + String newOwner = (owner == null || owner.equals(file.getOwner())) ? + null : owner; + String newGroup = (group == null || group.equals(file.getGroup())) ? + null : group; + + if (newOwner != null || newGroup != null) { + try { + srcFs.setOwner(file.getPath(), newOwner, newGroup); + } catch (IOException e) { + System.err.println(getName() + ": changing ownership of '" + + file.getPath() + "':" + e.getMessage()); + + } + } + } + } + + /*========== chgrp ==========*/ + + private static class ChgrpHandler extends ChownHandler { + ChgrpHandler(FileSystem fs, String groupStr) throws IOException { + super("chgrp", fs); + + Matcher matcher = chgrpPattern.matcher(groupStr); + if (!matcher.matches()) { + throw new IOException("'" + groupStr + "' does not match " + + "expected pattern for group"); + } + group = matcher.group(1); + } + } + + static void changePermissions(FileSystem fs, String cmd, + String argv[], int startIndex, FsShell shell) + throws IOException { + CmdHandler handler = null; + boolean recursive = false; + + // handle common arguments, currently only "-R" + for (; startIndex < argv.length && argv[startIndex].equals("-R"); + startIndex++) { + recursive = true; + } + + if ( startIndex >= argv.length ) { + throw new IOException("Not enough arguments for the command"); + } + + if (cmd.equals("-chmod")) { + handler = new ChmodHandler(fs, argv[startIndex++]); + } else if (cmd.equals("-chown")) { + handler = new ChownHandler(fs, argv[startIndex++]); + } else if (cmd.equals("-chgrp")) { + handler = new ChgrpHandler(fs, argv[startIndex++]); + } + + shell.runCmdHandler(handler, argv, startIndex, recursive); + } +} diff --git a/src/java/org/apache/hadoop/fs/FsStatus.java b/src/java/org/apache/hadoop/fs/FsStatus.java new file mode 100644 index 00000000000..0c7a5ac5747 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsStatus.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Writable; + +/** This class is used to represent the capacity, free and used space on a + * {@link FileSystem}. + */ +public class FsStatus implements Writable { + private long capacity; + private long used; + private long remaining; + + /** Construct a FsStatus object, using the specified statistics */ + public FsStatus(long capacity, long used, long remaining) { + this.capacity = capacity; + this.used = used; + this.remaining = remaining; + } + + /** Return the capacity in bytes of the file system */ + public long getCapacity() { + return capacity; + } + + /** Return the number of bytes used on the file system */ + public long getUsed() { + return used; + } + + /** Return the number of remaining bytes on the file system */ + public long getRemaining() { + return remaining; + } + + ////////////////////////////////////////////////// + // Writable + ////////////////////////////////////////////////// + public void write(DataOutput out) throws IOException { + out.writeLong(capacity); + out.writeLong(used); + out.writeLong(remaining); + } + + public void readFields(DataInput in) throws IOException { + capacity = in.readLong(); + used = in.readLong(); + remaining = in.readLong(); + } +} diff --git a/src/java/org/apache/hadoop/fs/FsUrlConnection.java b/src/java/org/apache/hadoop/fs/FsUrlConnection.java new file mode 100644 index 00000000000..c919b8b4047 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsUrlConnection.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; + +import org.apache.hadoop.conf.Configuration; + +/** + * Representation of a URL connection to open InputStreams. 
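// A standalone sketch tying FsStatus to the -df output shown earlier: fetch
// the status for a path and print capacity, used and remaining bytes plus a
// Use% figure. The path is hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;

public class DfSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path p = new Path("/");                       // root partition
    FileSystem fs = p.getFileSystem(conf);
    FsStatus st = fs.getStatus(p);
    int usePercent = (int) (100.0f * st.getUsed() / st.getCapacity());
    System.out.printf("%s\t%d\t%d\t%d\t%d%%%n",
        p, st.getCapacity(), st.getUsed(), st.getRemaining(), usePercent);
  }
}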
+ */ +class FsUrlConnection extends URLConnection { + + private Configuration conf; + + private InputStream is; + + FsUrlConnection(Configuration conf, URL url) { + super(url); + this.conf = conf; + } + + @Override + public void connect() throws IOException { + try { + FileSystem fs = FileSystem.get(url.toURI(), conf); + is = fs.open(new Path(url.getPath())); + } catch (URISyntaxException e) { + throw new IOException(e.toString()); + } + } + + /* @inheritDoc */ + @Override + public InputStream getInputStream() throws IOException { + if (is == null) { + connect(); + } + return is; + } + +} diff --git a/src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java b/src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java new file mode 100644 index 00000000000..37c6fcf4807 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsUrlStreamHandler.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.net.URL; +import java.net.URLStreamHandler; + +import org.apache.hadoop.conf.Configuration; + +/** + * URLStream handler relying on FileSystem and on a given Configuration to + * handle URL protocols. + */ +class FsUrlStreamHandler extends URLStreamHandler { + + private Configuration conf; + + FsUrlStreamHandler(Configuration conf) { + this.conf = conf; + } + + FsUrlStreamHandler() { + this.conf = new Configuration(); + } + + @Override + protected FsUrlConnection openConnection(URL url) throws IOException { + return new FsUrlConnection(conf, url); + } + +} diff --git a/src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java b/src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java new file mode 100644 index 00000000000..624d7050b93 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs; + +import java.net.URLStreamHandlerFactory; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +/** + * Factory for URL stream handlers. + * + * There is only one handler whose job is to create UrlConnections. A + * FsUrlConnection relies on FileSystem to choose the appropriate FS + * implementation. + * + * Before returning our handler, we make sure that FileSystem knows an + * implementation for the requested scheme/protocol. + */ +public class FsUrlStreamHandlerFactory implements + URLStreamHandlerFactory { + + // The configuration holds supported FS implementation class names. + private Configuration conf; + + // This map stores whether a protocol is know or not by FileSystem + private Map protocols = new HashMap(); + + // The URL Stream handler + private java.net.URLStreamHandler handler; + + public FsUrlStreamHandlerFactory() { + this.conf = new Configuration(); + // force the resolution of the configuration files + // this is required if we want the factory to be able to handle + // file:// URLs + this.conf.getClass("fs.file.impl", null); + this.handler = new FsUrlStreamHandler(this.conf); + } + + public FsUrlStreamHandlerFactory(Configuration conf) { + this.conf = new Configuration(conf); + // force the resolution of the configuration files + this.conf.getClass("fs.file.impl", null); + this.handler = new FsUrlStreamHandler(this.conf); + } + + public java.net.URLStreamHandler createURLStreamHandler(String protocol) { + if (!protocols.containsKey(protocol)) { + boolean known = + (conf.getClass("fs." + protocol + ".impl", null) != null); + protocols.put(protocol, known); + } + if (protocols.get(protocol)) { + return handler; + } else { + // FileSystem does not know the protocol, let the VM handle this + return null; + } + } + +} diff --git a/src/java/org/apache/hadoop/fs/GlobExpander.java b/src/java/org/apache/hadoop/fs/GlobExpander.java new file mode 100644 index 00000000000..bc9b27674e0 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/GlobExpander.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +class GlobExpander { + + static class StringWithOffset { + String string; + int offset; + public StringWithOffset(String string, int offset) { + super(); + this.string = string; + this.offset = offset; + } + } + + /** + * Expand globs in the given filePattern into a collection of + * file patterns so that in the expanded set no file pattern has a + * slash character ("/") in a curly bracket pair. 
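+ * <p>
+ * For example (patterns chosen for illustration only), {@code {a/b,c}/d}
+ * expands to the two patterns {@code a/b/d} and {@code c/d}, while
+ * {@code {a,b}/c} is returned unchanged because its bracket pair contains
+ * no slash.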
+ * @param filePattern + * @return expanded file patterns + * @throws IOException + */ + public static List expand(String filePattern) throws IOException { + List fullyExpanded = new ArrayList(); + List toExpand = new ArrayList(); + toExpand.add(new StringWithOffset(filePattern, 0)); + while (!toExpand.isEmpty()) { + StringWithOffset path = toExpand.remove(0); + List expanded = expandLeftmost(path); + if (expanded == null) { + fullyExpanded.add(path.string); + } else { + toExpand.addAll(0, expanded); + } + } + return fullyExpanded; + } + + /** + * Expand the leftmost outer curly bracket pair containing a + * slash character ("/") in filePattern. + * @param filePattern + * @return expanded file patterns + * @throws IOException + */ + private static List expandLeftmost(StringWithOffset + filePatternWithOffset) throws IOException { + + String filePattern = filePatternWithOffset.string; + int leftmost = leftmostOuterCurlyContainingSlash(filePattern, + filePatternWithOffset.offset); + if (leftmost == -1) { + return null; + } + int curlyOpen = 0; + StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost)); + StringBuilder suffix = new StringBuilder(); + List alts = new ArrayList(); + StringBuilder alt = new StringBuilder(); + StringBuilder cur = prefix; + for (int i = leftmost; i < filePattern.length(); i++) { + char c = filePattern.charAt(i); + if (cur == suffix) { + cur.append(c); + } else if (c == '\\') { + i++; + if (i >= filePattern.length()) { + throw new IOException("Illegal file pattern: " + + "An escaped character does not present for glob " + + filePattern + " at " + i); + } + c = filePattern.charAt(i); + cur.append(c); + } else if (c == '{') { + if (curlyOpen++ == 0) { + alt.setLength(0); + cur = alt; + } else { + cur.append(c); + } + + } else if (c == '}' && curlyOpen > 0) { + if (--curlyOpen == 0) { + alts.add(alt.toString()); + alt.setLength(0); + cur = suffix; + } else { + cur.append(c); + } + } else if (c == ',') { + if (curlyOpen == 1) { + alts.add(alt.toString()); + alt.setLength(0); + } else { + cur.append(c); + } + } else { + cur.append(c); + } + } + List exp = new ArrayList(); + for (String string : alts) { + exp.add(new StringWithOffset(prefix + string + suffix, prefix.length())); + } + return exp; + } + + /** + * Finds the index of the leftmost opening curly bracket containing a + * slash character ("/") in filePattern. 
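+ * <p>
+ * For example (illustrative input), in {@code a{b,c}/{d/e,f}} the pair
+ * {@code {b,c}} contains no slash, so the index returned is that of the
+ * second opening bracket, the one enclosing {@code d/e}.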
+ * @param filePattern + * @return the index of the leftmost opening curly bracket containing a + * slash character ("/"), or -1 if there is no such bracket + * @throws IOException + */ + private static int leftmostOuterCurlyContainingSlash(String filePattern, + int offset) throws IOException { + int curlyOpen = 0; + int leftmost = -1; + boolean seenSlash = false; + for (int i = offset; i < filePattern.length(); i++) { + char c = filePattern.charAt(i); + if (c == '\\') { + i++; + if (i >= filePattern.length()) { + throw new IOException("Illegal file pattern: " + + "An escaped character does not present for glob " + + filePattern + " at " + i); + } + } else if (c == '{') { + if (curlyOpen++ == 0) { + leftmost = i; + } + } else if (c == '}' && curlyOpen > 0) { + if (--curlyOpen == 0 && leftmost != -1 && seenSlash) { + return leftmost; + } + } else if (c == '/' && curlyOpen > 0) { + seenSlash = true; + } + } + return -1; + } + +} diff --git a/src/java/org/apache/hadoop/fs/HarFileSystem.java b/src/java/org/apache/hadoop/fs/HarFileSystem.java new file mode 100644 index 00000000000..bcec4b660f1 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/HarFileSystem.java @@ -0,0 +1,892 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.util.LineReader; +import org.apache.hadoop.util.Progressable; + +/** + * This is an implementation of the Hadoop Archive + * Filesystem. This archive Filesystem has index files + * of the form _index* and has contents of the form + * part-*. The index files store the indexes of the + * real files. The index files are of the form _masterindex + * and _index. The master index is a level of indirection + * in to the index file to make the look ups faster. the index + * file is sorted with hash code of the paths that it contains + * and the master index contains pointers to the positions in + * index for ranges of hashcodes. 
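+ * <p>
+ * A minimal read-only usage sketch (the cluster address and archive path are
+ * illustrative, and it assumes the "har" scheme is mapped to this class in
+ * the configuration):
+ * <pre>{@code
+ * Path inHar = new Path("har://hdfs-namenode:8020/user/alice/logs.har/2009/05/part-0");
+ * FileSystem harFs = inHar.getFileSystem(new Configuration());
+ * FSDataInputStream in = harFs.open(inHar);
+ * }</pre>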
+ */ + +public class HarFileSystem extends FilterFileSystem { + public static final int VERSION = 1; + // uri representation of this Har filesystem + private URI uri; + // the version of this har filesystem + private int version; + // underlying uri + private URI underLyingURI; + // the top level path of the archive + // in the underlying file system + private Path archivePath; + // the masterIndex of the archive + private Path masterIndex; + // the index file + private Path archiveIndex; + // the har auth + private String harAuth; + + /** + * public construction of harfilesystem + * + */ + public HarFileSystem() { + } + + /** + * Constructor to create a HarFileSystem with an + * underlying filesystem. + * @param fs + */ + public HarFileSystem(FileSystem fs) { + super(fs); + } + + /** + * Initialize a Har filesystem per har archive. The + * archive home directory is the top level directory + * in the filesystem that contains the HAR archive. + * Be careful with this method, you do not want to go + * on creating new Filesystem instances per call to + * path.getFileSystem(). + * the uri of Har is + * har://underlyingfsscheme-host:port/archivepath. + * or + * har:///archivepath. This assumes the underlying filesystem + * to be used in case not specified. + */ + public void initialize(URI name, Configuration conf) throws IOException { + //decode the name + underLyingURI = decodeHarURI(name, conf); + // we got the right har Path- now check if this is + //truly a har filesystem + Path harPath = archivePath(new Path(name.toString())); + if (harPath == null) { + throw new IOException("Invalid path for the Har Filesystem. " + + name.toString()); + } + if (fs == null) { + fs = FileSystem.get(underLyingURI, conf); + } + this.uri = harPath.toUri(); + this.archivePath = new Path(this.uri.getPath()); + this.harAuth = getHarAuth(this.underLyingURI); + //check for the underlying fs containing + // the index file + this.masterIndex = new Path(archivePath, "_masterindex"); + this.archiveIndex = new Path(archivePath, "_index"); + if (!fs.exists(masterIndex) || !fs.exists(archiveIndex)) { + throw new IOException("Invalid path for the Har Filesystem. " + + "No index file in " + harPath); + } + try{ + this.version = getHarVersion(); + } catch(IOException io) { + throw new IOException("Unable to " + + "read the version of the Har file system: " + this.archivePath); + } + if (this.version != HarFileSystem.VERSION) { + throw new IOException("Invalid version " + + this.version + " expected " + HarFileSystem.VERSION); + } + } + + // get the version of the filesystem from the masterindex file + // the version is currently not useful since its the first version + // of archives + public int getHarVersion() throws IOException { + FSDataInputStream masterIn = fs.open(masterIndex); + LineReader lmaster = new LineReader(masterIn, getConf()); + Text line = new Text(); + lmaster.readLine(line); + try { + masterIn.close(); + } catch(IOException e){ + //disregard it. + // its a read. + } + String versionLine = line.toString(); + String[] arr = versionLine.split(" "); + int version = Integer.parseInt(arr[0]); + return version; + } + + /* + * find the parent path that is the + * archive path in the path. The last + * path segment that ends with .har is + * the path that will be returned. 
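+ * For example (illustrative path), an input of
+ * /user/alice/logs.har/2009/05/part-0 yields /user/alice/logs.har;
+ * if no segment ends in ".har" the result is null.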
+ */ + private Path archivePath(Path p) { + Path retPath = null; + Path tmp = p; + for (int i=0; i< p.depth(); i++) { + if (tmp.toString().endsWith(".har")) { + retPath = tmp; + break; + } + tmp = tmp.getParent(); + } + return retPath; + } + + /** + * decode the raw URI to get the underlying URI + * @param rawURI raw Har URI + * @return filtered URI of the underlying fileSystem + */ + private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException { + String tmpAuth = rawURI.getAuthority(); + //we are using the default file + //system in the config + //so create a underlying uri and + //return it + if (tmpAuth == null) { + //create a path + return FileSystem.getDefaultUri(conf); + } + String host = rawURI.getHost(); + String[] str = host.split("-", 2); + if (str[0] == null) { + throw new IOException("URI: " + rawURI + " is an invalid Har URI."); + } + String underLyingScheme = str[0]; + String underLyingHost = (str.length > 1)? str[1]:null; + int underLyingPort = rawURI.getPort(); + String auth = (underLyingHost == null && underLyingPort == -1)? + null:(underLyingHost+":"+underLyingPort); + URI tmp = null; + if (rawURI.getQuery() != null) { + // query component not allowed + throw new IOException("query component in Path not supported " + rawURI); + } + try { + tmp = new URI(underLyingScheme, auth, rawURI.getPath(), + rawURI.getQuery(), rawURI.getFragment()); + } catch (URISyntaxException e) { + // do nothing should not happen + } + return tmp; + } + + /** + * return the top level archive. + */ + public Path getWorkingDirectory() { + return new Path(uri.toString()); + } + + /** + * Create a har specific auth + * har-underlyingfs:port + * @param underLyingURI the uri of underlying + * filesystem + * @return har specific auth + */ + private String getHarAuth(URI underLyingUri) { + String auth = underLyingUri.getScheme() + "-"; + if (underLyingUri.getHost() != null) { + auth += underLyingUri.getHost() + ":"; + if (underLyingUri.getPort() != -1) { + auth += underLyingUri.getPort(); + } + } + else { + auth += ":"; + } + return auth; + } + + /** + * Returns the uri of this filesystem. + * The uri is of the form + * har://underlyingfsschema-host:port/pathintheunderlyingfs + */ + @Override + public URI getUri() { + return this.uri; + } + + /** + * this method returns the path + * inside the har filesystem. + * this is relative path inside + * the har filesystem. + * @param path the fully qualified path in the har filesystem. + * @return relative path in the filesystem. + */ + private Path getPathInHar(Path path) { + Path harPath = new Path(path.toUri().getPath()); + if (archivePath.compareTo(harPath) == 0) + return new Path(Path.SEPARATOR); + Path tmp = new Path(harPath.getName()); + Path parent = harPath.getParent(); + while (!(parent.compareTo(archivePath) == 0)) { + if (parent.toString().equals(Path.SEPARATOR)) { + tmp = null; + break; + } + tmp = new Path(parent.getName(), tmp); + parent = parent.getParent(); + } + if (tmp != null) + tmp = new Path(Path.SEPARATOR, tmp); + return tmp; + } + + //the relative path of p. basically + // getting rid of /. Parsing and doing + // string manipulation is not good - so + // just use the path api to do it. 
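+  // For example (illustrative values),
+  // makeRelative("har://hdfs-host:8020/user/alice/logs.har", new Path("/2009/05/part-0"))
+  // yields har://hdfs-host:8020/user/alice/logs.har/2009/05/part-0.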
+ private Path makeRelative(String initial, Path p) { + Path root = new Path(Path.SEPARATOR); + if (root.compareTo(p) == 0) + return new Path(initial); + Path retPath = new Path(p.getName()); + Path parent = p.getParent(); + for (int i=0; i < p.depth()-1; i++) { + retPath = new Path(parent.getName(), retPath); + parent = parent.getParent(); + } + return new Path(initial, retPath.toString()); + } + + /* this makes a path qualified in the har filesystem + * (non-Javadoc) + * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified( + * org.apache.hadoop.fs.Path) + */ + @Override + public Path makeQualified(Path path) { + // make sure that we just get the + // path component + Path fsPath = path; + if (!path.isAbsolute()) { + fsPath = new Path(archivePath, path); + } + + URI tmpURI = fsPath.toUri(); + fsPath = new Path(tmpURI.getPath()); + //change this to Har uri + URI tmp = null; + try { + tmp = new URI(uri.getScheme(), harAuth, fsPath.toString(), + tmpURI.getQuery(), tmpURI.getFragment()); + } catch(URISyntaxException ue) { + LOG.error("Error in URI ", ue); + } + if (tmp != null) { + return new Path(tmp.toString()); + } + return null; + } + + /** + * get block locations from the underlying fs + * @param file the input filestatus to get block locations + * @param start the start in the file + * @param len the length in the file + * @return block locations for this segment of file + * @throws IOException + */ + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + // need to look up the file in the underlying fs + // look up the index + + // make sure this is a prt of this har filesystem + Path p = makeQualified(file.getPath()); + Path harPath = getPathInHar(p); + String line = fileStatusInIndex(harPath); + if (line == null) { + throw new FileNotFoundException("File " + file.getPath() + " not found"); + } + HarStatus harStatus = new HarStatus(line); + if (harStatus.isDir()) { + return new BlockLocation[0]; + } + FileStatus fsFile = fs.getFileStatus(new Path(archivePath, + harStatus.getPartName())); + BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile, + harStatus.getStartIndex() + start, len); + return fakeBlockLocations(rawBlocks, harStatus.getStartIndex()); + } + + /** + * fake the rawblocks since map reduce uses the block offsets to + * fo some computations regarding the blocks + * @param rawBlocks the raw blocks returned by the filesystem + * @return faked blocks with changed offsets. + */ + private BlockLocation[] fakeBlockLocations(BlockLocation[] rawBlocks, + long startIndex) { + for (BlockLocation block : rawBlocks) { + long rawOffset = block.getOffset(); + block.setOffset(rawOffset - startIndex); + } + return rawBlocks; + } + + /** + * the hash of the path p inside iniside + * the filesystem + * @param p the path in the harfilesystem + * @return the hash code of the path. + */ + public static int getHarHash(Path p) { + return (p.toString().hashCode() & 0x7fffffff); + } + + static class Store { + public Store() { + begin = end = startHash = endHash = 0; + } + public Store(long begin, long end, int startHash, int endHash) { + this.begin = begin; + this.end = end; + this.startHash = startHash; + this.endHash = endHash; + } + public long begin; + public long end; + public int startHash; + public int endHash; + } + + // make sure that this harPath is relative to the har filesystem + // this only works for relative paths. This returns the line matching + // the file in the index. 
Returns a null if there is not matching + // filename in the index file. + private String fileStatusInIndex(Path harPath) throws IOException { + // read the index file + int hashCode = getHarHash(harPath); + // get the master index to find the pos + // in the index file + FSDataInputStream in = fs.open(masterIndex); + FileStatus masterStat = fs.getFileStatus(masterIndex); + LineReader lin = new LineReader(in, getConf()); + Text line = new Text(); + long read = lin.readLine(line); + //ignore the first line. this is the header of the index files + String[] readStr = null; + List stores = new ArrayList(); + while(read < masterStat.getLen()) { + int b = lin.readLine(line); + read += b; + readStr = line.toString().split(" "); + int startHash = Integer.parseInt(readStr[0]); + int endHash = Integer.parseInt(readStr[1]); + if (startHash <= hashCode && hashCode <= endHash) { + stores.add(new Store(Long.parseLong(readStr[2]), + Long.parseLong(readStr[3]), startHash, + endHash)); + } + line.clear(); + } + try { + lin.close(); + } catch(IOException io){ + // do nothing just a read. + } + FSDataInputStream aIn = fs.open(archiveIndex); + LineReader aLin = new LineReader(aIn, getConf()); + String retStr = null; + // now start reading the real index file + read = 0; + for (Store s: stores) { + aIn.seek(s.begin); + while (read + s.begin < s.end) { + int tmp = aLin.readLine(line); + read += tmp; + String lineFeed = line.toString(); + String[] parsed = lineFeed.split(" "); + if (harPath.compareTo(new Path(parsed[0])) == 0) { + // bingo! + retStr = lineFeed; + break; + } + line.clear(); + } + if (retStr != null) + break; + } + try { + aIn.close(); + } catch(IOException io) { + //do nothing + } + return retStr; + } + + // a single line parser for hadoop archives status + // stored in a single line in the index files + // the format is of the form + // filename "dir"/"file" partFileName startIndex length + // + private static class HarStatus { + boolean isDir; + String name; + List children; + String partName; + long startIndex; + long length; + public HarStatus(String harString) { + String[] splits = harString.split(" "); + this.name = splits[0]; + this.isDir = "dir".equals(splits[1]) ? true: false; + // this is equal to "none" if its a directory + this.partName = splits[2]; + this.startIndex = Long.parseLong(splits[3]); + this.length = Long.parseLong(splits[4]); + if (isDir) { + children = new ArrayList(); + for (int i = 5; i < splits.length; i++) { + children.add(splits[i]); + } + } + } + public boolean isDir() { + return isDir; + } + + public String getName() { + return name; + } + + public List getChildren() { + return children; + } + public String getFileName() { + return name; + } + public String getPartName() { + return partName; + } + public long getStartIndex() { + return startIndex; + } + public long getLength() { + return length; + } + } + + /** + * return the filestatus of files in har archive. + * The permission returned are that of the archive + * index files. The permissions are not persisted + * while creating a hadoop archive. + * @param f the path in har filesystem + * @return filestatus. + * @throws IOException + */ + @Override + public FileStatus getFileStatus(Path f) throws IOException { + FileStatus archiveStatus = fs.getFileStatus(archiveIndex); + // get the fs DataInputStream for the underlying file + // look up the index. 
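+    // The line returned by fileStatusInIndex below looks like, for example
+    // (illustrative values):
+    //   /2009/05/part-0 file part-0 0 1024
+    // i.e. the in-archive path, "dir" or "file", the part file name, and the
+    // start offset and length of the data inside that part file.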
+ Path p = makeQualified(f); + Path harPath = getPathInHar(p); + if (harPath == null) { + throw new IOException("Invalid file name: " + f + " in " + uri); + } + String readStr = fileStatusInIndex(harPath); + if (readStr == null) { + throw new FileNotFoundException("File: " + f + " does not exist in " + uri); + } + HarStatus hstatus = null; + hstatus = new HarStatus(readStr); + return new FileStatus(hstatus.isDir()?0:hstatus.getLength(), hstatus.isDir(), + (int)archiveStatus.getReplication(), archiveStatus.getBlockSize(), + archiveStatus.getModificationTime(), archiveStatus.getAccessTime(), + new FsPermission( + archiveStatus.getPermission()), archiveStatus.getOwner(), + archiveStatus.getGroup(), + makeRelative(this.uri.toString(), new Path(hstatus.name))); + } + + /** + * Returns a har input stream which fakes end of + * file. It reads the index files to get the part + * file name and the size and start of the file. + */ + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + // get the fs DataInputStream for the underlying file + // look up the index. + Path p = makeQualified(f); + Path harPath = getPathInHar(p); + if (harPath == null) { + throw new IOException("Invalid file name: " + f + " in " + uri); + } + String readStr = fileStatusInIndex(harPath); + if (readStr == null) { + throw new FileNotFoundException(f + ": not found in " + archivePath); + } + HarStatus hstatus = new HarStatus(readStr); + // we got it.. woo hooo!!! + if (hstatus.isDir()) { + throw new FileNotFoundException(f + " : not a file in " + + archivePath); + } + return new HarFSDataInputStream(fs, new Path(archivePath, + hstatus.getPartName()), + hstatus.getStartIndex(), hstatus.getLength(), bufferSize); + } + + /* + * create throws an exception in Har filesystem. + * The archive once created cannot be changed. + */ + public FSDataOutputStream create(Path f, int bufferSize) + throws IOException { + throw new IOException("Har: Create not allowed"); + } + + public FSDataOutputStream create(Path f, + FsPermission permission, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { + throw new IOException("Har: create not allowed."); + } + + @Override + public void close() throws IOException { + if (fs != null) { + try { + fs.close(); + } catch(IOException ie) { + //this might already be closed + // ignore + } + } + } + + /** + * Not implemented. + */ + @Override + public boolean setReplication(Path src, short replication) throws IOException{ + throw new IOException("Har: setreplication not allowed"); + } + + /** + * Not implemented. + */ + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + throw new IOException("Har: delete not allowed"); + } + + /** + * liststatus returns the children of a directory + * after looking up the index files. 
+ */ + @Override + public FileStatus[] listStatus(Path f) throws IOException { + //need to see if the file is an index in file + //get the filestatus of the archive directory + // we will create fake filestatuses to return + // to the client + List statuses = new ArrayList(); + FileStatus archiveStatus = fs.getFileStatus(archiveIndex); + Path tmpPath = makeQualified(f); + Path harPath = getPathInHar(tmpPath); + String readStr = fileStatusInIndex(harPath); + if (readStr == null) { + throw new FileNotFoundException("File " + f + " not found in " + archivePath); + } + HarStatus hstatus = new HarStatus(readStr); + if (!hstatus.isDir()) + statuses.add(new FileStatus(hstatus.getLength(), + hstatus.isDir(), + archiveStatus.getReplication(), archiveStatus.getBlockSize(), + archiveStatus.getModificationTime(), archiveStatus.getAccessTime(), + new FsPermission(archiveStatus.getPermission()), + archiveStatus.getOwner(), archiveStatus.getGroup(), + makeRelative(this.uri.toString(), new Path(hstatus.name)))); + else + for (String child: hstatus.children) { + FileStatus tmp = getFileStatus(new Path(tmpPath, child)); + statuses.add(tmp); + } + return statuses.toArray(new FileStatus[statuses.size()]); + } + + /** + * return the top level archive path. + */ + public Path getHomeDirectory() { + return new Path(uri.toString()); + } + + public void setWorkingDirectory(Path newDir) { + //does nothing. + } + + /** + * not implemented. + */ + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + throw new IOException("Har: mkdirs not allowed"); + } + + /** + * not implemented. + */ + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws + IOException { + throw new IOException("Har: copyfromlocalfile not allowed"); + } + + /** + * copies the file in the har filesystem to a local file. + */ + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf()); + } + + /** + * not implemented. + */ + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + throw new IOException("Har: startLocalOutput not allowed"); + } + + /** + * not implemented. + */ + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + throw new IOException("Har: completeLocalOutput not allowed"); + } + + /** + * not implemented. + */ + public void setOwner(Path p, String username, String groupname) + throws IOException { + throw new IOException("Har: setowner not allowed"); + } + + /** + * Not implemented. + */ + public void setPermission(Path p, FsPermission permisssion) + throws IOException { + throw new IOException("Har: setPermission not allowed"); + } + + /** + * Hadoop archives input stream. This input stream fakes EOF + * since archive files are part of bigger part files. + */ + private static class HarFSDataInputStream extends FSDataInputStream { + /** + * Create an input stream that fakes all the reads/positions/seeking. + */ + private static class HarFsInputStream extends FSInputStream { + private long position, start, end; + //The underlying data input stream that the + // underlying filesystem will return. 
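+      // Only the window [start, start + length) of that stream is exposed,
+      // so callers see an apparent EOF at the end of the archived file
+      // rather than at the end of the part file.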
+ private FSDataInputStream underLyingStream; + //one byte buffer + private byte[] oneBytebuff = new byte[1]; + HarFsInputStream(FileSystem fs, Path path, long start, + long length, int bufferSize) throws IOException { + underLyingStream = fs.open(path, bufferSize); + underLyingStream.seek(start); + // the start of this file in the part file + this.start = start; + // the position pointer in the part file + this.position = start; + // the end pointer in the part file + this.end = start + length; + } + + public synchronized int available() throws IOException { + long remaining = end - underLyingStream.getPos(); + if (remaining > (long)Integer.MAX_VALUE) { + return Integer.MAX_VALUE; + } + return (int) remaining; + } + + public synchronized void close() throws IOException { + underLyingStream.close(); + super.close(); + } + + //not implemented + @Override + public void mark(int readLimit) { + // do nothing + } + + /** + * reset is not implemented + */ + public void reset() throws IOException { + throw new IOException("reset not implemented."); + } + + public synchronized int read() throws IOException { + int ret = read(oneBytebuff, 0, 1); + return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff); + } + + public synchronized int read(byte[] b) throws IOException { + int ret = read(b, 0, b.length); + if (ret != -1) { + position += ret; + } + return ret; + } + + /** + * + */ + public synchronized int read(byte[] b, int offset, int len) + throws IOException { + int newlen = len; + int ret = -1; + if (position + len > end) { + newlen = (int) (end - position); + } + // end case + if (newlen == 0) + return ret; + ret = underLyingStream.read(b, offset, newlen); + position += ret; + return ret; + } + + public synchronized long skip(long n) throws IOException { + long tmpN = n; + if (tmpN > 0) { + if (position + tmpN > end) { + tmpN = end - position; + } + underLyingStream.seek(tmpN + position); + position += tmpN; + return tmpN; + } + return (tmpN < 0)? -1 : 0; + } + + public synchronized long getPos() throws IOException { + return (position - start); + } + + public synchronized void seek(long pos) throws IOException { + if (pos < 0 || (start + pos > end)) { + throw new IOException("Failed to seek: EOF"); + } + position = start + pos; + underLyingStream.seek(position); + } + + public boolean seekToNewSource(long targetPos) throws IOException { + //do not need to implement this + // hdfs in itself does seektonewsource + // while reading. + return false; + } + + /** + * implementing position readable. + */ + public int read(long pos, byte[] b, int offset, int length) + throws IOException { + int nlength = length; + if (start + nlength + pos > end) { + nlength = (int) (end - (start + pos)); + } + return underLyingStream.read(pos + start , b, offset, nlength); + } + + /** + * position readable again. + */ + public void readFully(long pos, byte[] b, int offset, int length) + throws IOException { + if (start + length + pos > end) { + throw new IOException("Not enough bytes to read."); + } + underLyingStream.readFully(pos + start, b, offset, length); + } + + public void readFully(long pos, byte[] b) throws IOException { + readFully(pos, b, 0, b.length); + } + + } + + /** + * constructors for har input stream. 
+ * @param fs the underlying filesystem + * @param p The path in the underlying filesystem + * @param start the start position in the part file + * @param length the length of valid data in the part file + * @param bufsize the buffer size + * @throws IOException + */ + public HarFSDataInputStream(FileSystem fs, Path p, long start, + long length, int bufsize) throws IOException { + super(new HarFsInputStream(fs, p, start, length, bufsize)); + } + + /** + * constructor for har input stream. + * @param fs the underlying filesystem + * @param p the path in the underlying file system + * @param start the start position in the part file + * @param length the length of valid data in the part file. + * @throws IOException + */ + public HarFSDataInputStream(FileSystem fs, Path p, long start, long length) + throws IOException { + super(new HarFsInputStream(fs, p, start, length, 0)); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/LengthFileChecksum.java b/src/java/org/apache/hadoop/fs/LengthFileChecksum.java new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/java/org/apache/hadoop/fs/LocalDirAllocator.java b/src/java/org/apache/hadoop/fs/LocalDirAllocator.java new file mode 100644 index 00000000000..5d04d280da3 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/LocalDirAllocator.java @@ -0,0 +1,418 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.util.*; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.util.*; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.DiskChecker.DiskErrorException; +import org.apache.hadoop.conf.Configuration; + +/** An implementation of a round-robin scheme for disk allocation for creating + * files. The way it works is that it is kept track what disk was last + * allocated for a file write. For the current request, the next disk from + * the set of disks would be allocated if the free space on the disk is + * sufficient enough to accomodate the file that is being considered for + * creation. If the space requirements cannot be met, the next disk in order + * would be tried and so on till a disk is found with sufficient capacity. + * Once a disk with sufficient space is identified, a check is done to make + * sure that the disk is writable. Also, there is an API provided that doesn't + * take the space requirements into consideration but just checks whether the + * disk under consideration is writable (this should be used for cases where + * the file size is not known apriori). An API is provided to read a path that + * was created earlier. That API works by doing a scan of all the disks for the + * input pathname. 
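+ * <p>
+ * A minimal usage sketch (the context name shown is the one used by
+ * map/reduce; the relative path and size are illustrative only):
+ * <pre>{@code
+ * Configuration conf = new Configuration();
+ * LocalDirAllocator allocator = new LocalDirAllocator("mapred.local.dir");
+ * Path scratch = allocator.getLocalPathForWrite("jobcache/spill0.out", 1 << 20, conf);
+ * }</pre>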
+ * This implementation also provides the functionality of having multiple + * allocators per JVM (one for each unique functionality or context, like + * mapred, dfs-client, etc.). It ensures that there is only one instance of + * an allocator per context per JVM. + * Note: + * 1. The contexts referred above are actually the configuration items defined + * in the Configuration class like "mapred.local.dir" (for which we want to + * control the dir allocations). The context-strings are exactly those + * configuration items. + * 2. This implementation does not take into consideration cases where + * a disk becomes read-only or goes out of space while a file is being written + * to (disks are shared between multiple processes, and so the latter situation + * is probable). + * 3. In the class implementation, "Disk" is referred to as "Dir", which + * actually points to the configured directory on the Disk which will be the + * parent for all file write/read allocations. + */ +public class LocalDirAllocator { + + //A Map from the config item names like "mapred.local.dir", + //"dfs.client.buffer.dir" to the instance of the AllocatorPerContext. This + //is a static object to make sure there exists exactly one instance per JVM + private static Map contexts = + new TreeMap(); + private String contextCfgItemName; + + /**Create an allocator object + * @param contextCfgItemName + */ + public LocalDirAllocator(String contextCfgItemName) { + this.contextCfgItemName = contextCfgItemName; + } + + /** This method must be used to obtain the dir allocation context for a + * particular value of the context name. The context name must be an item + * defined in the Configuration object for which we want to control the + * dir allocations (e.g., mapred.local.dir). The method will + * create a context for that name if it doesn't already exist. + */ + private AllocatorPerContext obtainContext(String contextCfgItemName) { + synchronized (contexts) { + AllocatorPerContext l = contexts.get(contextCfgItemName); + if (l == null) { + contexts.put(contextCfgItemName, + (l = new AllocatorPerContext(contextCfgItemName))); + } + return l; + } + } + + /** Get a path from the local FS. This method should be used if the size of + * the file is not known apriori. We go round-robin over the set of disks + * (via the configured dirs) and return the first complete path where + * we could create the parent directory of the passed path. + * @param pathStr the requested path (this will be created on the first + * available disk) + * @param conf the Configuration object + * @return the complete path to the file on a local disk + * @throws IOException + */ + public Path getLocalPathForWrite(String pathStr, + Configuration conf) throws IOException { + return getLocalPathForWrite(pathStr, -1, conf); + } + + /** Get a path from the local FS. Pass size as -1 if not known apriori. 
We + * round-robin over the set of disks (via the configured dirs) and return + * the first complete path which has enough space + * @param pathStr the requested path (this will be created on the first + * available disk) + * @param size the size of the file that is going to be written + * @param conf the Configuration object + * @return the complete path to the file on a local disk + * @throws IOException + */ + public Path getLocalPathForWrite(String pathStr, long size, + Configuration conf) throws IOException { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.getLocalPathForWrite(pathStr, size, conf); + } + + /** Get a path from the local FS for reading. We search through all the + * configured dirs for the file's existence and return the complete + * path to the file when we find one + * @param pathStr the requested file (this will be searched) + * @param conf the Configuration object + * @return the complete path to the file on a local disk + * @throws IOException + */ + public Path getLocalPathToRead(String pathStr, + Configuration conf) throws IOException { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.getLocalPathToRead(pathStr, conf); + } + + /** Creates a temporary file in the local FS. Pass size as -1 if not known + * apriori. We round-robin over the set of disks (via the configured dirs) + * and select the first complete path which has enough space. A file is + * created on this directory. The file is guaranteed to go away when the + * JVM exits. + * @param pathStr prefix for the temporary file + * @param size the size of the file that is going to be written + * @param conf the Configuration object + * @return a unique temporary file + * @throws IOException + */ + public File createTmpFileForWrite(String pathStr, long size, + Configuration conf) throws IOException { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.createTmpFileForWrite(pathStr, size, conf); + } + + /** Method to check whether a context is valid + * @param contextCfgItemName + * @return true/false + */ + public static boolean isContextValid(String contextCfgItemName) { + synchronized (contexts) { + return contexts.containsKey(contextCfgItemName); + } + } + + /** We search through all the configured dirs for the file's existence + * and return true when we find + * @param pathStr the requested file (this will be searched) + * @param conf the Configuration object + * @return true if files exist. false otherwise + * @throws IOException + */ + public boolean ifExists(String pathStr,Configuration conf) { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.ifExists(pathStr, conf); + } + + /** + * Get the current directory index for the given configuration item. + * @return the current directory index for the given configuration item. 
+ */ + int getCurrentDirectoryIndex() { + AllocatorPerContext context = obtainContext(contextCfgItemName); + return context.getCurrentDirectoryIndex(); + } + + private static class AllocatorPerContext { + + private final Log LOG = + LogFactory.getLog(AllocatorPerContext.class); + + private int dirNumLastAccessed; + private Random dirIndexRandomizer = new Random(); + private FileSystem localFS; + private DF[] dirDF; + private String contextCfgItemName; + private String[] localDirs; + private String savedLocalDirs = ""; + + public AllocatorPerContext(String contextCfgItemName) { + this.contextCfgItemName = contextCfgItemName; + } + + /** This method gets called everytime before any read/write to make sure + * that any change to localDirs is reflected immediately. + */ + private void confChanged(Configuration conf) throws IOException { + String newLocalDirs = conf.get(contextCfgItemName); + if (!newLocalDirs.equals(savedLocalDirs)) { + localDirs = conf.getStrings(contextCfgItemName); + localFS = FileSystem.getLocal(conf); + int numDirs = localDirs.length; + ArrayList dirs = new ArrayList(numDirs); + ArrayList dfList = new ArrayList(numDirs); + for (int i = 0; i < numDirs; i++) { + try { + // filter problematic directories + Path tmpDir = new Path(localDirs[i]); + if(localFS.mkdirs(tmpDir)|| localFS.exists(tmpDir)) { + try { + DiskChecker.checkDir(new File(localDirs[i])); + dirs.add(localDirs[i]); + dfList.add(new DF(new File(localDirs[i]), 30000)); + } catch (DiskErrorException de) { + LOG.warn( localDirs[i] + "is not writable\n" + + StringUtils.stringifyException(de)); + } + } else { + LOG.warn( "Failed to create " + localDirs[i]); + } + } catch (IOException ie) { + LOG.warn( "Failed to create " + localDirs[i] + ": " + + ie.getMessage() + "\n" + StringUtils.stringifyException(ie)); + } //ignore + } + localDirs = dirs.toArray(new String[dirs.size()]); + dirDF = dfList.toArray(new DF[dirs.size()]); + savedLocalDirs = newLocalDirs; + + // randomize the first disk picked in the round-robin selection + dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size()); + } + } + + private Path createPath(String path) throws IOException { + Path file = new Path(new Path(localDirs[dirNumLastAccessed]), + path); + //check whether we are able to create a directory here. If the disk + //happens to be RDONLY we will fail + try { + DiskChecker.checkDir(new File(file.getParent().toUri().getPath())); + return file; + } catch (DiskErrorException d) { + LOG.warn(StringUtils.stringifyException(d)); + return null; + } + } + + /** + * Get the current directory index. + * @return the current directory index. + */ + int getCurrentDirectoryIndex() { + return dirNumLastAccessed; + } + + /** Get a path from the local FS. This method should be used if the size of + * the file is not known a priori. + * + * It will use roulette selection, picking directories + * with probability proportional to their available space. + */ + public synchronized Path getLocalPathForWrite(String path, + Configuration conf) throws IOException { + return getLocalPathForWrite(path, -1, conf); + } + + /** Get a path from the local FS. If size is known, we go + * round-robin over the set of disks (via the configured dirs) and return + * the first complete path which has enough space. + * + * If size is not known, use roulette selection -- pick directories + * with probability proportional to their available space. 
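+ * <p>
+ * For example (illustrative numbers), with three configured directories that
+ * have 10 GB, 30 GB and 60 GB available, an unknown-size request picks them
+ * with probability 0.1, 0.3 and 0.6 respectively.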
+ */ + public synchronized Path getLocalPathForWrite(String pathStr, long size, + Configuration conf) throws IOException { + confChanged(conf); + int numDirs = localDirs.length; + int numDirsSearched = 0; + //remove the leading slash from the path (to make sure that the uri + //resolution results in a valid path on the dir being checked) + if (pathStr.startsWith("/")) { + pathStr = pathStr.substring(1); + } + Path returnPath = null; + + if(size == -1) { //do roulette selection: pick dir with probability + //proportional to available size + long[] availableOnDisk = new long[dirDF.length]; + long totalAvailable = 0; + + //build the "roulette wheel" + for(int i =0; i < dirDF.length; ++i) { + availableOnDisk[i] = dirDF[i].getAvailable(); + totalAvailable += availableOnDisk[i]; + } + + // Keep rolling the wheel till we get a valid path + Random r = new java.util.Random(); + while (numDirsSearched < numDirs && returnPath == null) { + long randomPosition = Math.abs(r.nextLong()) % totalAvailable; + int dir = 0; + while (randomPosition > availableOnDisk[dir]) { + randomPosition -= availableOnDisk[dir]; + dir++; + } + dirNumLastAccessed = dir; + returnPath = createPath(pathStr); + if (returnPath == null) { + totalAvailable -= availableOnDisk[dir]; + availableOnDisk[dir] = 0; // skip this disk + numDirsSearched++; + } + } + } else { + while (numDirsSearched < numDirs && returnPath == null) { + long capacity = dirDF[dirNumLastAccessed].getAvailable(); + if (capacity > size) { + returnPath = createPath(pathStr); + } + dirNumLastAccessed++; + dirNumLastAccessed = dirNumLastAccessed % numDirs; + numDirsSearched++; + } + } + if (returnPath != null) { + return returnPath; + } + + //no path found + throw new DiskErrorException("Could not find any valid local " + + "directory for " + pathStr); + } + + /** Creates a file on the local FS. Pass size as -1 if not known apriori. We + * round-robin over the set of disks (via the configured dirs) and return + * a file on the first path which has enough space. The file is guaranteed + * to go away when the JVM exits. + */ + public File createTmpFileForWrite(String pathStr, long size, + Configuration conf) throws IOException { + + // find an appropriate directory + Path path = getLocalPathForWrite(pathStr, size, conf); + File dir = new File(path.getParent().toUri().getPath()); + String prefix = path.getName(); + + // create a temp file on this directory + File result = File.createTempFile(prefix, null, dir); + result.deleteOnExit(); + return result; + } + + /** Get a path from the local FS for reading. 
We search through all the + * configured dirs for the file's existence and return the complete + * path to the file when we find one + */ + public synchronized Path getLocalPathToRead(String pathStr, + Configuration conf) throws IOException { + confChanged(conf); + int numDirs = localDirs.length; + int numDirsSearched = 0; + //remove the leading slash from the path (to make sure that the uri + //resolution results in a valid path on the dir being checked) + if (pathStr.startsWith("/")) { + pathStr = pathStr.substring(1); + } + while (numDirsSearched < numDirs) { + Path file = new Path(localDirs[numDirsSearched], pathStr); + if (localFS.exists(file)) { + return file; + } + numDirsSearched++; + } + + //no path found + throw new DiskErrorException ("Could not find " + pathStr +" in any of" + + " the configured local directories"); + } + + /** We search through all the configured dirs for the file's existence + * and return true when we find one + */ + public synchronized boolean ifExists(String pathStr,Configuration conf) { + try { + int numDirs = localDirs.length; + int numDirsSearched = 0; + //remove the leading slash from the path (to make sure that the uri + //resolution results in a valid path on the dir being checked) + if (pathStr.startsWith("/")) { + pathStr = pathStr.substring(1); + } + while (numDirsSearched < numDirs) { + Path file = new Path(localDirs[numDirsSearched], pathStr); + if (localFS.exists(file)) { + return true; + } + numDirsSearched++; + } + } catch (IOException e) { + // IGNORE and try again + } + return false; + } + } +} diff --git a/src/java/org/apache/hadoop/fs/LocalFileSystem.java b/src/java/org/apache/hadoop/fs/LocalFileSystem.java new file mode 100644 index 00000000000..199c773f5e4 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/LocalFileSystem.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.*; +import java.net.URI; +import java.util.*; + +/**************************************************************** + * Implement the FileSystem API for the checksumed local filesystem. + * + *****************************************************************/ +public class LocalFileSystem extends ChecksumFileSystem { + static final URI NAME = URI.create("file:///"); + static private Random rand = new Random(); + FileSystem rfs; + + public LocalFileSystem() { + this(new RawLocalFileSystem()); + } + + public FileSystem getRaw() { + return rfs; + } + + public LocalFileSystem(FileSystem rawLocalFileSystem) { + super(rawLocalFileSystem); + rfs = rawLocalFileSystem; + } + + /** Convert a path to a File. 
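+   * For example, {@code file:///tmp/data.txt} (illustrative path) maps to the
+   * local file {@code /tmp/data.txt}.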
*/ + public File pathToFile(Path path) { + return ((RawLocalFileSystem)fs).pathToFile(path); + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, this, dst, delSrc, getConf()); + } + + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) + throws IOException { + FileUtil.copy(this, src, this, dst, delSrc, getConf()); + } + + /** + * Moves files to a bad file directory on the same device, so that their + * storage will not be reused. + */ + public boolean reportChecksumFailure(Path p, FSDataInputStream in, + long inPos, + FSDataInputStream sums, long sumsPos) { + try { + // canonicalize f + File f = ((RawLocalFileSystem)fs).pathToFile(p).getCanonicalFile(); + + // find highest writable parent dir of f on the same device + String device = new DF(f, getConf()).getMount(); + File parent = f.getParentFile(); + File dir = null; + while (parent!=null && parent.canWrite() && parent.toString().startsWith(device)) { + dir = parent; + parent = parent.getParentFile(); + } + + if (dir==null) { + throw new IOException( + "not able to find the highest writable parent dir"); + } + + // move the file there + File badDir = new File(dir, "bad_files"); + if (!badDir.mkdirs()) { + if (!badDir.isDirectory()) { + throw new IOException("Mkdirs failed to create " + badDir.toString()); + } + } + String suffix = "." + rand.nextInt(); + File badFile = new File(badDir, f.getName()+suffix); + LOG.warn("Moving bad file " + f + " to " + badFile); + in.close(); // close it first + boolean b = f.renameTo(badFile); // rename it + if (!b) { + LOG.warn("Ignoring failure of renameTo"); + } + // move checksum file too + File checkFile = ((RawLocalFileSystem)fs).pathToFile(getChecksumFile(p)); + b = checkFile.renameTo(new File(badDir, checkFile.getName()+suffix)); + if (!b) { + LOG.warn("Ignoring failure of renameTo"); + } + } catch (IOException e) { + LOG.warn("Error moving bad file " + p + ": " + e); + } + return false; + } +} diff --git a/src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java b/src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java new file mode 100644 index 00000000000..c20b3d31d5d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.io.WritableUtils; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.znerd.xmlenc.XMLOutputter; + +/** MD5 of MD5 of CRC32. 
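+ * <p>
+ * Conceptually, the per-chunk CRC32s of a file are hashed with MD5 one block
+ * at a time, and those per-block MD5s are hashed again with MD5 to give the
+ * file checksum. The serialized form written below is bytesPerCRC (int),
+ * crcPerBlock (long) and the 16-byte MD5 digest, i.e. {@link #LENGTH} bytes.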
*/ +public class MD5MD5CRC32FileChecksum extends FileChecksum { + public static final int LENGTH = MD5Hash.MD5_LEN + + (Integer.SIZE + Long.SIZE)/Byte.SIZE; + + private int bytesPerCRC; + private long crcPerBlock; + private MD5Hash md5; + + /** Same as this(0, 0, null) */ + public MD5MD5CRC32FileChecksum() { + this(0, 0, null); + } + + /** Create a MD5FileChecksum */ + public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { + this.bytesPerCRC = bytesPerCRC; + this.crcPerBlock = crcPerBlock; + this.md5 = md5; + } + + /** {@inheritDoc} */ + public String getAlgorithmName() { + return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32"; + } + + /** {@inheritDoc} */ + public int getLength() {return LENGTH;} + + /** {@inheritDoc} */ + public byte[] getBytes() { + return WritableUtils.toByteArray(this); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + bytesPerCRC = in.readInt(); + crcPerBlock = in.readLong(); + md5 = MD5Hash.read(in); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + out.writeInt(bytesPerCRC); + out.writeLong(crcPerBlock); + md5.write(out); + } + + /** Write that object to xml output. */ + public static void write(XMLOutputter xml, MD5MD5CRC32FileChecksum that + ) throws IOException { + xml.startTag(MD5MD5CRC32FileChecksum.class.getName()); + if (that != null) { + xml.attribute("bytesPerCRC", "" + that.bytesPerCRC); + xml.attribute("crcPerBlock", "" + that.crcPerBlock); + xml.attribute("md5", "" + that.md5); + } + xml.endTag(); + } + + /** Return the object represented in the attributes. */ + public static MD5MD5CRC32FileChecksum valueOf(Attributes attrs + ) throws SAXException { + final String bytesPerCRC = attrs.getValue("bytesPerCRC"); + final String crcPerBlock = attrs.getValue("crcPerBlock"); + final String md5 = attrs.getValue("md5"); + if (bytesPerCRC == null || crcPerBlock == null || md5 == null) { + return null; + } + + try { + return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC), + Integer.valueOf(crcPerBlock), new MD5Hash(md5)); + } catch(Exception e) { + throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC + + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e); + } + } + + /** {@inheritDoc} */ + public String toString() { + return getAlgorithmName() + ":" + md5; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/fs/Path.java b/src/java/org/apache/hadoop/fs/Path.java new file mode 100644 index 00000000000..cf96bf24515 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Path.java @@ -0,0 +1,298 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import java.net.*; +import java.io.*; + +import org.apache.hadoop.conf.Configuration; + +/** Names a file or directory in a {@link FileSystem}. + * Path strings use slash as the directory separator. A path string is + * absolute if it begins with a slash. + */ +public class Path implements Comparable { + + /** The directory separator, a slash. */ + public static final String SEPARATOR = "/"; + public static final char SEPARATOR_CHAR = '/'; + + public static final String CUR_DIR = "."; + + static final boolean WINDOWS + = System.getProperty("os.name").startsWith("Windows"); + + private URI uri; // a hierarchical uri + + /** Resolve a child path against a parent path. */ + public Path(String parent, String child) { + this(new Path(parent), new Path(child)); + } + + /** Resolve a child path against a parent path. */ + public Path(Path parent, String child) { + this(parent, new Path(child)); + } + + /** Resolve a child path against a parent path. */ + public Path(String parent, Path child) { + this(new Path(parent), child); + } + + /** Resolve a child path against a parent path. */ + public Path(Path parent, Path child) { + // Add a slash to parent's path so resolution is compatible with URI's + URI parentUri = parent.uri; + String parentPath = parentUri.getPath(); + if (!(parentPath.equals("/") || parentPath.equals(""))) + try { + parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), + parentUri.getPath()+"/", null, null); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + URI resolved = parentUri.resolve(child.uri); + initialize(resolved.getScheme(), resolved.getAuthority(), + normalizePath(resolved.getPath())); + } + + private void checkPathArg( String path ) { + // disallow construction of a Path from an empty string + if ( path == null ) { + throw new IllegalArgumentException( + "Can not create a Path from a null string"); + } + if( path.length() == 0 ) { + throw new IllegalArgumentException( + "Can not create a Path from an empty string"); + } + } + + /** Construct a path from a String. Path strings are URIs, but with + * unescaped elements and some additional normalization. */ + public Path(String pathString) { + checkPathArg( pathString ); + + // We can't use 'new URI(String)' directly, since it assumes things are + // escaped, which we don't require of Paths. + + // add a slash in front of paths with Windows drive letters + if (hasWindowsDrive(pathString, false)) + pathString = "/"+pathString; + + // parse uri components + String scheme = null; + String authority = null; + + int start = 0; + + // parse uri scheme, if any + int colon = pathString.indexOf(':'); + int slash = pathString.indexOf('/'); + if ((colon != -1) && + ((slash == -1) || (colon < slash))) { // has a scheme + scheme = pathString.substring(0, colon); + start = colon+1; + } + + // parse uri authority, if any + if (pathString.startsWith("//", start) && + (pathString.length()-start > 2)) { // has authority + int nextSlash = pathString.indexOf('/', start+2); + int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); + authority = pathString.substring(start+2, authEnd); + start = authEnd; + } + + // uri path is the rest of the string -- query & fragment not supported + String path = pathString.substring(start, pathString.length()); + + initialize(scheme, authority, path); + } + + /** Construct a Path from components. 
*/ + public Path(String scheme, String authority, String path) { + checkPathArg( path ); + initialize(scheme, authority, path); + } + + private void initialize(String scheme, String authority, String path) { + try { + this.uri = new URI(scheme, authority, normalizePath(path), null, null) + .normalize(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + private String normalizePath(String path) { + // remove double slashes & backslashes + path = path.replace("//", "/"); + path = path.replace("\\", "/"); + + // trim trailing slash from non-root path (ignoring windows drive) + int minLength = hasWindowsDrive(path, true) ? 4 : 1; + if (path.length() > minLength && path.endsWith("/")) { + path = path.substring(0, path.length()-1); + } + + return path; + } + + private boolean hasWindowsDrive(String path, boolean slashed) { + if (!WINDOWS) return false; + int start = slashed ? 1 : 0; + return + path.length() >= start+2 && + (slashed ? path.charAt(0) == '/' : true) && + path.charAt(start+1) == ':' && + ((path.charAt(start) >= 'A' && path.charAt(start) <= 'Z') || + (path.charAt(start) >= 'a' && path.charAt(start) <= 'z')); + } + + + /** Convert this to a URI. */ + public URI toUri() { return uri; } + + /** Return the FileSystem that owns this Path. */ + public FileSystem getFileSystem(Configuration conf) throws IOException { + return FileSystem.get(this.toUri(), conf); + } + + /** True if the directory of this path is absolute. */ + public boolean isAbsolute() { + int start = hasWindowsDrive(uri.getPath(), true) ? 3 : 0; + return uri.getPath().startsWith(SEPARATOR, start); + } + + /** Returns the final component of this path.*/ + public String getName() { + String path = uri.getPath(); + int slash = path.lastIndexOf(SEPARATOR); + return path.substring(slash+1); + } + + /** Returns the parent of a path or null if at root. */ + public Path getParent() { + String path = uri.getPath(); + int lastSlash = path.lastIndexOf('/'); + int start = hasWindowsDrive(path, true) ? 3 : 0; + if ((path.length() == start) || // empty path + (lastSlash == start && path.length() == start+1)) { // at root + return null; + } + String parent; + if (lastSlash==-1) { + parent = CUR_DIR; + } else { + int end = hasWindowsDrive(path, true) ? 3 : 0; + parent = path.substring(0, lastSlash==end?end+1:lastSlash); + } + return new Path(uri.getScheme(), uri.getAuthority(), parent); + } + + /** Adds a suffix to the final name in the path.*/ + public Path suffix(String suffix) { + return new Path(getParent(), getName()+suffix); + } + + public String toString() { + // we can't use uri.toString(), which escapes everything, because we want + // illegal characters unescaped in the string, for glob processing, etc. 
+ StringBuffer buffer = new StringBuffer(); + if (uri.getScheme() != null) { + buffer.append(uri.getScheme()); + buffer.append(":"); + } + if (uri.getAuthority() != null) { + buffer.append("//"); + buffer.append(uri.getAuthority()); + } + if (uri.getPath() != null) { + String path = uri.getPath(); + if (path.indexOf('/')==0 && + hasWindowsDrive(path, true) && // has windows drive + uri.getScheme() == null && // but no scheme + uri.getAuthority() == null) // or authority + path = path.substring(1); // remove slash before drive + buffer.append(path); + } + return buffer.toString(); + } + + public boolean equals(Object o) { + if (!(o instanceof Path)) { + return false; + } + Path that = (Path)o; + return this.uri.equals(that.uri); + } + + public int hashCode() { + return uri.hashCode(); + } + + public int compareTo(Object o) { + Path that = (Path)o; + return this.uri.compareTo(that.uri); + } + + /** Return the number of elements in this path. */ + public int depth() { + String path = uri.getPath(); + int depth = 0; + int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; + while (slash != -1) { + depth++; + slash = path.indexOf(SEPARATOR, slash+1); + } + return depth; + } + + /** Returns a qualified path object. */ + public Path makeQualified(FileSystem fs) { + Path path = this; + if (!isAbsolute()) { + path = new Path(fs.getWorkingDirectory(), this); + } + + URI pathUri = path.toUri(); + URI fsUri = fs.getUri(); + + String scheme = pathUri.getScheme(); + String authority = pathUri.getAuthority(); + + if (scheme != null && + (authority != null || fsUri.getAuthority() == null)) + return path; + + if (scheme == null) { + scheme = fsUri.getScheme(); + } + + if (authority == null) { + authority = fsUri.getAuthority(); + if (authority == null) { + authority = ""; + } + } + + return new Path(scheme+":"+"//"+authority + pathUri.getPath()); + } +} diff --git a/src/java/org/apache/hadoop/fs/PathFilter.java b/src/java/org/apache/hadoop/fs/PathFilter.java new file mode 100644 index 00000000000..bcb7658943a --- /dev/null +++ b/src/java/org/apache/hadoop/fs/PathFilter.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +public interface PathFilter { + /** + * Tests whether or not the specified abstract pathname should be + * included in a pathname list. 
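A short sketch of how the resolution, parent and qualification rules of Path behave in practice (all paths below are arbitrary examples):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathExamples {
  public static void main(String[] args) throws Exception {
    Path p = new Path("/user/alice", "data/part-00000");
    System.out.println(p);               // /user/alice/data/part-00000
    System.out.println(p.getName());     // part-00000
    System.out.println(p.getParent());   // /user/alice/data
    System.out.println(p.depth());       // 4
    System.out.println(p.isAbsolute());  // true

    // A relative path is resolved against the FileSystem's working directory and
    // picks up the scheme/authority of that FileSystem via makeQualified().
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    System.out.println(new Path("tmp/out").makeQualified(fs)); // e.g. file:/<working dir>/tmp/out
  }
}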
+ * + * @param path The abstract pathname to be tested + * @return true if and only if pathname + * should be included + */ + boolean accept(Path path); +} + + diff --git a/src/java/org/apache/hadoop/fs/PositionedReadable.java b/src/java/org/apache/hadoop/fs/PositionedReadable.java new file mode 100644 index 00000000000..d5af64e53e0 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/PositionedReadable.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; +import org.apache.hadoop.fs.*; + +/** Stream that permits positional reading. */ +public interface PositionedReadable { + /** + * Read upto the specified number of bytes, from a given + * position within a file, and return the number of bytes read. This does not + * change the current offset of a file, and is thread-safe. + */ + public int read(long position, byte[] buffer, int offset, int length) + throws IOException; + + /** + * Read the specified number of bytes, from a given + * position within a file. This does not + * change the current offset of a file, and is thread-safe. + */ + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException; + + /** + * Read number of bytes equalt to the length of the buffer, from a given + * position within a file. This does not + * change the current offset of a file, and is thread-safe. + */ + public void readFully(long position, byte[] buffer) throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/src/java/org/apache/hadoop/fs/RawLocalFileSystem.java new file mode 100644 index 00000000000..4587136e8af --- /dev/null +++ b/src/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -0,0 +1,496 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
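PathFilter instances are typically handed to listing calls; a minimal sketch of an anonymous filter that keeps only ".log" files, assuming the listStatus(Path, PathFilter) overload that FileSystem provides elsewhere in this patch and a made-up directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class LogFileLister {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    PathFilter logsOnly = new PathFilter() {
      public boolean accept(Path path) {
        return path.getName().endsWith(".log");
      }
    };
    FileStatus[] logs = fs.listStatus(new Path("/var/tmp"), logsOnly); // hypothetical directory
    if (logs != null) {
      for (FileStatus stat : logs) {
        System.out.println(stat.getPath());
      }
    }
  }
}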
+ */ + +package org.apache.hadoop.fs; + +import java.io.BufferedOutputStream; +import java.io.DataOutput; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URI; +import java.nio.ByteBuffer; +import java.util.StringTokenizer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.StringUtils; + +/**************************************************************** + * Implement the FileSystem API for the raw local filesystem. + * + *****************************************************************/ +public class RawLocalFileSystem extends FileSystem { + static final URI NAME = URI.create("file:///"); + private Path workingDir; + + public RawLocalFileSystem() { + workingDir = new Path(System.getProperty("user.dir")).makeQualified(this); + } + + /** Convert a path to a File. */ + public File pathToFile(Path path) { + checkPath(path); + if (!path.isAbsolute()) { + path = new Path(getWorkingDirectory(), path); + } + return new File(path.toUri().getPath()); + } + + public URI getUri() { return NAME; } + + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + setConf(conf); + } + + class TrackingFileInputStream extends FileInputStream { + public TrackingFileInputStream(File f) throws IOException { + super(f); + } + + public int read() throws IOException { + int result = super.read(); + if (result != -1) { + statistics.incrementBytesRead(1); + } + return result; + } + + public int read(byte[] data) throws IOException { + int result = super.read(data); + if (result != -1) { + statistics.incrementBytesRead(result); + } + return result; + } + + public int read(byte[] data, int offset, int length) throws IOException { + int result = super.read(data, offset, length); + if (result != -1) { + statistics.incrementBytesRead(result); + } + return result; + } + } + + /******************************************************* + * For open()'s FSInputStream + *******************************************************/ + class LocalFSFileInputStream extends FSInputStream { + FileInputStream fis; + private long position; + + public LocalFSFileInputStream(Path f) throws IOException { + this.fis = new TrackingFileInputStream(pathToFile(f)); + } + + public void seek(long pos) throws IOException { + fis.getChannel().position(pos); + this.position = pos; + } + + public long getPos() throws IOException { + return this.position; + } + + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + /* + * Just forward to the fis + */ + public int available() throws IOException { return fis.available(); } + public void close() throws IOException { fis.close(); } + public boolean markSupport() { return false; } + + public int read() throws IOException { + try { + int value = fis.read(); + if (value >= 0) { + this.position++; + } + return value; + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + public int read(byte[] b, int off, int len) throws IOException { + try { + int value = fis.read(b, off, len); + if (value > 0) { + this.position += value; + } + return value; + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + 
public int read(long position, byte[] b, int off, int len) + throws IOException { + ByteBuffer bb = ByteBuffer.wrap(b, off, len); + try { + return fis.getChannel().read(bb, position); + } catch (IOException e) { + throw new FSError(e); + } + } + + public long skip(long n) throws IOException { + long value = fis.skip(n); + if (value > 0) { + this.position += value; + } + return value; + } + } + + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + if (!exists(f)) { + throw new FileNotFoundException(f.toString()); + } + return new FSDataInputStream(new BufferedFSInputStream( + new LocalFSFileInputStream(f), bufferSize)); + } + + /********************************************************* + * For create()'s FSOutputStream. + *********************************************************/ + class LocalFSFileOutputStream extends OutputStream implements Syncable { + FileOutputStream fos; + + private LocalFSFileOutputStream(Path f, boolean append) throws IOException { + this.fos = new FileOutputStream(pathToFile(f), append); + } + + /* + * Just forward to the fos + */ + public void close() throws IOException { fos.close(); } + public void flush() throws IOException { fos.flush(); } + public void write(byte[] b, int off, int len) throws IOException { + try { + fos.write(b, off, len); + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + public void write(int b) throws IOException { + try { + fos.write(b); + } catch (IOException e) { // unexpected exception + throw new FSError(e); // assume native fs error + } + } + + /** {@inheritDoc} */ + public void sync() throws IOException { + fos.getFD().sync(); + } + } + + /** {@inheritDoc} */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + if (!exists(f)) { + throw new FileNotFoundException("File " + f + " not found."); + } + if (getFileStatus(f).isDir()) { + throw new IOException("Cannot append to a diretory (=" + f + " )."); + } + return new FSDataOutputStream(new BufferedOutputStream( + new LocalFSFileOutputStream(f, true), bufferSize), statistics); + } + + /** {@inheritDoc} */ + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) + throws IOException { + if (exists(f) && !overwrite) { + throw new IOException("File already exists:"+f); + } + Path parent = f.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent.toString()); + } + return new FSDataOutputStream(new BufferedOutputStream( + new LocalFSFileOutputStream(f, false), bufferSize), statistics); + } + + /** {@inheritDoc} */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + FSDataOutputStream out = create(f, + overwrite, bufferSize, replication, blockSize, progress); + setPermission(f, permission); + return out; + } + + public boolean rename(Path src, Path dst) throws IOException { + if (pathToFile(src).renameTo(pathToFile(dst))) { + return true; + } + return FileUtil.copy(this, src, this, dst, true, getConf()); + } + + public boolean delete(Path p, boolean recursive) throws IOException { + File f = pathToFile(p); + if (f.isFile()) { + return f.delete(); + } else if ((!recursive) && f.isDirectory() && + (f.listFiles().length != 0)) { + throw new IOException("Directory " + 
f.toString() + " is not empty"); + } + return FileUtil.fullyDelete(f); + } + + public FileStatus[] listStatus(Path f) throws IOException { + File localf = pathToFile(f); + FileStatus[] results; + + if (!localf.exists()) { + return null; + } + if (localf.isFile()) { + return new FileStatus[] { + new RawLocalFileStatus(localf, getDefaultBlockSize(), this) }; + } + + String[] names = localf.list(); + if (names == null) { + return null; + } + results = new FileStatus[names.length]; + for (int i = 0; i < names.length; i++) { + results[i] = getFileStatus(new Path(f, names[i])); + } + return results; + } + + /** + * Creates the specified directory hierarchy. Does not + * treat existence as an error. + */ + public boolean mkdirs(Path f) throws IOException { + Path parent = f.getParent(); + File p2f = pathToFile(f); + return (parent == null || mkdirs(parent)) && + (p2f.mkdir() || p2f.isDirectory()); + } + + /** {@inheritDoc} */ + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + boolean b = mkdirs(f); + setPermission(f, permission); + return b; + } + + @Override + public Path getHomeDirectory() { + return new Path(System.getProperty("user.home")).makeQualified(this); + } + + /** + * Set the working directory to the given directory. + */ + @Override + public void setWorkingDirectory(Path newDir) { + workingDir = newDir; + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + /** {@inheritDoc} */ + @Override + public FsStatus getStatus(Path p) throws IOException { + File partition = pathToFile(p == null ? new Path("/") : p); + //File provides getUsableSpace() and getFreeSpace() + //File provides no API to obtain used space, assume used = total - free + return new FsStatus(partition.getTotalSpace(), + partition.getTotalSpace() - partition.getFreeSpace(), + partition.getFreeSpace()); + } + + // In the case of the local filesystem, we can just rename the file. + public void moveFromLocalFile(Path src, Path dst) throws IOException { + rename(src, dst); + } + + // We can write output directly to the final location + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return fsOutputFile; + } + + // It's in the right place - nothing to do. + public void completeLocalOutput(Path fsWorkingFile, Path tmpLocalFile) + throws IOException { + } + + public void close() throws IOException { + super.close(); + } + + public String toString() { + return "LocalFS"; + } + + public FileStatus getFileStatus(Path f) throws IOException { + File path = pathToFile(f); + if (path.exists()) { + return new RawLocalFileStatus(pathToFile(f), getDefaultBlockSize(), this); + } else { + throw new FileNotFoundException( "File " + f + " does not exist."); + } + } + + static class RawLocalFileStatus extends FileStatus { + /* We can add extra fields here. It breaks at least CopyFiles.FilePair(). + * We recognize if the information is already loaded by check if + * onwer.equals(""). 
+ */ + private boolean isPermissionLoaded() { + return !super.getOwner().equals(""); + } + + RawLocalFileStatus(File f, long defaultBlockSize, FileSystem fs) { + super(f.length(), f.isDirectory(), 1, defaultBlockSize, + f.lastModified(), new Path(f.getPath()).makeQualified(fs)); + } + + @Override + public FsPermission getPermission() { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + return super.getPermission(); + } + + @Override + public String getOwner() { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + return super.getOwner(); + } + + @Override + public String getGroup() { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + return super.getGroup(); + } + + /// loads permissions, owner, and group from `ls -ld` + private void loadPermissionInfo() { + IOException e = null; + try { + StringTokenizer t = new StringTokenizer( + execCommand(new File(getPath().toUri()), + Shell.getGET_PERMISSION_COMMAND())); + //expected format + //-rw------- 1 username groupname ... + String permission = t.nextToken(); + if (permission.length() > 10) { //files with ACLs might have a '+' + permission = permission.substring(0, 10); + } + setPermission(FsPermission.valueOf(permission)); + t.nextToken(); + setOwner(t.nextToken()); + setGroup(t.nextToken()); + } catch (Shell.ExitCodeException ioe) { + if (ioe.getExitCode() != 1) { + e = ioe; + } else { + setPermission(null); + setOwner(null); + setGroup(null); + } + } catch (IOException ioe) { + e = ioe; + } finally { + if (e != null) { + throw new RuntimeException("Error while running command to get " + + "file permissions : " + + StringUtils.stringifyException(e)); + } + } + } + + @Override + public void write(DataOutput out) throws IOException { + if (!isPermissionLoaded()) { + loadPermissionInfo(); + } + super.write(out); + } + } + + /** + * Use the command chown to set owner. + */ + @Override + public void setOwner(Path p, String username, String groupname + ) throws IOException { + if (username == null && groupname == null) { + throw new IOException("username == null && groupname == null"); + } + + if (username == null) { + execCommand(pathToFile(p), Shell.SET_GROUP_COMMAND, groupname); + } else { + //OWNER[:[GROUP]] + String s = username + (groupname == null? "": ":" + groupname); + execCommand(pathToFile(p), Shell.SET_OWNER_COMMAND, s); + } + } + + /** + * Use the command chmod to set permission. + */ + @Override + public void setPermission(Path p, FsPermission permission + ) throws IOException { + execCommand(pathToFile(p), Shell.SET_PERMISSION_COMMAND, + String.format("%05o", permission.toShort())); + } + + private static String execCommand(File f, String... cmd) throws IOException { + String[] args = new String[cmd.length + 1]; + System.arraycopy(cmd, 0, args, 0, cmd.length); + args[cmd.length] = f.getCanonicalPath(); + String output = Shell.execCommand(args); + return output; + } +} diff --git a/src/java/org/apache/hadoop/fs/Seekable.java b/src/java/org/apache/hadoop/fs/Seekable.java new file mode 100644 index 00000000000..20e75088514 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Seekable.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.*; + +/** Stream that permits seeking. */ +public interface Seekable { + /** + * Seek to the given offset from the start of the file. + * The next read() will be from that location. Can't + * seek past the end of the file. + */ + void seek(long pos) throws IOException; + + /** + * Return the current offset from the start of the file + */ + long getPos() throws IOException; + + /** + * Seeks a different copy of the data. Returns true if + * found a new source, false otherwise. + */ + boolean seekToNewSource(long targetPos) throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/Syncable.java b/src/java/org/apache/hadoop/fs/Syncable.java new file mode 100644 index 00000000000..650d224e3e9 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Syncable.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +/** This interface declare the sync() operation. */ +public interface Syncable { + /** + * Synchronize all buffer with the underlying devices. + * @throws IOException + */ + public void sync() throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/Trash.java b/src/java/org/apache/hadoop/fs/Trash.java new file mode 100644 index 00000000000..5b062a1ece5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/Trash.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
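The Seekable contract is easiest to contrast with the positional reads of PositionedReadable side by side; a minimal sketch using FSDataInputStream, which implements both, against a made-up local file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekVersusPread {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FSDataInputStream in = fs.open(new Path("/tmp/sample.bin")); // hypothetical file
    byte[] buf = new byte[16];

    in.readFully(1024L, buf);          // positional read: thread-safe, offset untouched
    System.out.println(in.getPos());   // still 0

    in.seek(1024L);                    // Seekable: moves the stream offset
    in.read(buf, 0, buf.length);
    System.out.println(in.getPos());   // now 1024 plus the bytes just read
    in.close();
  }
}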
+ */ +package org.apache.hadoop.fs; + +import java.text.*; +import java.io.*; +import java.util.Date; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.permission.*; +import org.apache.hadoop.util.StringUtils; + +/** Provides a trash feature. Files are moved to a user's trash + * directory, a subdirectory of their home directory named ".Trash". Files are + * initially moved to a current sub-directory of the trash directory. + * Within that sub-directory their original path is preserved. Periodically + * one may checkpoint the current trash and remove older checkpoints. (This + * design permits trash management without enumeration of the full trash + * content, without date support in the filesystem, and without clock + * synchronization.) + */ +public class Trash extends Configured { + private static final Log LOG = + LogFactory.getLog(Trash.class); + + private static final Path CURRENT = new Path("Current"); + private static final Path TRASH = new Path(".Trash/"); + private static final Path HOMES = new Path("/user/"); + + private static final FsPermission PERMISSION = + new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); + + private static final DateFormat CHECKPOINT = new SimpleDateFormat("yyMMddHHmm"); + private static final int MSECS_PER_MINUTE = 60*1000; + + private final FileSystem fs; + private final Path trash; + private final Path current; + private final long interval; + + /** Construct a trash can accessor. + * @param conf a Configuration + */ + public Trash(Configuration conf) throws IOException { + this(FileSystem.get(conf), conf); + } + + /** + * Construct a trash can accessor for the FileSystem provided. + */ + public Trash(FileSystem fs, Configuration conf) throws IOException { + super(conf); + this.fs = fs; + this.trash = new Path(fs.getHomeDirectory(), TRASH); + this.current = new Path(trash, CURRENT); + this.interval = conf.getLong("fs.trash.interval", 60) * MSECS_PER_MINUTE; + } + + private Trash(Path home, Configuration conf) throws IOException { + super(conf); + this.fs = home.getFileSystem(conf); + this.trash = new Path(home, TRASH); + this.current = new Path(trash, CURRENT); + this.interval = conf.getLong("fs.trash.interval", 60) * MSECS_PER_MINUTE; + } + + private Path makeTrashRelativePath(Path basePath, Path rmFilePath) { + return new Path(basePath + rmFilePath.toUri().getPath()); + } + + /** Move a file or directory to the current trash directory. 
+ * @return false if the item is already in the trash or trash is disabled + */ + public boolean moveToTrash(Path path) throws IOException { + if (interval == 0) + return false; + + if (!path.isAbsolute()) // make path absolute + path = new Path(fs.getWorkingDirectory(), path); + + if (!fs.exists(path)) // check that path exists + throw new FileNotFoundException(path.toString()); + + String qpath = path.makeQualified(fs).toString(); + + if (qpath.startsWith(trash.toString())) { + return false; // already in trash + } + + if (trash.getParent().toString().startsWith(qpath)) { + throw new IOException("Cannot move \"" + path + + "\" to the trash, as it contains the trash"); + } + + Path trashPath = makeTrashRelativePath(current, path); + Path baseTrashPath = makeTrashRelativePath(current, path.getParent()); + + IOException cause = null; + + // try twice, in case checkpoint between the mkdirs() & rename() + for (int i = 0; i < 2; i++) { + try { + if (!fs.mkdirs(baseTrashPath, PERMISSION)) { // create current + LOG.warn("Can't create trash directory: "+baseTrashPath); + return false; + } + } catch (IOException e) { + LOG.warn("Can't create trash directory: "+baseTrashPath); + return false; + } + try { + // + // if the target path in Trash already exists, then append with + // a number. Start from 1. + // + String orig = trashPath.toString(); + for (int j = 1; fs.exists(trashPath); j++) { + trashPath = new Path(orig + "." + j); + } + if (fs.rename(path, trashPath)) // move to current trash + return true; + } catch (IOException e) { + cause = e; + } + } + throw (IOException) + new IOException("Failed to move to trash: "+path).initCause(cause); + } + + /** Create a trash checkpoint. */ + public void checkpoint() throws IOException { + if (!fs.exists(current)) // no trash, no checkpoint + return; + + Path checkpoint; + synchronized (CHECKPOINT) { + checkpoint = new Path(trash, CHECKPOINT.format(new Date())); + } + + if (fs.rename(current, checkpoint)) { + LOG.info("Created trash checkpoint: "+checkpoint.toUri().getPath()); + } else { + throw new IOException("Failed to checkpoint trash: "+checkpoint); + } + } + + /** Delete old checkpoints. */ + public void expunge() throws IOException { + FileStatus[] dirs = fs.listStatus(trash); // scan trash sub-directories + if( dirs == null){ + return; + } + long now = System.currentTimeMillis(); + for (int i = 0; i < dirs.length; i++) { + Path path = dirs[i].getPath(); + String dir = path.toUri().getPath(); + String name = path.getName(); + if (name.equals(CURRENT.getName())) // skip current + continue; + + long time; + try { + synchronized (CHECKPOINT) { + time = CHECKPOINT.parse(name).getTime(); + } + } catch (ParseException e) { + LOG.warn("Unexpected item in trash: "+dir+". Ignoring."); + continue; + } + + if ((now - interval) > time) { + if (fs.delete(path, true)) { + LOG.info("Deleted trash checkpoint: "+dir); + } else { + LOG.warn("Couldn't delete checkpoint: "+dir+" Ignoring."); + } + } + } + } + + // + // get the current working directory + // + Path getCurrentTrashDir() { + return current; + } + + /** Return a {@link Runnable} that periodically empties the trash of all + * users, intended to be run by the superuser. Only one checkpoint is kept + * at a time. 
+ */ + public Runnable getEmptier() throws IOException { + return new Emptier(getConf()); + } + + private class Emptier implements Runnable { + + private Configuration conf; + private long interval; + + Emptier(Configuration conf) throws IOException { + this.conf = conf; + this.interval = conf.getLong("fs.trash.interval", 0) * MSECS_PER_MINUTE; + } + + public void run() { + if (interval == 0) + return; // trash disabled + + long now = System.currentTimeMillis(); + long end; + while (true) { + end = ceiling(now, interval); + try { // sleep for interval + Thread.sleep(end - now); + } catch (InterruptedException e) { + break; // exit on interrupt + } + + try { + now = System.currentTimeMillis(); + if (now >= end) { + + FileStatus[] homes = null; + try { + homes = fs.listStatus(HOMES); // list all home dirs + } catch (IOException e) { + LOG.warn("Trash can't list homes: "+e+" Sleeping."); + continue; + } + + if (homes == null) + continue; + + for (FileStatus home : homes) { // dump each trash + if (!home.isDir()) + continue; + try { + Trash trash = new Trash(home.getPath(), conf); + trash.expunge(); + trash.checkpoint(); + } catch (IOException e) { + LOG.warn("Trash caught: "+e+". Skipping "+home.getPath()+"."); + } + } + } + } catch (Exception e) { + LOG.warn("RuntimeException during Trash.Emptier.run() " + + StringUtils.stringifyException(e)); + } + } + try { + fs.close(); + } catch(IOException e) { + LOG.warn("Trash cannot close FileSystem. " + + StringUtils.stringifyException(e)); + } + } + + private long ceiling(long time, long interval) { + return floor(time, interval) + interval; + } + private long floor(long time, long interval) { + return (time / interval) * interval; + } + + } + + /** Run an emptier.*/ + public static void main(String[] args) throws Exception { + new Trash(new Configuration()).getEmptier().run(); + } + +} diff --git a/src/java/org/apache/hadoop/fs/ftp/FTPException.java b/src/java/org/apache/hadoop/fs/ftp/FTPException.java new file mode 100644 index 00000000000..c76cb57f3c8 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ftp/FTPException.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.ftp; + +/** + * A class to wrap a {@link Throwable} into a Runtime Exception. 
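A minimal sketch of driving the Trash API above directly, assuming trash is enabled via fs.trash.interval (in minutes) and that the path being removed actually exists:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class TrashExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setLong("fs.trash.interval", 60);   // minutes between checkpoints; 0 disables trash
    FileSystem fs = FileSystem.get(conf);

    Trash trash = new Trash(fs, conf);
    if (trash.moveToTrash(new Path("/user/alice/old-data"))) {  // hypothetical path
      System.out.println("moved into .Trash/Current");
    }
    trash.checkpoint();   // roll Current into a timestamped checkpoint
    trash.expunge();      // drop checkpoints older than fs.trash.interval
  }
}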
+ */ +public class FTPException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public FTPException(String message) { + super(message); + } + + public FTPException(Throwable t) { + super(t); + } + + public FTPException(String message, Throwable t) { + super(message, t); + } +} diff --git a/src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java b/src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java new file mode 100644 index 00000000000..ee91f1c899f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ftp/FTPFileSystem.java @@ -0,0 +1,576 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.ftp; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.net.ftp.FTP; +import org.apache.commons.net.ftp.FTPClient; +import org.apache.commons.net.ftp.FTPFile; +import org.apache.commons.net.ftp.FTPReply; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +/** + *
+ * A {@link FileSystem} backed by an FTP client provided by Apache Commons Net.
+ */ +public class FTPFileSystem extends FileSystem { + + public static final Log LOG = LogFactory + .getLog(FTPFileSystem.class); + + public static final int DEFAULT_BUFFER_SIZE = 1024 * 1024; + + public static final int DEFAULT_BLOCK_SIZE = 4 * 1024; + + private URI uri; + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { // get + super.initialize(uri, conf); + // get host information from uri (overrides info in conf) + String host = uri.getHost(); + host = (host == null) ? conf.get("fs.ftp.host", null) : host; + if (host == null) { + throw new IOException("Invalid host specified"); + } + conf.set("fs.ftp.host", host); + + // get port information from uri, (overrides info in conf) + int port = uri.getPort(); + port = (port == -1) ? FTP.DEFAULT_PORT : port; + conf.setInt("fs.ftp.host.port", port); + + // get user/password information from URI (overrides info in conf) + String userAndPassword = uri.getUserInfo(); + if (userAndPassword == null) { + userAndPassword = (conf.get("fs.ftp.user." + host, null) + ":" + conf + .get("fs.ftp.password." + host, null)); + if (userAndPassword == null) { + throw new IOException("Invalid user/passsword specified"); + } + } + String[] userPasswdInfo = userAndPassword.split(":"); + conf.set("fs.ftp.user." + host, userPasswdInfo[0]); + if (userPasswdInfo.length > 1) { + conf.set("fs.ftp.password." + host, userPasswdInfo[1]); + } else { + conf.set("fs.ftp.password." + host, null); + } + setConf(conf); + this.uri = uri; + } + + /** + * Connect to the FTP server using configuration parameters * + * + * @return An FTPClient instance + * @throws IOException + */ + private FTPClient connect() throws IOException { + FTPClient client = null; + Configuration conf = getConf(); + String host = conf.get("fs.ftp.host"); + int port = conf.getInt("fs.ftp.host.port", FTP.DEFAULT_PORT); + String user = conf.get("fs.ftp.user." + host); + String password = conf.get("fs.ftp.password." + host); + client = new FTPClient(); + client.connect(host, port); + int reply = client.getReplyCode(); + if (!FTPReply.isPositiveCompletion(reply)) { + throw new IOException("Server - " + host + + " refused connection on port - " + port); + } else if (client.login(user, password)) { + client.setFileTransferMode(FTP.BLOCK_TRANSFER_MODE); + client.setFileType(FTP.BINARY_FILE_TYPE); + client.setBufferSize(DEFAULT_BUFFER_SIZE); + } else { + throw new IOException("Login failed on server - " + host + ", port - " + + port); + } + + return client; + } + + /** + * Logout and disconnect the given FTPClient. * + * + * @param client + * @throws IOException + */ + private void disconnect(FTPClient client) throws IOException { + if (client != null) { + if (!client.isConnected()) { + throw new FTPException("Client not connected"); + } + boolean logoutSuccess = client.logout(); + client.disconnect(); + if (!logoutSuccess) { + LOG.warn("Logout failed while disconnecting, error code - " + + client.getReplyCode()); + } + } + } + + /** + * Resolve against given working directory. 
* + * + * @param workDir + * @param path + * @return + */ + private Path makeAbsolute(Path workDir, Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workDir, path); + } + + @Override + public FSDataInputStream open(Path file, int bufferSize) throws IOException { + FTPClient client = connect(); + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + FileStatus fileStat = getFileStatus(client, absolute); + if (fileStat.isDir()) { + disconnect(client); + throw new IOException("Path " + file + " is a directory."); + } + client.allocate(bufferSize); + Path parent = absolute.getParent(); + // Change to parent directory on the + // server. Only then can we read the + // file + // on the server by opening up an InputStream. As a side effect the working + // directory on the server is changed to the parent directory of the file. + // The FTP client connection is closed when close() is called on the + // FSDataInputStream. + client.changeWorkingDirectory(parent.toUri().getPath()); + InputStream is = client.retrieveFileStream(file.getName()); + FSDataInputStream fis = new FSDataInputStream(new FTPInputStream(is, + client, statistics)); + if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { + // The ftpClient is an inconsistent state. Must close the stream + // which in turn will logout and disconnect from FTP server + fis.close(); + throw new IOException("Unable to open file: " + file + ", Aborting"); + } + return fis; + } + + /** + * A stream obtained via this call must be closed before using other APIs of + * this class or else the invocation will block. + */ + @Override + public FSDataOutputStream create(Path file, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + final FTPClient client = connect(); + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + if (exists(client, file)) { + if (overwrite) { + delete(client, file); + } else { + disconnect(client); + throw new IOException("File already exists: " + file); + } + } + Path parent = absolute.getParent(); + if (parent == null || !mkdirs(client, parent, FsPermission.getDefault())) { + parent = (parent == null) ? new Path("/") : parent; + disconnect(client); + throw new IOException("create(): Mkdirs failed to create: " + parent); + } + client.allocate(bufferSize); + // Change to parent directory on the server. Only then can we write to the + // file on the server by opening up an OutputStream. As a side effect the + // working directory on the server is changed to the parent directory of the + // file. The FTP client connection is closed when close() is called on the + // FSDataOutputStream. + client.changeWorkingDirectory(parent.toUri().getPath()); + FSDataOutputStream fos = new FSDataOutputStream(client.storeFileStream(file + .getName()), statistics) { + @Override + public void close() throws IOException { + super.close(); + if (!client.isConnected()) { + throw new FTPException("Client not connected"); + } + boolean cmdCompleted = client.completePendingCommand(); + disconnect(client); + if (!cmdCompleted) { + throw new FTPException("Could not complete transfer, Reply Code - " + + client.getReplyCode()); + } + } + }; + if (!FTPReply.isPositivePreliminary(client.getReplyCode())) { + // The ftpClient is an inconsistent state. 
Must close the stream + // which in turn will logout and disconnect from FTP server + fos.close(); + throw new IOException("Unable to create file: " + file + ", Aborting"); + } + return fos; + } + + /** This optional operation is not yet supported. */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean exists(FTPClient client, Path file) { + try { + return getFileStatus(client, file) != null; + } catch (FileNotFoundException fnfe) { + return false; + } catch (IOException ioe) { + throw new FTPException("Failed to get file status", ioe); + } + } + + @Override + public boolean delete(Path file, boolean recursive) throws IOException { + FTPClient client = connect(); + try { + boolean success = delete(client, file, recursive); + return success; + } finally { + disconnect(client); + } + } + + /** @deprecated Use delete(Path, boolean) instead */ + @Deprecated + private boolean delete(FTPClient client, Path file) throws IOException { + return delete(client, file, false); + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean delete(FTPClient client, Path file, boolean recursive) + throws IOException { + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + String pathName = absolute.toUri().getPath(); + FileStatus fileStat = getFileStatus(client, absolute); + if (!fileStat.isDir()) { + return client.deleteFile(pathName); + } + FileStatus[] dirEntries = listStatus(client, absolute); + if (dirEntries != null && dirEntries.length > 0 && !(recursive)) { + throw new IOException("Directory: " + file + " is not empty."); + } + if (dirEntries != null) { + for (int i = 0; i < dirEntries.length; i++) { + delete(client, new Path(absolute, dirEntries[i].getPath()), recursive); + } + } + return client.removeDirectory(pathName); + } + + private FsAction getFsAction(int accessGroup, FTPFile ftpFile) { + FsAction action = FsAction.NONE; + if (ftpFile.hasPermission(accessGroup, FTPFile.READ_PERMISSION)) { + action.or(FsAction.READ); + } + if (ftpFile.hasPermission(accessGroup, FTPFile.WRITE_PERMISSION)) { + action.or(FsAction.WRITE); + } + if (ftpFile.hasPermission(accessGroup, FTPFile.EXECUTE_PERMISSION)) { + action.or(FsAction.EXECUTE); + } + return action; + } + + private FsPermission getPermissions(FTPFile ftpFile) { + FsAction user, group, others; + user = getFsAction(FTPFile.USER_ACCESS, ftpFile); + group = getFsAction(FTPFile.GROUP_ACCESS, ftpFile); + others = getFsAction(FTPFile.WORLD_ACCESS, ftpFile); + return new FsPermission(user, group, others); + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public FileStatus[] listStatus(Path file) throws IOException { + FTPClient client = connect(); + try { + FileStatus[] stats = listStatus(client, file); + return stats; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. 
Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private FileStatus[] listStatus(FTPClient client, Path file) + throws IOException { + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + FileStatus fileStat = getFileStatus(client, absolute); + if (!fileStat.isDir()) { + return new FileStatus[] { fileStat }; + } + FTPFile[] ftpFiles = client.listFiles(absolute.toUri().getPath()); + FileStatus[] fileStats = new FileStatus[ftpFiles.length]; + for (int i = 0; i < ftpFiles.length; i++) { + fileStats[i] = getFileStatus(ftpFiles[i], absolute); + } + return fileStats; + } + + @Override + public FileStatus getFileStatus(Path file) throws IOException { + FTPClient client = connect(); + try { + FileStatus status = getFileStatus(client, file); + return status; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private FileStatus getFileStatus(FTPClient client, Path file) + throws IOException { + FileStatus fileStat = null; + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + Path parentPath = absolute.getParent(); + if (parentPath == null) { // root dir + long length = -1; // Length of root dir on server not known + boolean isDir = true; + int blockReplication = 1; + long blockSize = DEFAULT_BLOCK_SIZE; // Block Size not known. + long modTime = -1; // Modification time of root dir not known. + Path root = new Path("/"); + return new FileStatus(length, isDir, blockReplication, blockSize, + modTime, root.makeQualified(this)); + } + String pathName = parentPath.toUri().getPath(); + FTPFile[] ftpFiles = client.listFiles(pathName); + if (ftpFiles != null) { + for (FTPFile ftpFile : ftpFiles) { + if (ftpFile.getName().equals(file.getName())) { // file found in dir + fileStat = getFileStatus(ftpFile, parentPath); + break; + } + } + if (fileStat == null) { + throw new FileNotFoundException("File " + file + " does not exist."); + } + } else { + throw new FileNotFoundException("File " + file + " does not exist."); + } + return fileStat; + } + + /** + * Convert the file information in FTPFile to a {@link FileStatus} object. * + * + * @param ftpFile + * @param parentPath + * @return FileStatus + */ + private FileStatus getFileStatus(FTPFile ftpFile, Path parentPath) { + long length = ftpFile.getSize(); + boolean isDir = ftpFile.isDirectory(); + int blockReplication = 1; + // Using default block size since there is no way in FTP client to know of + // block sizes on server. The assumption could be less than ideal. 
+ long blockSize = DEFAULT_BLOCK_SIZE; + long modTime = ftpFile.getTimestamp().getTimeInMillis(); + long accessTime = 0; + FsPermission permission = getPermissions(ftpFile); + String user = ftpFile.getUser(); + String group = ftpFile.getGroup(); + Path filePath = new Path(parentPath, ftpFile.getName()); + return new FileStatus(length, isDir, blockReplication, blockSize, modTime, + accessTime, permission, user, group, filePath.makeQualified(this)); + } + + @Override + public boolean mkdirs(Path file, FsPermission permission) throws IOException { + FTPClient client = connect(); + try { + boolean success = mkdirs(client, file, permission); + return success; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean mkdirs(FTPClient client, Path file, FsPermission permission) + throws IOException { + boolean created = true; + Path workDir = new Path(client.printWorkingDirectory()); + Path absolute = makeAbsolute(workDir, file); + String pathName = absolute.getName(); + if (!exists(client, absolute)) { + Path parent = absolute.getParent(); + created = (parent == null || mkdirs(client, parent, FsPermission + .getDefault())); + if (created) { + String parentDir = parent.toUri().getPath(); + client.changeWorkingDirectory(parentDir); + created = created & client.makeDirectory(pathName); + } + } else if (isFile(client, absolute)) { + throw new IOException(String.format( + "Can't make directory for path %s since it is a file.", absolute)); + } + return created; + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. + */ + private boolean isFile(FTPClient client, Path file) { + try { + return !getFileStatus(client, file).isDir(); + } catch (FileNotFoundException e) { + return false; // file does not exist + } catch (IOException ioe) { + throw new FTPException("File check failed", ioe); + } + } + + /* + * Assuming that parent of both source and destination is the same. Is the + * assumption correct or it is suppose to work like 'move' ? + */ + @Override + public boolean rename(Path src, Path dst) throws IOException { + FTPClient client = connect(); + try { + boolean success = rename(client, src, dst); + return success; + } finally { + disconnect(client); + } + } + + /** + * Convenience method, so that we don't open a new connection when using this + * method from within another method. Otherwise every API invocation incurs + * the overhead of opening/closing a TCP connection. 
+ * + * @param client + * @param src + * @param dst + * @return + * @throws IOException + */ + private boolean rename(FTPClient client, Path src, Path dst) + throws IOException { + Path workDir = new Path(client.printWorkingDirectory()); + Path absoluteSrc = makeAbsolute(workDir, src); + Path absoluteDst = makeAbsolute(workDir, dst); + if (!exists(client, absoluteSrc)) { + throw new IOException("Source path " + src + " does not exist"); + } + if (exists(client, absoluteDst)) { + throw new IOException("Destination path " + dst + + " already exist, cannot rename!"); + } + String parentSrc = absoluteSrc.getParent().toUri().toString(); + String parentDst = absoluteDst.getParent().toUri().toString(); + String from = src.getName(); + String to = dst.getName(); + if (!parentSrc.equals(parentDst)) { + throw new IOException("Cannot rename parent(source): " + parentSrc + + ", parent(destination): " + parentDst); + } + client.changeWorkingDirectory(parentSrc); + boolean renamed = client.rename(from, to); + return renamed; + } + + @Override + public Path getWorkingDirectory() { + // Return home directory always since we do not maintain state. + return getHomeDirectory(); + } + + @Override + public Path getHomeDirectory() { + FTPClient client = null; + try { + client = connect(); + Path homeDir = new Path(client.printWorkingDirectory()); + return homeDir; + } catch (IOException ioe) { + throw new FTPException("Failed to get home directory", ioe); + } finally { + try { + disconnect(client); + } catch (IOException ioe) { + throw new FTPException("Failed to disconnect", ioe); + } + } + } + + @Override + public void setWorkingDirectory(Path newDir) { + // we do not maintain the working directory state + } +} diff --git a/src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java b/src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java new file mode 100644 index 00000000000..f1b78955ae2 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/ftp/FTPInputStream.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
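A minimal sketch of the fs.ftp.* keys that initialize() and connect() read, with made-up host, credentials and file; fs.ftp.impl is set explicitly here in case this tree's core-default.xml does not already map the ftp scheme:

import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FtpListing {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // scheme-to-class mapping, possibly already supplied by core-default.xml
    conf.set("fs.ftp.impl", "org.apache.hadoop.fs.ftp.FTPFileSystem");
    conf.set("fs.ftp.host", "ftp.example.com");           // made-up server
    conf.set("fs.ftp.user.ftp.example.com", "alice");     // made-up credentials
    conf.set("fs.ftp.password.ftp.example.com", "secret");

    FileSystem ftp = FileSystem.get(URI.create("ftp://ftp.example.com/"), conf);
    InputStream in = ftp.open(new Path("/pub/data.csv")); // made-up file
    // ... consume the stream; it must be closed before issuing further calls,
    // since each FileSystem operation opens and closes its own FTP connection.
    in.close();
    ftp.close();
  }
}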
+ */ +package org.apache.hadoop.fs.ftp; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.commons.net.ftp.FTPClient; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem; + +public class FTPInputStream extends FSInputStream { + + InputStream wrappedStream; + FTPClient client; + FileSystem.Statistics stats; + boolean closed; + long pos; + + public FTPInputStream(InputStream stream, FTPClient client, + FileSystem.Statistics stats) { + if (stream == null) { + throw new IllegalArgumentException("Null InputStream"); + } + if (client == null || !client.isConnected()) { + throw new IllegalArgumentException("FTP client null or not connected"); + } + this.wrappedStream = stream; + this.client = client; + this.stats = stats; + this.pos = 0; + this.closed = false; + } + + public long getPos() throws IOException { + return pos; + } + + // We don't support seek. + public void seek(long pos) throws IOException { + throw new IOException("Seek not supported"); + } + + public boolean seekToNewSource(long targetPos) throws IOException { + throw new IOException("Seek not supported"); + } + + public synchronized int read() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + int byteRead = wrappedStream.read(); + if (byteRead >= 0) { + pos++; + } + if (stats != null & byteRead >= 0) { + stats.incrementBytesRead(1); + } + return byteRead; + } + + public synchronized int read(byte buf[], int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + int result = wrappedStream.read(buf, off, len); + if (result > 0) { + pos += result; + } + if (stats != null & result > 0) { + stats.incrementBytesRead(result); + } + + return result; + } + + public synchronized void close() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + super.close(); + closed = true; + if (!client.isConnected()) { + throw new FTPException("Client not connected"); + } + + boolean cmdCompleted = client.completePendingCommand(); + client.logout(); + client.disconnect(); + if (!cmdCompleted) { + throw new FTPException("Could not complete transfer, Reply Code - " + + client.getReplyCode()); + } + } + + // Not supported. + + public boolean markSupported() { + return false; + } + + public void mark(int readLimit) { + // Do nothing + } + + public void reset() throws IOException { + throw new IOException("Mark not supported"); + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/IFSImpl.java b/src/java/org/apache/hadoop/fs/kfs/IFSImpl.java new file mode 100644 index 00000000000..f2a773663ea --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/IFSImpl.java @@ -0,0 +1,60 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * We need to provide the ability to the code in fs/kfs without really + * having a KFS deployment. In particular, the glue code that wraps + * around calls to KfsAccess object. 
This is accomplished by defining a + * filesystem implementation interface: + * -- for testing purposes, a dummy implementation of this interface + * will suffice; as long as the dummy implementation is close enough + * to doing what KFS does, we are good. + * -- for deployment purposes with KFS, this interface is + * implemented by the KfsImpl object. + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Progressable; + +interface IFSImpl { + public boolean exists(String path) throws IOException; + public boolean isDirectory(String path) throws IOException; + public boolean isFile(String path) throws IOException; + public String[] readdir(String path) throws IOException; + public FileStatus[] readdirplus(Path path) throws IOException; + + public int mkdirs(String path) throws IOException; + public int rename(String source, String dest) throws IOException; + + public int rmdir(String path) throws IOException; + public int remove(String path) throws IOException; + public long filesize(String path) throws IOException; + public short getReplication(String path) throws IOException; + public short setReplication(String path, short replication) throws IOException; + public String[][] getDataLocation(String path, long start, long len) throws IOException; + + public long getModificationTime(String path) throws IOException; + public FSDataOutputStream create(String path, short replication, int bufferSize, Progressable progress) throws IOException; + public FSDataInputStream open(String path, int bufferSize) throws IOException; + public FSDataOutputStream append(String path, int bufferSize, Progressable progress) throws IOException; + +}; diff --git a/src/java/org/apache/hadoop/fs/kfs/KFSImpl.java b/src/java/org/apache/hadoop/fs/kfs/KFSImpl.java new file mode 100644 index 00000000000..bc66ec2570a --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/KFSImpl.java @@ -0,0 +1,151 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Provide the implementation of KFS which turn into calls to KfsAccess. 
+ */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; + +import org.kosmix.kosmosfs.access.KfsAccess; +import org.kosmix.kosmosfs.access.KfsFileAttr; +import org.apache.hadoop.util.Progressable; + +class KFSImpl implements IFSImpl { + private KfsAccess kfsAccess = null; + private FileSystem.Statistics statistics; + + @Deprecated + public KFSImpl(String metaServerHost, int metaServerPort + ) throws IOException { + this(metaServerHost, metaServerPort, null); + } + + public KFSImpl(String metaServerHost, int metaServerPort, + FileSystem.Statistics stats) throws IOException { + kfsAccess = new KfsAccess(metaServerHost, metaServerPort); + statistics = stats; + } + + public boolean exists(String path) throws IOException { + return kfsAccess.kfs_exists(path); + } + + public boolean isDirectory(String path) throws IOException { + return kfsAccess.kfs_isDirectory(path); + } + + public boolean isFile(String path) throws IOException { + return kfsAccess.kfs_isFile(path); + } + + public String[] readdir(String path) throws IOException { + return kfsAccess.kfs_readdir(path); + } + + public FileStatus[] readdirplus(Path path) throws IOException { + String srep = path.toUri().getPath(); + KfsFileAttr[] fattr = kfsAccess.kfs_readdirplus(srep); + if (fattr == null) + return null; + int numEntries = 0; + for (int i = 0; i < fattr.length; i++) { + if ((fattr[i].filename.compareTo(".") == 0) || (fattr[i].filename.compareTo("..") == 0)) + continue; + numEntries++; + } + FileStatus[] fstatus = new FileStatus[numEntries]; + int j = 0; + for (int i = 0; i < fattr.length; i++) { + if ((fattr[i].filename.compareTo(".") == 0) || (fattr[i].filename.compareTo("..") == 0)) + continue; + Path fn = new Path(path, fattr[i].filename); + + if (fattr[i].isDirectory) + fstatus[j] = new FileStatus(0, true, 1, 0, fattr[i].modificationTime, fn); + else + fstatus[j] = new FileStatus(fattr[i].filesize, fattr[i].isDirectory, + fattr[i].replication, + (long) + (1 << 26), + fattr[i].modificationTime, + fn); + + j++; + } + return fstatus; + } + + + public int mkdirs(String path) throws IOException { + return kfsAccess.kfs_mkdirs(path); + } + + public int rename(String source, String dest) throws IOException { + return kfsAccess.kfs_rename(source, dest); + } + + public int rmdir(String path) throws IOException { + return kfsAccess.kfs_rmdir(path); + } + + public int remove(String path) throws IOException { + return kfsAccess.kfs_remove(path); + } + + public long filesize(String path) throws IOException { + return kfsAccess.kfs_filesize(path); + } + + public short getReplication(String path) throws IOException { + return kfsAccess.kfs_getReplication(path); + } + + public short setReplication(String path, short replication) throws IOException { + return kfsAccess.kfs_setReplication(path, replication); + } + + public String[][] getDataLocation(String path, long start, long len) throws IOException { + return kfsAccess.kfs_getDataLocation(path, start, len); + } + + public long getModificationTime(String path) throws IOException { + return kfsAccess.kfs_getModificationTime(path); + } + + public FSDataInputStream open(String path, int bufferSize) throws IOException { + return new FSDataInputStream(new KFSInputStream(kfsAccess, path, + statistics)); + } + + public FSDataOutputStream create(String path, short replication, 
int bufferSize, Progressable progress) throws IOException { + return new FSDataOutputStream(new KFSOutputStream(kfsAccess, path, replication, false, progress), + statistics); + } + + public FSDataOutputStream append(String path, int bufferSize, Progressable progress) throws IOException { + // when opening for append, # of replicas is ignored + return new FSDataOutputStream(new KFSOutputStream(kfsAccess, path, (short) 1, true, progress), + statistics); + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java b/src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java new file mode 100644 index 00000000000..bb2c32c31bd --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/KFSInputStream.java @@ -0,0 +1,130 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Implements the Hadoop FSInputStream interfaces to allow applications to read + * files in Kosmos File System (KFS). + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; +import java.nio.ByteBuffer; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FSInputStream; + +import org.kosmix.kosmosfs.access.KfsAccess; +import org.kosmix.kosmosfs.access.KfsInputChannel; + +class KFSInputStream extends FSInputStream { + + private KfsInputChannel kfsChannel; + private FileSystem.Statistics statistics; + private long fsize; + + @Deprecated + public KFSInputStream(KfsAccess kfsAccess, String path) { + this(kfsAccess, path, null); + } + + public KFSInputStream(KfsAccess kfsAccess, String path, + FileSystem.Statistics stats) { + this.statistics = stats; + this.kfsChannel = kfsAccess.kfs_open(path); + if (this.kfsChannel != null) + this.fsize = kfsAccess.kfs_filesize(path); + else + this.fsize = 0; + } + + public long getPos() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + return kfsChannel.tell(); + } + + public synchronized int available() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + return (int) (this.fsize - getPos()); + } + + public synchronized void seek(long targetPos) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + kfsChannel.seek(targetPos); + } + + public synchronized boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + public synchronized int read() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + byte b[] = new byte[1]; + int res = read(b, 0, 1); + if (res == 1) { + if (statistics != null) { + statistics.incrementBytesRead(1); + } + return b[0] & 0xff; + } + return -1; + } + + public synchronized int read(byte b[], int off, int len) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + int res; + + res = kfsChannel.read(ByteBuffer.wrap(b, off, len)); + // Use -1 to signify EOF + if (res == 0) + return -1; + if (statistics != null) { + statistics.incrementBytesRead(res); + } + 
return res; + } + + public synchronized void close() throws IOException { + if (kfsChannel == null) { + return; + } + + kfsChannel.close(); + kfsChannel = null; + } + + public boolean markSupported() { + return false; + } + + public void mark(int readLimit) { + // Do nothing + } + + public void reset() throws IOException { + throw new IOException("Mark not supported"); + } + +} diff --git a/src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java b/src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java new file mode 100644 index 00000000000..e55f4205d8f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/KFSOutputStream.java @@ -0,0 +1,97 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Implements the Hadoop FSOutputStream interfaces to allow applications to write to + * files in Kosmos File System (KFS). + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.*; +import java.net.*; +import java.util.*; +import java.nio.ByteBuffer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.util.Progressable; + +import org.kosmix.kosmosfs.access.KfsAccess; +import org.kosmix.kosmosfs.access.KfsOutputChannel; + +class KFSOutputStream extends OutputStream { + + private String path; + private KfsOutputChannel kfsChannel; + private Progressable progressReporter; + + public KFSOutputStream(KfsAccess kfsAccess, String path, short replication, + boolean append, Progressable prog) { + this.path = path; + + if ((append) && (kfsAccess.kfs_isFile(path))) + this.kfsChannel = kfsAccess.kfs_append(path); + else + this.kfsChannel = kfsAccess.kfs_create(path, replication); + this.progressReporter = prog; + } + + public long getPos() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + return kfsChannel.tell(); + } + + public void write(int v) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + byte[] b = new byte[1]; + + b[0] = (byte) v; + write(b, 0, 1); + } + + public void write(byte b[], int off, int len) throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + + // touch the progress before going into KFS since the call can block + progressReporter.progress(); + kfsChannel.write(ByteBuffer.wrap(b, off, len)); + } + + public void flush() throws IOException { + if (kfsChannel == null) { + throw new IOException("File closed"); + } + // touch the progress before going into KFS since the call can block + progressReporter.progress(); + kfsChannel.sync(); + } + + public synchronized void close() throws IOException { + if (kfsChannel == null) { + return; + } + flush(); + kfsChannel.close(); + kfsChannel = null; + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java b/src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java new file mode 100644 index 00000000000..57b27a2a0e9 --- /dev/null +++ 
b/src/java/org/apache/hadoop/fs/kfs/KosmosFileSystem.java @@ -0,0 +1,340 @@ +/** + * + * Licensed under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * @author: Sriram Rao (Kosmix Corp.) + * + * Implements the Hadoop FS interfaces to allow applications to store + *files in Kosmos File System (KFS). + */ + +package org.apache.hadoop.fs.kfs; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +/** + * A FileSystem backed by KFS. + * + */ + +public class KosmosFileSystem extends FileSystem { + + private FileSystem localFs; + private IFSImpl kfsImpl = null; + private URI uri; + private Path workingDir = new Path("/"); + + public KosmosFileSystem() { + + } + + KosmosFileSystem(IFSImpl fsimpl) { + this.kfsImpl = fsimpl; + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + try { + if (kfsImpl == null) { + if (uri.getHost() == null) { + kfsImpl = new KFSImpl(conf.get("fs.kfs.metaServerHost", ""), + conf.getInt("fs.kfs.metaServerPort", -1), + statistics); + } else { + kfsImpl = new KFSImpl(uri.getHost(), uri.getPort(), statistics); + } + } + + this.localFs = FileSystem.getLocal(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + this.workingDir = new Path("/user", System.getProperty("user.name") + ).makeQualified(this); + setConf(conf); + + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Unable to initialize KFS"); + System.exit(-1); + } + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + @Override + public void setWorkingDirectory(Path dir) { + workingDir = makeAbsolute(dir); + } + + private Path makeAbsolute(Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workingDir, path); + } + + @Override + public boolean mkdirs(Path path, FsPermission permission + ) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + int res; + + // System.out.println("Calling mkdirs on: " + srep); + + res = kfsImpl.mkdirs(srep); + + return res == 0; + } + + @Override + @Deprecated + public boolean isDirectory(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + // System.out.println("Calling isdir on: " + srep); + + return kfsImpl.isDirectory(srep); + } + + @Override + @Deprecated + public boolean isFile(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = 
absolute.toUri().getPath(); + return kfsImpl.isFile(srep); + } + + @Override + public FileStatus[] listStatus(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + if (kfsImpl.isFile(srep)) + return new FileStatus[] { getFileStatus(path) } ; + + return kfsImpl.readdirplus(absolute); + } + + @Override + public FileStatus getFileStatus(Path path) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + if (!kfsImpl.exists(srep)) { + throw new FileNotFoundException("File " + path + " does not exist."); + } + if (kfsImpl.isDirectory(srep)) { + // System.out.println("Status of path: " + path + " is dir"); + return new FileStatus(0, true, 1, 0, kfsImpl.getModificationTime(srep), + path.makeQualified(this)); + } else { + // System.out.println("Status of path: " + path + " is file"); + return new FileStatus(kfsImpl.filesize(srep), false, + kfsImpl.getReplication(srep), + getDefaultBlockSize(), + kfsImpl.getModificationTime(srep), + path.makeQualified(this)); + } + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + Path parent = f.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + + Path absolute = makeAbsolute(f); + String srep = absolute.toUri().getPath(); + + return kfsImpl.append(srep, bufferSize, progress); + } + + @Override + public FSDataOutputStream create(Path file, FsPermission permission, + boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) + throws IOException { + + if (exists(file)) { + if (overwrite) { + delete(file, true); + } else { + throw new IOException("File already exists: " + file); + } + } + + Path parent = file.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + + Path absolute = makeAbsolute(file); + String srep = absolute.toUri().getPath(); + + return kfsImpl.create(srep, replication, bufferSize, progress); + } + + @Override + public FSDataInputStream open(Path path, int bufferSize) throws IOException { + if (!exists(path)) + throw new IOException("File does not exist: " + path); + + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + return kfsImpl.open(srep, bufferSize); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + Path absoluteS = makeAbsolute(src); + String srepS = absoluteS.toUri().getPath(); + Path absoluteD = makeAbsolute(dst); + String srepD = absoluteD.toUri().getPath(); + + // System.out.println("Calling rename on: " + srepS + " -> " + srepD); + + return kfsImpl.rename(srepS, srepD) == 0; + } + + // recursively delete the directory and its contents + @Override + public boolean delete(Path path, boolean recursive) throws IOException { + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + if (kfsImpl.isFile(srep)) + return kfsImpl.remove(srep) == 0; + + FileStatus[] dirEntries = listStatus(absolute); + if ((!recursive) && (dirEntries != null) && + (dirEntries.length != 0)) { + throw new IOException("Directory " + path.toString() + + " is not empty."); + } + if (dirEntries != null) { + for (int i = 0; i < dirEntries.length; i++) { + delete(new Path(absolute, dirEntries[i].getPath()), recursive); + } + } + return kfsImpl.rmdir(srep) == 0; + } + + @Override + public short getDefaultReplication() { + 
return 3; + } + + @Override + public boolean setReplication(Path path, short replication) + throws IOException { + + Path absolute = makeAbsolute(path); + String srep = absolute.toUri().getPath(); + + int res = kfsImpl.setReplication(srep, replication); + return res >= 0; + } + + // 64MB is the KFS block size + + @Override + public long getDefaultBlockSize() { + return 1 << 26; + } + + @Deprecated + public void lock(Path path, boolean shared) throws IOException { + + } + + @Deprecated + public void release(Path path) throws IOException { + + } + + /** + * Return null if the file doesn't exist; otherwise, get the + * locations of the various chunks of the file file from KFS. + */ + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, + long len) throws IOException { + + if (file == null) { + return null; + } + String srep = makeAbsolute(file.getPath()).toUri().getPath(); + String[][] hints = kfsImpl.getDataLocation(srep, start, len); + if (hints == null) { + return null; + } + BlockLocation[] result = new BlockLocation[hints.length]; + long blockSize = getDefaultBlockSize(); + long length = len; + long blockStart = start; + for(int i=0; i < result.length; ++i) { + result[i] = new BlockLocation(null, hints[i], blockStart, + length < blockSize ? length : blockSize); + blockStart += blockSize; + length -= blockSize; + } + return result; + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException { + FileUtil.copy(localFs, src, this, dst, delSrc, getConf()); + } + + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException { + FileUtil.copy(this, src, localFs, dst, delSrc, getConf()); + } + + @Override + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return tmpLocalFile; + } + + @Override + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + moveFromLocalFile(tmpLocalFile, fsOutputFile); + } +} diff --git a/src/java/org/apache/hadoop/fs/kfs/package.html b/src/java/org/apache/hadoop/fs/kfs/package.html new file mode 100644 index 00000000000..365b60b4fa5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/kfs/package.html @@ -0,0 +1,98 @@ + + + + + + +

A client for the Kosmos filesystem (KFS)

+ +

Introduction

+ +This page describes how to use the Kosmos Filesystem +(KFS) as a backing +store with Hadoop. It assumes that you have downloaded the +KFS software and installed the necessary binaries as outlined in the KFS +documentation. + +

Steps

+ +
    +
  • In the Hadoop conf directory edit core-site.xml + and add the following (a short client-side usage sketch follows these steps):
    +<property>
    +  <name>fs.kfs.impl</name>
    +  <value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value>
    +  <description>The FileSystem for kfs: uris.</description>
    +</property>
    +            
    + +
  • In the Hadoop conf directory edit core-site.xml, + adding the following (with appropriate values for + <server> and <port>): +
    +<property>
    +  <name>fs.default.name</name>
    +  <value>kfs://<server:port></value> 
    +</property>
    +
    +<property>
    +  <name>fs.kfs.metaServerHost</name>
    +  <value><server></value>
    +  <description>The location of the KFS meta server.</description>
    +</property>
    +
    +<property>
    +  <name>fs.kfs.metaServerPort</name>
    +  <value><port></value>
    +  <description>The location of the meta server's port.</description>
    +</property>
    +
    +
    +
  • Copy KFS's kfs-0.1.jar to Hadoop's lib directory. This step + enables Hadoop to load the KFS-specific modules. Note + that kfs-0.1.jar was built when you compiled the KFS source + code. This jar file contains code that calls KFS's client + library code via JNI; the native code is in KFS's + libkfsClient.so library.
  • When the Hadoop map/reduce trackers start up, those +processes (on local as well as remote nodes) will need to load +KFS's libkfsClient.so library. To simplify this, it is advisable to +store libkfsClient.so in an NFS-accessible directory (similar to where +Hadoop binaries/scripts are stored); then modify Hadoop's +conf/hadoop-env.sh by adding the following line, providing a suitable +value for <path>:
    +export LD_LIBRARY_PATH=<path>
    +
    + + +
  • Start only the map/reduce trackers +
    + example: execute Hadoop's bin/start-mapred.sh
+
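To make the steps above concrete, here is a minimal client-side sketch (not part of the patch itself), assuming the properties shown earlier are in place in core-site.xml. The class name, meta server host/port, and file path are placeholders; any client that goes through the generic FileSystem API, as the map/reduce trackers do, resolves kfs:// paths to KosmosFileSystem in the same way.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical example class, not shipped with Hadoop or KFS.
public class KfsReadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();   // picks up core-site.xml from the conf dir
    // Placeholder URI: substitute your meta server host/port and a real file path.
    Path file = new Path("kfs://kfshost:20000/user/demo/part-00000");
    FileSystem fs = file.getFileSystem(conf);   // resolved to KosmosFileSystem via fs.kfs.impl
    FSDataInputStream in = fs.open(file, 4096);
    byte[] buf = new byte[4096];
    int n;
    while ((n = in.read(buf)) > 0) {
      System.out.write(buf, 0, n);              // copy the file contents to stdout
    }
    in.close();
  }
}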
+ +If the map/reduce job trackers start up, all file-I/O is done to KFS. + + + diff --git a/src/java/org/apache/hadoop/fs/package.html b/src/java/org/apache/hadoop/fs/package.html new file mode 100644 index 00000000000..71bfdc8a40f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/package.html @@ -0,0 +1,23 @@ + + + + + +An abstract file system API. + + diff --git a/src/java/org/apache/hadoop/fs/permission/AccessControlException.java b/src/java/org/apache/hadoop/fs/permission/AccessControlException.java new file mode 100644 index 00000000000..49880f9dcdd --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/AccessControlException.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +import java.io.IOException; + +/** + * An exception class for access control related issues. + * @deprecated Use {@link org.apache.hadoop.security.AccessControlException} + * instead. + */ +@Deprecated +public class AccessControlException extends IOException { + //Required by {@link java.io.Serializable}. + private static final long serialVersionUID = 1L; + + /** + * Default constructor is needed for unwrapping from + * {@link org.apache.hadoop.ipc.RemoteException}. + */ + public AccessControlException() { + super("Permission denied."); + } + + /** + * Constructs an {@link AccessControlException} + * with the specified detail message. + * @param s the detail message. + */ + public AccessControlException(String s) { + super(s); + } + + /** + * Constructs a new exception with the specified cause and a detail + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public AccessControlException(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/apache/hadoop/fs/permission/FsAction.java b/src/java/org/apache/hadoop/fs/permission/FsAction.java new file mode 100644 index 00000000000..5aafd21b33a --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/FsAction.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +/** + * File system actions, e.g. read, write, etc. + */ +public enum FsAction { + // POSIX style + NONE("---"), + EXECUTE("--x"), + WRITE("-w-"), + WRITE_EXECUTE("-wx"), + READ("r--"), + READ_EXECUTE("r-x"), + READ_WRITE("rw-"), + ALL("rwx"); + + /** Retain reference to value array. */ + private final static FsAction[] vals = values(); + + /** Symbolic representation */ + public final String SYMBOL; + + private FsAction(String s) { + SYMBOL = s; + } + + /** + * Return true if this action implies that action. + * @param that + */ + public boolean implies(FsAction that) { + if (that != null) { + return (ordinal() & that.ordinal()) == that.ordinal(); + } + return false; + } + + /** AND operation. */ + public FsAction and(FsAction that) { + return vals[ordinal() & that.ordinal()]; + } + /** OR operation. */ + public FsAction or(FsAction that) { + return vals[ordinal() | that.ordinal()]; + } + /** NOT operation. */ + public FsAction not() { + return vals[7 - ordinal()]; + } +} diff --git a/src/java/org/apache/hadoop/fs/permission/FsPermission.java b/src/java/org/apache/hadoop/fs/permission/FsPermission.java new file mode 100644 index 00000000000..e92d35bceac --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/FsPermission.java @@ -0,0 +1,232 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.*; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * A class for file/directory permissions. + */ +public class FsPermission implements Writable { + static final WritableFactory FACTORY = new WritableFactory() { + public Writable newInstance() { return new FsPermission(); } + }; + static { // register a ctor + WritableFactories.setFactory(FsPermission.class, FACTORY); + } + + /** Create an immutable {@link FsPermission} object. 
*/ + public static FsPermission createImmutable(short permission) { + return new FsPermission(permission) { + public FsPermission applyUMask(FsPermission umask) { + throw new UnsupportedOperationException(); + } + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + //POSIX permission style + private FsAction useraction = null; + private FsAction groupaction = null; + private FsAction otheraction = null; + private boolean stickyBit = false; + + private FsPermission() {} + + /** + * Construct by the given {@link FsAction}. + * @param u user action + * @param g group action + * @param o other action + */ + public FsPermission(FsAction u, FsAction g, FsAction o) { + this(u, g, o, false); + } + + public FsPermission(FsAction u, FsAction g, FsAction o, boolean sb) { + set(u, g, o, sb); + } + + /** + * Construct by the given mode. + * @param mode + * @see #toShort() + */ + public FsPermission(short mode) { fromShort(mode); } + + /** + * Copy constructor + * + * @param other other permission + */ + public FsPermission(FsPermission other) { + this.useraction = other.useraction; + this.groupaction = other.groupaction; + this.otheraction = other.otheraction; + } + + /** Return user {@link FsAction}. */ + public FsAction getUserAction() {return useraction;} + + /** Return group {@link FsAction}. */ + public FsAction getGroupAction() {return groupaction;} + + /** Return other {@link FsAction}. */ + public FsAction getOtherAction() {return otheraction;} + + private void set(FsAction u, FsAction g, FsAction o, boolean sb) { + useraction = u; + groupaction = g; + otheraction = o; + stickyBit = sb; + } + + public void fromShort(short n) { + FsAction[] v = FsAction.values(); + + set(v[(n >>> 6) & 7], v[(n >>> 3) & 7], v[n & 7], (((n >>> 9) & 1) == 1) ); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + out.writeShort(toShort()); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + fromShort(in.readShort()); + } + + /** + * Create and initialize a {@link FsPermission} from {@link DataInput}. + */ + public static FsPermission read(DataInput in) throws IOException { + FsPermission p = new FsPermission(); + p.readFields(in); + return p; + } + + /** + * Encode the object to a short. + */ + public short toShort() { + int s = (stickyBit ? 1 << 9 : 0) | + (useraction.ordinal() << 6) | + (groupaction.ordinal() << 3) | + otheraction.ordinal(); + + return (short)s; + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj instanceof FsPermission) { + FsPermission that = (FsPermission)obj; + return this.useraction == that.useraction + && this.groupaction == that.groupaction + && this.otheraction == that.otheraction + && this.stickyBit == that.stickyBit; + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() {return toShort();} + + /** {@inheritDoc} */ + public String toString() { + String str = useraction.SYMBOL + groupaction.SYMBOL + otheraction.SYMBOL; + if(stickyBit) { + StringBuilder str2 = new StringBuilder(str); + str2.replace(str2.length() - 1, str2.length(), + otheraction.implies(FsAction.EXECUTE) ? 
"t" : "T"); + str = str2.toString(); + } + + return str; + } + + /** Apply a umask to this permission and return a new one */ + public FsPermission applyUMask(FsPermission umask) { + return new FsPermission(useraction.and(umask.useraction.not()), + groupaction.and(umask.groupaction.not()), + otheraction.and(umask.otheraction.not())); + } + + /** umask property label */ + public static final String UMASK_LABEL = "dfs.umask"; + public static final int DEFAULT_UMASK = 0022; + + /** Get the user file creation mask (umask) */ + public static FsPermission getUMask(Configuration conf) { + int umask = DEFAULT_UMASK; + if (conf != null) { + umask = conf.getInt(UMASK_LABEL, DEFAULT_UMASK); + } + return new FsPermission((short)umask); + } + + public boolean getStickyBit() { + return stickyBit; + } + + /** Set the user file creation mask (umask) */ + public static void setUMask(Configuration conf, FsPermission umask) { + conf.setInt(UMASK_LABEL, umask.toShort()); + } + + /** Get the default permission. */ + public static FsPermission getDefault() { + return new FsPermission((short)00777); + } + + /** + * Create a FsPermission from a Unix symbolic permission string + * @param unixSymbolicPermission e.g. "-rw-rw-rw-" + */ + public static FsPermission valueOf(String unixSymbolicPermission) { + if (unixSymbolicPermission == null) { + return null; + } + else if (unixSymbolicPermission.length() != 10) { + throw new IllegalArgumentException("length != 10(unixSymbolicPermission=" + + unixSymbolicPermission + ")"); + } + + int n = 0; + for(int i = 1; i < unixSymbolicPermission.length(); i++) { + n = n << 1; + char c = unixSymbolicPermission.charAt(i); + n += (c == '-' || c == 'T' || c == 'S') ? 0: 1; + } + + // Add sticky bit value if set + if(unixSymbolicPermission.charAt(9) == 't' || + unixSymbolicPermission.charAt(9) == 'T') + n += 01000; + + return new FsPermission((short)n); + } +} diff --git a/src/java/org/apache/hadoop/fs/permission/PermissionStatus.java b/src/java/org/apache/hadoop/fs/permission/PermissionStatus.java new file mode 100644 index 00000000000..4f36abbe625 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/permission/PermissionStatus.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.permission; + +import org.apache.hadoop.io.*; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Store permission related information. 
+ */ +public class PermissionStatus implements Writable { + static final WritableFactory FACTORY = new WritableFactory() { + public Writable newInstance() { return new PermissionStatus(); } + }; + static { // register a ctor + WritableFactories.setFactory(PermissionStatus.class, FACTORY); + } + + /** Create an immutable {@link PermissionStatus} object. */ + public static PermissionStatus createImmutable( + String user, String group, FsPermission permission) { + return new PermissionStatus(user, group, permission) { + public PermissionStatus applyUMask(FsPermission umask) { + throw new UnsupportedOperationException(); + } + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + private String username; + private String groupname; + private FsPermission permission; + + private PermissionStatus() {} + + /** Constructor */ + public PermissionStatus(String user, String group, FsPermission permission) { + username = user; + groupname = group; + this.permission = permission; + } + + /** Return user name */ + public String getUserName() {return username;} + + /** Return group name */ + public String getGroupName() {return groupname;} + + /** Return permission */ + public FsPermission getPermission() {return permission;} + + /** + * Apply umask. + * @see FsPermission#applyUMask(FsPermission) + */ + public PermissionStatus applyUMask(FsPermission umask) { + permission = permission.applyUMask(umask); + return this; + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + username = Text.readString(in); + groupname = Text.readString(in); + permission = FsPermission.read(in); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + write(out, username, groupname, permission); + } + + /** + * Create and initialize a {@link PermissionStatus} from {@link DataInput}. + */ + public static PermissionStatus read(DataInput in) throws IOException { + PermissionStatus p = new PermissionStatus(); + p.readFields(in); + return p; + } + + /** + * Serialize a {@link PermissionStatus} from its base components. + */ + public static void write(DataOutput out, + String username, + String groupname, + FsPermission permission) throws IOException { + Text.writeString(out, username); + Text.writeString(out, groupname); + permission.write(out); + } + + /** {@inheritDoc} */ + public String toString() { + return username + ":" + groupname + ":" + permission; + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/Block.java b/src/java/org/apache/hadoop/fs/s3/Block.java new file mode 100644 index 00000000000..e24ad264038 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/Block.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3; + +/** + * Holds metadata about a block of data being stored in a {@link FileSystemStore}. + */ +public class Block { + private long id; + + private long length; + + public Block(long id, long length) { + this.id = id; + this.length = length; + } + + public long getId() { + return id; + } + + public long getLength() { + return length; + } + + @Override + public String toString() { + return "Block[" + id + ", " + length + "]"; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/FileSystemStore.java b/src/java/org/apache/hadoop/fs/s3/FileSystemStore.java new file mode 100644 index 00000000000..a46472a8150 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/FileSystemStore.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +/** + * A facility for storing and retrieving {@link INode}s and {@link Block}s. + */ +public interface FileSystemStore { + + void initialize(URI uri, Configuration conf) throws IOException; + String getVersion() throws IOException; + + void storeINode(Path path, INode inode) throws IOException; + void storeBlock(Block block, File file) throws IOException; + + boolean inodeExists(Path path) throws IOException; + boolean blockExists(long blockId) throws IOException; + + INode retrieveINode(Path path) throws IOException; + File retrieveBlock(Block block, long byteRangeStart) throws IOException; + + void deleteINode(Path path) throws IOException; + void deleteBlock(Block block) throws IOException; + + Set listSubPaths(Path path) throws IOException; + Set listDeepSubPaths(Path path) throws IOException; + + /** + * Delete everything. Used for testing. + * @throws IOException + */ + void purge() throws IOException; + + /** + * Diagnostic method to dump all INodes to the console. + * @throws IOException + */ + void dump() throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/s3/INode.java b/src/java/org/apache/hadoop/fs/s3/INode.java new file mode 100644 index 00000000000..ec7f67c266c --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/INode.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Holds file metadata including type (regular file, or directory), + * and the list of blocks that are pointers to the data. + */ +public class INode { + + enum FileType { + DIRECTORY, FILE + } + + public static final FileType[] FILE_TYPES = { + FileType.DIRECTORY, + FileType.FILE + }; + + public static final INode DIRECTORY_INODE = new INode(FileType.DIRECTORY, null); + + private FileType fileType; + private Block[] blocks; + + public INode(FileType fileType, Block[] blocks) { + this.fileType = fileType; + if (isDirectory() && blocks != null) { + throw new IllegalArgumentException("A directory cannot contain blocks."); + } + this.blocks = blocks; + } + + public Block[] getBlocks() { + return blocks; + } + + public FileType getFileType() { + return fileType; + } + + public boolean isDirectory() { + return fileType == FileType.DIRECTORY; + } + + public boolean isFile() { + return fileType == FileType.FILE; + } + + public long getSerializedLength() { + return 1L + (blocks == null ? 0 : 4 + blocks.length * 16); + } + + + public InputStream serialize() throws IOException { + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(bytes); + out.writeByte(fileType.ordinal()); + if (isFile()) { + out.writeInt(blocks.length); + for (int i = 0; i < blocks.length; i++) { + out.writeLong(blocks[i].getId()); + out.writeLong(blocks[i].getLength()); + } + } + out.close(); + return new ByteArrayInputStream(bytes.toByteArray()); + } + + public static INode deserialize(InputStream in) throws IOException { + if (in == null) { + return null; + } + DataInputStream dataIn = new DataInputStream(in); + FileType fileType = INode.FILE_TYPES[dataIn.readByte()]; + switch (fileType) { + case DIRECTORY: + in.close(); + return INode.DIRECTORY_INODE; + case FILE: + int numBlocks = dataIn.readInt(); + Block[] blocks = new Block[numBlocks]; + for (int i = 0; i < numBlocks; i++) { + long id = dataIn.readLong(); + long length = dataIn.readLong(); + blocks[i] = new Block(id, length); + } + in.close(); + return new INode(fileType, blocks); + default: + throw new IllegalArgumentException("Cannot deserialize inode."); + } + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java b/src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java new file mode 100644 index 00000000000..b5131d62449 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java @@ -0,0 +1,390 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3.INode.FileType; +import org.jets3t.service.S3Service; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.security.AWSCredentials; + +class Jets3tFileSystemStore implements FileSystemStore { + + private static final String FILE_SYSTEM_NAME = "fs"; + private static final String FILE_SYSTEM_VALUE = "Hadoop"; + + private static final String FILE_SYSTEM_TYPE_NAME = "fs-type"; + private static final String FILE_SYSTEM_TYPE_VALUE = "block"; + + private static final String FILE_SYSTEM_VERSION_NAME = "fs-version"; + private static final String FILE_SYSTEM_VERSION_VALUE = "1"; + + private static final Map METADATA = + new HashMap(); + + static { + METADATA.put(FILE_SYSTEM_NAME, FILE_SYSTEM_VALUE); + METADATA.put(FILE_SYSTEM_TYPE_NAME, FILE_SYSTEM_TYPE_VALUE); + METADATA.put(FILE_SYSTEM_VERSION_NAME, FILE_SYSTEM_VERSION_VALUE); + } + + private static final String PATH_DELIMITER = Path.SEPARATOR; + private static final String BLOCK_PREFIX = "block_"; + + private Configuration conf; + + private S3Service s3Service; + + private S3Bucket bucket; + + private int bufferSize; + + private static final Log LOG = + LogFactory.getLog(Jets3tFileSystemStore.class.getName()); + + public void initialize(URI uri, Configuration conf) throws IOException { + + this.conf = conf; + + S3Credentials s3Credentials = new S3Credentials(); + s3Credentials.initialize(uri, conf); + try { + AWSCredentials awsCredentials = + new AWSCredentials(s3Credentials.getAccessKey(), + s3Credentials.getSecretAccessKey()); + this.s3Service = new RestS3Service(awsCredentials); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + bucket = new S3Bucket(uri.getHost()); + + this.bufferSize = conf.getInt("io.file.buffer.size", 4096); + } + + public String getVersion() throws IOException { + return FILE_SYSTEM_VERSION_VALUE; + } + + private void delete(String key) throws IOException { + try { + s3Service.deleteObject(bucket, key); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void deleteINode(Path path) throws IOException { + delete(pathToKey(path)); + } + + public void deleteBlock(Block block) throws IOException { + 
delete(blockToKey(block)); + } + + public boolean inodeExists(Path path) throws IOException { + InputStream in = get(pathToKey(path), true); + if (in == null) { + return false; + } + in.close(); + return true; + } + + public boolean blockExists(long blockId) throws IOException { + InputStream in = get(blockToKey(blockId), false); + if (in == null) { + return false; + } + in.close(); + return true; + } + + private InputStream get(String key, boolean checkMetadata) + throws IOException { + + try { + S3Object object = s3Service.getObject(bucket, key); + if (checkMetadata) { + checkMetadata(object); + } + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private InputStream get(String key, long byteRangeStart) throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key, null, null, null, + null, byteRangeStart, null); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private void checkMetadata(S3Object object) throws S3FileSystemException, + S3ServiceException { + + String name = (String) object.getMetadata(FILE_SYSTEM_NAME); + if (!FILE_SYSTEM_VALUE.equals(name)) { + throw new S3FileSystemException("Not a Hadoop S3 file."); + } + String type = (String) object.getMetadata(FILE_SYSTEM_TYPE_NAME); + if (!FILE_SYSTEM_TYPE_VALUE.equals(type)) { + throw new S3FileSystemException("Not a block file."); + } + String dataVersion = (String) object.getMetadata(FILE_SYSTEM_VERSION_NAME); + if (!FILE_SYSTEM_VERSION_VALUE.equals(dataVersion)) { + throw new VersionMismatchException(FILE_SYSTEM_VERSION_VALUE, + dataVersion); + } + } + + public INode retrieveINode(Path path) throws IOException { + return INode.deserialize(get(pathToKey(path), true)); + } + + public File retrieveBlock(Block block, long byteRangeStart) + throws IOException { + File fileBlock = null; + InputStream in = null; + OutputStream out = null; + try { + fileBlock = newBackupFile(); + in = get(blockToKey(block), byteRangeStart); + out = new BufferedOutputStream(new FileOutputStream(fileBlock)); + byte[] buf = new byte[bufferSize]; + int numRead; + while ((numRead = in.read(buf)) >= 0) { + out.write(buf, 0, numRead); + } + return fileBlock; + } catch (IOException e) { + // close output stream to file then delete file + closeQuietly(out); + out = null; // to prevent a second close + if (fileBlock != null) { + boolean b = fileBlock.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + } + throw e; + } finally { + closeQuietly(out); + closeQuietly(in); + } + } + + private File newBackupFile() throws IOException { + File dir = new File(conf.get("fs.s3.buffer.dir")); + if (!dir.exists() && !dir.mkdirs()) { + throw new IOException("Cannot create S3 buffer directory: " + dir); + } + File result = File.createTempFile("input-", ".tmp", dir); + result.deleteOnExit(); + return result; + } + + public Set listSubPaths(Path path) throws IOException { + try { + String prefix = pathToKey(path); + if (!prefix.endsWith(PATH_DELIMITER)) { + prefix += PATH_DELIMITER; + } + S3Object[] objects = s3Service.listObjects(bucket, prefix, PATH_DELIMITER); + Set prefixes = new TreeSet(); + for (int i = 0; i < 
objects.length; i++) { + prefixes.add(keyToPath(objects[i].getKey())); + } + prefixes.remove(path); + return prefixes; + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public Set listDeepSubPaths(Path path) throws IOException { + try { + String prefix = pathToKey(path); + if (!prefix.endsWith(PATH_DELIMITER)) { + prefix += PATH_DELIMITER; + } + S3Object[] objects = s3Service.listObjects(bucket, prefix, null); + Set prefixes = new TreeSet(); + for (int i = 0; i < objects.length; i++) { + prefixes.add(keyToPath(objects[i].getKey())); + } + prefixes.remove(path); + return prefixes; + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private void put(String key, InputStream in, long length, boolean storeMetadata) + throws IOException { + + try { + S3Object object = new S3Object(key); + object.setDataInputStream(in); + object.setContentType("binary/octet-stream"); + object.setContentLength(length); + if (storeMetadata) { + object.addAllMetadata(METADATA); + } + s3Service.putObject(bucket, object); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void storeINode(Path path, INode inode) throws IOException { + put(pathToKey(path), inode.serialize(), inode.getSerializedLength(), true); + } + + public void storeBlock(Block block, File file) throws IOException { + BufferedInputStream in = null; + try { + in = new BufferedInputStream(new FileInputStream(file)); + put(blockToKey(block), in, block.getLength(), false); + } finally { + closeQuietly(in); + } + } + + private void closeQuietly(Closeable closeable) { + if (closeable != null) { + try { + closeable.close(); + } catch (IOException e) { + // ignore + } + } + } + + private String pathToKey(Path path) { + if (!path.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + path); + } + return path.toUri().getPath(); + } + + private Path keyToPath(String key) { + return new Path(key); + } + + private String blockToKey(long blockId) { + return BLOCK_PREFIX + blockId; + } + + private String blockToKey(Block block) { + return blockToKey(block.getId()); + } + + public void purge() throws IOException { + try { + S3Object[] objects = s3Service.listObjects(bucket); + for (int i = 0; i < objects.length; i++) { + s3Service.deleteObject(bucket, objects[i].getKey()); + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void dump() throws IOException { + StringBuilder sb = new StringBuilder("S3 Filesystem, "); + sb.append(bucket.getName()).append("\n"); + try { + S3Object[] objects = s3Service.listObjects(bucket, PATH_DELIMITER, null); + for (int i = 0; i < objects.length; i++) { + Path path = keyToPath(objects[i].getKey()); + sb.append(path).append("\n"); + INode m = retrieveINode(path); + sb.append("\t").append(m.getFileType()).append("\n"); + if (m.getFileType() == FileType.DIRECTORY) { + continue; + } + for (int j = 0; j < m.getBlocks().length; j++) { + sb.append("\t").append(m.getBlocks()[j]).append("\n"); + } + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + System.out.println(sb); 
+ } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/MigrationTool.java b/src/java/org/apache/hadoop/fs/s3/MigrationTool.java new file mode 100644 index 00000000000..cce31f0869f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/MigrationTool.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.jets3t.service.S3Service; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.security.AWSCredentials; + +/** + *

+ * <p>
+ * This class is a tool for migrating data from an older to a newer version
+ * of an S3 filesystem.
+ * </p>

+ *

+ * <p>
+ * All files in the filesystem are migrated by re-writing the block metadata
+ * - no datafiles are touched.
+ * </p>
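+ * <p>
+ * A minimal invocation sketch (the bucket name is a placeholder; this simply
+ * mirrors the ToolRunner entry point in main below):
+ * </p>
+ * <pre>
+ *   int res = ToolRunner.run(new MigrationTool(),
+ *                            new String[] { "s3://BUCKET" });
+ *   System.exit(res);
+ * </pre>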

+ */ +public class MigrationTool extends Configured implements Tool { + + private S3Service s3Service; + private S3Bucket bucket; + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MigrationTool(), args); + System.exit(res); + } + + public int run(String[] args) throws Exception { + + if (args.length == 0) { + System.err.println("Usage: MigrationTool "); + System.err.println("\t\tfilesystem to migrate"); + ToolRunner.printGenericCommandUsage(System.err); + return -1; + } + + URI uri = URI.create(args[0]); + + initialize(uri); + + FileSystemStore newStore = new Jets3tFileSystemStore(); + newStore.initialize(uri, getConf()); + + if (get("%2F") != null) { + System.err.println("Current version number is [unversioned]."); + System.err.println("Target version number is " + + newStore.getVersion() + "."); + Store oldStore = new UnversionedStore(); + migrate(oldStore, newStore); + return 0; + } else { + S3Object root = get("/"); + if (root != null) { + String version = (String) root.getMetadata("fs-version"); + if (version == null) { + System.err.println("Can't detect version - exiting."); + } else { + String newVersion = newStore.getVersion(); + System.err.println("Current version number is " + version + "."); + System.err.println("Target version number is " + newVersion + "."); + if (version.equals(newStore.getVersion())) { + System.err.println("No migration required."); + return 0; + } + // use version number to create Store + //Store oldStore = ... + //migrate(oldStore, newStore); + System.err.println("Not currently implemented."); + return 0; + } + } + System.err.println("Can't detect version - exiting."); + return 0; + } + + } + + public void initialize(URI uri) throws IOException { + + + + try { + String accessKey = null; + String secretAccessKey = null; + String userInfo = uri.getUserInfo(); + if (userInfo != null) { + int index = userInfo.indexOf(':'); + if (index != -1) { + accessKey = userInfo.substring(0, index); + secretAccessKey = userInfo.substring(index + 1); + } else { + accessKey = userInfo; + } + } + if (accessKey == null) { + accessKey = getConf().get("fs.s3.awsAccessKeyId"); + } + if (secretAccessKey == null) { + secretAccessKey = getConf().get("fs.s3.awsSecretAccessKey"); + } + if (accessKey == null && secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID and Secret Access Key " + + "must be specified as the username " + + "or password (respectively) of a s3 URL, " + + "or by setting the " + + "fs.s3.awsAccessKeyId or " + + "fs.s3.awsSecretAccessKey properties (respectively)."); + } else if (accessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID must be specified " + + "as the username of a s3 URL, or by setting the " + + "fs.s3.awsAccessKeyId property."); + } else if (secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Secret Access Key must be specified " + + "as the password of a s3 URL, or by setting the " + + "fs.s3.awsSecretAccessKey property."); + } + AWSCredentials awsCredentials = + new AWSCredentials(accessKey, secretAccessKey); + this.s3Service = new RestS3Service(awsCredentials); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + bucket = new S3Bucket(uri.getHost()); + } + + private void migrate(Store oldStore, FileSystemStore newStore) + throws IOException { + for (Path path : oldStore.listAllPaths()) { + INode inode = 
oldStore.retrieveINode(path); + oldStore.deleteINode(path); + newStore.storeINode(path, inode); + } + } + + private S3Object get(String key) { + try { + return s3Service.getObject(bucket, key); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + } + return null; + } + + interface Store { + + Set listAllPaths() throws IOException; + INode retrieveINode(Path path) throws IOException; + void deleteINode(Path path) throws IOException; + + } + + class UnversionedStore implements Store { + + public Set listAllPaths() throws IOException { + try { + String prefix = urlEncode(Path.SEPARATOR); + S3Object[] objects = s3Service.listObjects(bucket, prefix, null); + Set prefixes = new TreeSet(); + for (int i = 0; i < objects.length; i++) { + prefixes.add(keyToPath(objects[i].getKey())); + } + return prefixes; + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void deleteINode(Path path) throws IOException { + delete(pathToKey(path)); + } + + private void delete(String key) throws IOException { + try { + s3Service.deleteObject(bucket, key); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public INode retrieveINode(Path path) throws IOException { + return INode.deserialize(get(pathToKey(path))); + } + + private InputStream get(String key) throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + private String pathToKey(Path path) { + if (!path.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + path); + } + return urlEncode(path.toUri().getPath()); + } + + private Path keyToPath(String key) { + return new Path(urlDecode(key)); + } + + private String urlEncode(String s) { + try { + return URLEncoder.encode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Should never happen since every implementation of the Java Platform + // is required to support UTF-8. + // See http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html + throw new IllegalStateException(e); + } + } + + private String urlDecode(String s) { + try { + return URLDecoder.decode(s, "UTF-8"); + } catch (UnsupportedEncodingException e) { + // Should never happen since every implementation of the Java Platform + // is required to support UTF-8. + // See http://java.sun.com/j2se/1.5.0/docs/api/java/nio/charset/Charset.html + throw new IllegalStateException(e); + } + } + + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3Credentials.java b/src/java/org/apache/hadoop/fs/s3/S3Credentials.java new file mode 100644 index 00000000000..039499e2a65 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3Credentials.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; + +/** + *

+ * <p>
+ * Extracts AWS credentials from the filesystem URI or configuration.
+ * </p>
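+ * <p>
+ * A minimal sketch of the two supported sources (ID, SECRET and BUCKET are
+ * placeholders; the property names follow the scheme-based pattern used by
+ * initialize below):
+ * </p>
+ * <pre>
+ *   // either embed the credentials in the filesystem URI ...
+ *   s3://ID:SECRET@BUCKET
+ *
+ *   // ... or supply them through the configuration
+ *   conf.set("fs.s3.awsAccessKeyId", "ID");
+ *   conf.set("fs.s3.awsSecretAccessKey", "SECRET");
+ * </pre>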

+ */ +public class S3Credentials { + + private String accessKey; + private String secretAccessKey; + + /** + * @throws IllegalArgumentException if credentials for S3 cannot be + * determined. + */ + public void initialize(URI uri, Configuration conf) { + if (uri.getHost() == null) { + throw new IllegalArgumentException("Invalid hostname in URI " + uri); + } + + String userInfo = uri.getUserInfo(); + if (userInfo != null) { + int index = userInfo.indexOf(':'); + if (index != -1) { + accessKey = userInfo.substring(0, index); + secretAccessKey = userInfo.substring(index + 1); + } else { + accessKey = userInfo; + } + } + + String scheme = uri.getScheme(); + String accessKeyProperty = String.format("fs.%s.awsAccessKeyId", scheme); + String secretAccessKeyProperty = + String.format("fs.%s.awsSecretAccessKey", scheme); + if (accessKey == null) { + accessKey = conf.get(accessKeyProperty); + } + if (secretAccessKey == null) { + secretAccessKey = conf.get(secretAccessKeyProperty); + } + if (accessKey == null && secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID and Secret Access " + + "Key must be specified as the " + + "username or password " + + "(respectively) of a " + scheme + + " URL, or by setting the " + + accessKeyProperty + " or " + + secretAccessKeyProperty + + " properties (respectively)."); + } else if (accessKey == null) { + throw new IllegalArgumentException("AWS " + + "Access Key ID must be specified " + + "as the username of a " + scheme + + " URL, or by setting the " + + accessKeyProperty + " property."); + } else if (secretAccessKey == null) { + throw new IllegalArgumentException("AWS " + + "Secret Access Key must be " + + "specified as the password of a " + + scheme + " URL, or by setting the " + + secretAccessKeyProperty + + " property."); + } + + } + + public String getAccessKey() { + return accessKey; + } + + public String getSecretAccessKey() { + return secretAccessKey; + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3Exception.java b/src/java/org/apache/hadoop/fs/s3/S3Exception.java new file mode 100644 index 00000000000..7047676a6c7 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3Exception.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.IOException; + +/** + * Thrown if there is a problem communicating with Amazon S3. 
+ */ +public class S3Exception extends IOException { + + private static final long serialVersionUID = 1L; + + public S3Exception(Throwable t) { + super(t); + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3FileSystem.java b/src/java/org/apache/hadoop/fs/s3/S3FileSystem.java new file mode 100644 index 00000000000..b0013aa0a96 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3FileSystem.java @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3native.NativeS3FileSystem; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.util.Progressable; + +/** + *

+ * <p>
+ * A block-based {@link FileSystem} backed by Amazon S3.
+ * </p>
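+ * <p>
+ * A minimal usage sketch (bucket and credential values are placeholders):
+ * </p>
+ * <pre>
+ *   Configuration conf = new Configuration();
+ *   conf.set("fs.s3.awsAccessKeyId", "ID");
+ *   conf.set("fs.s3.awsSecretAccessKey", "SECRET");
+ *   FileSystem fs = FileSystem.get(URI.create("s3://BUCKET"), conf);
+ *   FSDataOutputStream out = fs.create(new Path("/dir1/file1"));
+ * </pre>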

+ * @see NativeS3FileSystem + */ +public class S3FileSystem extends FileSystem { + + private URI uri; + + private FileSystemStore store; + + private Path workingDir; + + public S3FileSystem() { + // set store in initialize() + } + + public S3FileSystem(FileSystemStore store) { + this.store = store; + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + if (store == null) { + store = createDefaultStore(conf); + } + store.initialize(uri, conf); + setConf(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + this.workingDir = + new Path("/user", System.getProperty("user.name")).makeQualified(this); + } + + private static FileSystemStore createDefaultStore(Configuration conf) { + FileSystemStore store = new Jets3tFileSystemStore(); + + RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep( + conf.getInt("fs.s3.maxRetries", 4), + conf.getLong("fs.s3.sleepTimeSeconds", 10), TimeUnit.SECONDS); + Map,RetryPolicy> exceptionToPolicyMap = + new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(IOException.class, basePolicy); + exceptionToPolicyMap.put(S3Exception.class, basePolicy); + + RetryPolicy methodPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + Map methodNameToPolicyMap = new HashMap(); + methodNameToPolicyMap.put("storeBlock", methodPolicy); + methodNameToPolicyMap.put("retrieveBlock", methodPolicy); + + return (FileSystemStore) RetryProxy.create(FileSystemStore.class, + store, methodNameToPolicyMap); + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + + @Override + public void setWorkingDirectory(Path dir) { + workingDir = makeAbsolute(dir); + } + + private Path makeAbsolute(Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workingDir, path); + } + + /** + * @param permission Currently ignored. 
+ */ + @Override + public boolean mkdirs(Path path, FsPermission permission) throws IOException { + Path absolutePath = makeAbsolute(path); + List paths = new ArrayList(); + do { + paths.add(0, absolutePath); + absolutePath = absolutePath.getParent(); + } while (absolutePath != null); + + boolean result = true; + for (Path p : paths) { + result &= mkdir(p); + } + return result; + } + + private boolean mkdir(Path path) throws IOException { + Path absolutePath = makeAbsolute(path); + INode inode = store.retrieveINode(absolutePath); + if (inode == null) { + store.storeINode(absolutePath, INode.DIRECTORY_INODE); + } else if (inode.isFile()) { + throw new IOException(String.format( + "Can't make directory for path %s since it is a file.", + absolutePath)); + } + return true; + } + + @Override + public boolean isFile(Path path) throws IOException { + INode inode = store.retrieveINode(makeAbsolute(path)); + if (inode == null) { + return false; + } + return inode.isFile(); + } + + private INode checkFile(Path path) throws IOException { + INode inode = store.retrieveINode(makeAbsolute(path)); + if (inode == null) { + throw new IOException("No such file."); + } + if (inode.isDirectory()) { + throw new IOException("Path " + path + " is a directory."); + } + return inode; + } + + @Override + public FileStatus[] listStatus(Path f) throws IOException { + Path absolutePath = makeAbsolute(f); + INode inode = store.retrieveINode(absolutePath); + if (inode == null) { + return null; + } + if (inode.isFile()) { + return new FileStatus[] { + new S3FileStatus(f.makeQualified(this), inode) + }; + } + ArrayList ret = new ArrayList(); + for (Path p : store.listSubPaths(absolutePath)) { + ret.add(getFileStatus(p.makeQualified(this))); + } + return ret.toArray(new FileStatus[0]); + } + + /** This optional operation is not yet supported. */ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + /** + * @param permission Currently ignored. 
+ */ + @Override + public FSDataOutputStream create(Path file, FsPermission permission, + boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) + throws IOException { + + INode inode = store.retrieveINode(makeAbsolute(file)); + if (inode != null) { + if (overwrite) { + delete(file, true); + } else { + throw new IOException("File already exists: " + file); + } + } else { + Path parent = file.getParent(); + if (parent != null) { + if (!mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent.toString()); + } + } + } + return new FSDataOutputStream + (new S3OutputStream(getConf(), store, makeAbsolute(file), + blockSize, progress, bufferSize), + statistics); + } + + @Override + public FSDataInputStream open(Path path, int bufferSize) throws IOException { + INode inode = checkFile(path); + return new FSDataInputStream(new S3InputStream(getConf(), store, inode, + statistics)); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + Path absoluteSrc = makeAbsolute(src); + INode srcINode = store.retrieveINode(absoluteSrc); + if (srcINode == null) { + // src path doesn't exist + return false; + } + Path absoluteDst = makeAbsolute(dst); + INode dstINode = store.retrieveINode(absoluteDst); + if (dstINode != null && dstINode.isDirectory()) { + absoluteDst = new Path(absoluteDst, absoluteSrc.getName()); + dstINode = store.retrieveINode(absoluteDst); + } + if (dstINode != null) { + // dst path already exists - can't overwrite + return false; + } + Path dstParent = absoluteDst.getParent(); + if (dstParent != null) { + INode dstParentINode = store.retrieveINode(dstParent); + if (dstParentINode == null || dstParentINode.isFile()) { + // dst parent doesn't exist or is a file + return false; + } + } + return renameRecursive(absoluteSrc, absoluteDst); + } + + private boolean renameRecursive(Path src, Path dst) throws IOException { + INode srcINode = store.retrieveINode(src); + store.storeINode(dst, srcINode); + store.deleteINode(src); + if (srcINode.isDirectory()) { + for (Path oldSrc : store.listDeepSubPaths(src)) { + INode inode = store.retrieveINode(oldSrc); + if (inode == null) { + return false; + } + String oldSrcPath = oldSrc.toUri().getPath(); + String srcPath = src.toUri().getPath(); + String dstPath = dst.toUri().getPath(); + Path newDst = new Path(oldSrcPath.replaceFirst(srcPath, dstPath)); + store.storeINode(newDst, inode); + store.deleteINode(oldSrc); + } + } + return true; + } + + public boolean delete(Path path, boolean recursive) throws IOException { + Path absolutePath = makeAbsolute(path); + INode inode = store.retrieveINode(absolutePath); + if (inode == null) { + return false; + } + if (inode.isFile()) { + store.deleteINode(absolutePath); + for (Block block: inode.getBlocks()) { + store.deleteBlock(block); + } + } else { + FileStatus[] contents = listStatus(absolutePath); + if (contents == null) { + return false; + } + if ((contents.length !=0) && (!recursive)) { + throw new IOException("Directory " + path.toString() + + " is not empty."); + } + for (FileStatus p:contents) { + if (!delete(p.getPath(), recursive)) { + return false; + } + } + store.deleteINode(absolutePath); + } + return true; + } + + /** + * FileStatus for S3 file systems. 
+ */ + @Override + public FileStatus getFileStatus(Path f) throws IOException { + INode inode = store.retrieveINode(makeAbsolute(f)); + if (inode == null) { + throw new FileNotFoundException(f + ": No such file or directory."); + } + return new S3FileStatus(f.makeQualified(this), inode); + } + + // diagnostic methods + + void dump() throws IOException { + store.dump(); + } + + void purge() throws IOException { + store.purge(); + } + + private static class S3FileStatus extends FileStatus { + + S3FileStatus(Path f, INode inode) throws IOException { + super(findLength(inode), inode.isDirectory(), 1, + findBlocksize(inode), 0, f); + } + + private static long findLength(INode inode) { + if (!inode.isDirectory()) { + long length = 0L; + for (Block block : inode.getBlocks()) { + length += block.getLength(); + } + return length; + } + return 0; + } + + private static long findBlocksize(INode inode) { + final Block[] ret = inode.getBlocks(); + return ret == null ? 0L : ret[0].getLength(); + } + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java b/src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java new file mode 100644 index 00000000000..f4a5141adbc --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3FileSystemException.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3; + +import java.io.IOException; + +/** + * Thrown when there is a fatal exception while using {@link S3FileSystem}. + */ +public class S3FileSystemException extends IOException { + private static final long serialVersionUID = 1L; + + public S3FileSystemException(String message) { + super(message); + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3InputStream.java b/src/java/org/apache/hadoop/fs/s3/S3InputStream.java new file mode 100644 index 00000000000..db5eded7ad3 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3InputStream.java @@ -0,0 +1,211 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3; + +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem; + +class S3InputStream extends FSInputStream { + + private FileSystemStore store; + + private Block[] blocks; + + private boolean closed; + + private long fileLength; + + private long pos = 0; + + private File blockFile; + + private DataInputStream blockStream; + + private long blockEnd = -1; + + private FileSystem.Statistics stats; + + private static final Log LOG = + LogFactory.getLog(S3InputStream.class.getName()); + + + @Deprecated + public S3InputStream(Configuration conf, FileSystemStore store, + INode inode) { + this(conf, store, inode, null); + } + + public S3InputStream(Configuration conf, FileSystemStore store, + INode inode, FileSystem.Statistics stats) { + + this.store = store; + this.stats = stats; + this.blocks = inode.getBlocks(); + for (Block block : blocks) { + this.fileLength += block.getLength(); + } + } + + @Override + public synchronized long getPos() throws IOException { + return pos; + } + + @Override + public synchronized int available() throws IOException { + return (int) (fileLength - pos); + } + + @Override + public synchronized void seek(long targetPos) throws IOException { + if (targetPos > fileLength) { + throw new IOException("Cannot seek after EOF"); + } + pos = targetPos; + blockEnd = -1; + } + + @Override + public synchronized boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public synchronized int read() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + int result = -1; + if (pos < fileLength) { + if (pos > blockEnd) { + blockSeekTo(pos); + } + result = blockStream.read(); + if (result >= 0) { + pos++; + } + } + if (stats != null & result >= 0) { + stats.incrementBytesRead(1); + } + return result; + } + + @Override + public synchronized int read(byte buf[], int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + if (pos < fileLength) { + if (pos > blockEnd) { + blockSeekTo(pos); + } + int realLen = Math.min(len, (int) (blockEnd - pos + 1)); + int result = blockStream.read(buf, off, realLen); + if (result >= 0) { + pos += result; + } + if (stats != null && result > 0) { + stats.incrementBytesRead(result); + } + return result; + } + return -1; + } + + private synchronized void blockSeekTo(long target) throws IOException { + // + // Compute desired block + // + int targetBlock = -1; + long targetBlockStart = 0; + long targetBlockEnd = 0; + for (int i = 0; i < blocks.length; i++) { + long blockLength = blocks[i].getLength(); + targetBlockEnd = targetBlockStart + blockLength - 1; + + if (target >= targetBlockStart && target <= targetBlockEnd) { + targetBlock = i; + break; + } else { + targetBlockStart = targetBlockEnd + 1; + } + } + if (targetBlock < 0) { + throw new IOException( + "Impossible situation: could not find target position " + target); + } + long offsetIntoBlock = target - targetBlockStart; + + // read block blocks[targetBlock] from position offsetIntoBlock + + this.blockFile = store.retrieveBlock(blocks[targetBlock], offsetIntoBlock); + + this.pos = target; + this.blockEnd = targetBlockEnd; + this.blockStream = new DataInputStream(new 
FileInputStream(blockFile)); + + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + if (blockStream != null) { + blockStream.close(); + blockStream = null; + } + if (blockFile != null) { + boolean b = blockFile.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + } + super.close(); + closed = true; + } + + /** + * We don't support marks. + */ + @Override + public boolean markSupported() { + return false; + } + + @Override + public void mark(int readLimit) { + // Do nothing + } + + @Override + public void reset() throws IOException { + throw new IOException("Mark not supported"); + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/S3OutputStream.java b/src/java/org/apache/hadoop/fs/s3/S3OutputStream.java new file mode 100644 index 00000000000..f3fee2d5342 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/S3OutputStream.java @@ -0,0 +1,231 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3.INode.FileType; +import org.apache.hadoop.util.Progressable; + +class S3OutputStream extends OutputStream { + + private Configuration conf; + + private int bufferSize; + + private FileSystemStore store; + + private Path path; + + private long blockSize; + + private File backupFile; + + private OutputStream backupStream; + + private Random r = new Random(); + + private boolean closed; + + private int pos = 0; + + private long filePos = 0; + + private int bytesWrittenToBlock = 0; + + private byte[] outBuf; + + private List blocks = new ArrayList(); + + private Block nextBlock; + + private static final Log LOG = + LogFactory.getLog(S3OutputStream.class.getName()); + + + public S3OutputStream(Configuration conf, FileSystemStore store, + Path path, long blockSize, Progressable progress, + int buffersize) throws IOException { + + this.conf = conf; + this.store = store; + this.path = path; + this.blockSize = blockSize; + this.backupFile = newBackupFile(); + this.backupStream = new FileOutputStream(backupFile); + this.bufferSize = buffersize; + this.outBuf = new byte[bufferSize]; + + } + + private File newBackupFile() throws IOException { + File dir = new File(conf.get("fs.s3.buffer.dir")); + if (!dir.exists() && !dir.mkdirs()) { + throw new IOException("Cannot create S3 buffer directory: " + dir); + } + File result = File.createTempFile("output-", ".tmp", dir); + 
result.deleteOnExit(); + return result; + } + + public long getPos() throws IOException { + return filePos; + } + + @Override + public synchronized void write(int b) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if ((bytesWrittenToBlock + pos == blockSize) || (pos >= bufferSize)) { + flush(); + } + outBuf[pos++] = (byte) b; + filePos++; + } + + @Override + public synchronized void write(byte b[], int off, int len) throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + while (len > 0) { + int remaining = bufferSize - pos; + int toWrite = Math.min(remaining, len); + System.arraycopy(b, off, outBuf, pos, toWrite); + pos += toWrite; + off += toWrite; + len -= toWrite; + filePos += toWrite; + + if ((bytesWrittenToBlock + pos >= blockSize) || (pos == bufferSize)) { + flush(); + } + } + } + + @Override + public synchronized void flush() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + + if (bytesWrittenToBlock + pos >= blockSize) { + flushData((int) blockSize - bytesWrittenToBlock); + } + if (bytesWrittenToBlock == blockSize) { + endBlock(); + } + flushData(pos); + } + + private synchronized void flushData(int maxPos) throws IOException { + int workingPos = Math.min(pos, maxPos); + + if (workingPos > 0) { + // + // To the local block backup, write just the bytes + // + backupStream.write(outBuf, 0, workingPos); + + // + // Track position + // + bytesWrittenToBlock += workingPos; + System.arraycopy(outBuf, workingPos, outBuf, 0, pos - workingPos); + pos -= workingPos; + } + } + + private synchronized void endBlock() throws IOException { + // + // Done with local copy + // + backupStream.close(); + + // + // Send it to S3 + // + // TODO: Use passed in Progressable to report progress. + nextBlockOutputStream(); + store.storeBlock(nextBlock, backupFile); + internalClose(); + + // + // Delete local backup, start new one + // + boolean b = backupFile.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + backupFile = newBackupFile(); + backupStream = new FileOutputStream(backupFile); + bytesWrittenToBlock = 0; + } + + private synchronized void nextBlockOutputStream() throws IOException { + long blockId = r.nextLong(); + while (store.blockExists(blockId)) { + blockId = r.nextLong(); + } + nextBlock = new Block(blockId, bytesWrittenToBlock); + blocks.add(nextBlock); + bytesWrittenToBlock = 0; + } + + private synchronized void internalClose() throws IOException { + INode inode = new INode(FileType.FILE, blocks.toArray(new Block[blocks + .size()])); + store.storeINode(path, inode); + } + + @Override + public synchronized void close() throws IOException { + if (closed) { + return; + } + + flush(); + if (filePos == 0 || bytesWrittenToBlock != 0) { + endBlock(); + } + + backupStream.close(); + boolean b = backupFile.delete(); + if (!b) { + LOG.warn("Ignoring failed delete"); + } + + super.close(); + + closed = true; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java b/src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java new file mode 100644 index 00000000000..22c6d67f777 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/VersionMismatchException.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3; + +/** + * Thrown when Hadoop cannot read the version of the data stored + * in {@link S3FileSystem}. + */ +public class VersionMismatchException extends S3FileSystemException { + private static final long serialVersionUID = 1L; + + public VersionMismatchException(String clientVersion, String dataVersion) { + super("Version mismatch: client expects version " + clientVersion + + ", but data has version " + + (dataVersion == null ? "[unversioned]" : dataVersion)); + } +} diff --git a/src/java/org/apache/hadoop/fs/s3/package.html b/src/java/org/apache/hadoop/fs/s3/package.html new file mode 100644 index 00000000000..dd601e104e5 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3/package.html @@ -0,0 +1,55 @@ + + + + + + +

+<p>
+A distributed, block-based implementation of {@link
+org.apache.hadoop.fs.FileSystem} that uses Amazon S3
+as a backing store.
+</p>

+ +

+<p>
+Files are stored in S3 as blocks (represented by
+{@link org.apache.hadoop.fs.s3.Block}), which have an ID and a length.
+Block metadata is stored in S3 as a small record (represented by
+{@link org.apache.hadoop.fs.s3.INode}) using the URL-encoded
+path string as a key. Inodes record the file type (regular file or
+directory) and the list of blocks.
+This design makes it easy to seek to any given position in a file by reading
+the inode data to compute which block to access, then using S3's support for
+HTTP Range headers to start streaming from the correct position.
+Renames are also efficient since only the inode is moved (by a DELETE
+followed by a PUT, since S3 does not support renames).
+</p>
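+<p>
+A sketch of how a read position is mapped onto a block under this design
+(the inode variable is assumed to hold a retrieved
+{@link org.apache.hadoop.fs.s3.INode}; the S3 input stream's block-seek
+logic follows the same pattern):
+</p>
+<pre>
+  long target = 12345;   // desired read position within the file
+  long start = 0;
+  for (Block block : inode.getBlocks()) {
+    long end = start + block.getLength() - 1;
+    if (end >= target) {
+      // the wanted byte is in this block; fetch it from S3 and start
+      // reading at offset (target - start) within the block
+      break;
+    }
+    start = end + 1;
+  }
+</pre>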

+

+<p>
+For a single file /dir1/file1 which takes two blocks of storage, the file
+structure in S3 would be something like this:
+</p>

+
+/
+/dir1
+/dir1/file1
+block-6415776850131549260
+block-3026438247347758425
+
+

+<p>
+Inodes start with a leading /, while blocks are prefixed with block-.
+</p>

+ + + diff --git a/src/java/org/apache/hadoop/fs/s3native/FileMetadata.java b/src/java/org/apache/hadoop/fs/s3native/FileMetadata.java new file mode 100644 index 00000000000..23797e81c0d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/FileMetadata.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +/** + *

+ * <p>
+ * Holds basic metadata for a file stored in a {@link NativeFileSystemStore}.
+ * </p>

+ */ +class FileMetadata { + private final String key; + private final long length; + private final long lastModified; + + public FileMetadata(String key, long length, long lastModified) { + this.key = key; + this.length = length; + this.lastModified = lastModified; + } + + public String getKey() { + return key; + } + + public long getLength() { + return length; + } + + public long getLastModified() { + return lastModified; + } + + @Override + public String toString() { + return "FileMetadata[" + key + ", " + length + ", " + lastModified + "]"; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java b/src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java new file mode 100644 index 00000000000..b24a8e06b7c --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +import static org.apache.hadoop.fs.s3native.NativeS3FileSystem.PATH_DELIMITER; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3.S3Credentials; +import org.apache.hadoop.fs.s3.S3Exception; +import org.jets3t.service.S3ObjectsChunk; +import org.jets3t.service.S3Service; +import org.jets3t.service.S3ServiceException; +import org.jets3t.service.impl.rest.httpclient.RestS3Service; +import org.jets3t.service.model.S3Bucket; +import org.jets3t.service.model.S3Object; +import org.jets3t.service.security.AWSCredentials; + +class Jets3tNativeFileSystemStore implements NativeFileSystemStore { + + private S3Service s3Service; + private S3Bucket bucket; + + public void initialize(URI uri, Configuration conf) throws IOException { + S3Credentials s3Credentials = new S3Credentials(); + s3Credentials.initialize(uri, conf); + try { + AWSCredentials awsCredentials = + new AWSCredentials(s3Credentials.getAccessKey(), + s3Credentials.getSecretAccessKey()); + this.s3Service = new RestS3Service(awsCredentials); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + bucket = new S3Bucket(uri.getHost()); + } + + public void storeFile(String key, File file, byte[] md5Hash) + throws IOException { + + BufferedInputStream in = null; + try { + in = new BufferedInputStream(new FileInputStream(file)); + S3Object object = new S3Object(key); + object.setDataInputStream(in); + object.setContentType("binary/octet-stream"); + object.setContentLength(file.length()); + if (md5Hash != 
null) { + object.setMd5Hash(md5Hash); + } + s3Service.putObject(bucket, object); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + // ignore + } + } + } + } + + public void storeEmptyFile(String key) throws IOException { + try { + S3Object object = new S3Object(key); + object.setDataInputStream(new ByteArrayInputStream(new byte[0])); + object.setContentType("binary/octet-stream"); + object.setContentLength(0); + s3Service.putObject(bucket, object); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public FileMetadata retrieveMetadata(String key) throws IOException { + try { + S3Object object = s3Service.getObjectDetails(bucket, key); + return new FileMetadata(key, object.getContentLength(), + object.getLastModifiedDate().getTime()); + } catch (S3ServiceException e) { + // Following is brittle. Is there a better way? + if (e.getMessage().contains("ResponseCode=404")) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public InputStream retrieve(String key) throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public InputStream retrieve(String key, long byteRangeStart) + throws IOException { + try { + S3Object object = s3Service.getObject(bucket, key, null, null, null, + null, byteRangeStart, null); + return object.getDataInputStream(); + } catch (S3ServiceException e) { + if ("NoSuchKey".equals(e.getS3ErrorCode())) { + return null; + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public PartialListing list(String prefix, int maxListingLength) + throws IOException { + return list(prefix, maxListingLength, null); + } + + public PartialListing list(String prefix, int maxListingLength, + String priorLastKey) throws IOException { + + return list(prefix, PATH_DELIMITER, maxListingLength, priorLastKey); + } + + public PartialListing listAll(String prefix, int maxListingLength, + String priorLastKey) throws IOException { + + return list(prefix, null, maxListingLength, priorLastKey); + } + + private PartialListing list(String prefix, String delimiter, + int maxListingLength, String priorLastKey) throws IOException { + try { + if (prefix.length() > 0 && !prefix.endsWith(PATH_DELIMITER)) { + prefix += PATH_DELIMITER; + } + S3ObjectsChunk chunk = s3Service.listObjectsChunked(bucket.getName(), + prefix, delimiter, maxListingLength, priorLastKey); + + FileMetadata[] fileMetadata = + new FileMetadata[chunk.getObjects().length]; + for (int i = 0; i < fileMetadata.length; i++) { + S3Object object = chunk.getObjects()[i]; + fileMetadata[i] = new FileMetadata(object.getKey(), + object.getContentLength(), object.getLastModifiedDate().getTime()); + } + return new PartialListing(chunk.getPriorLastKey(), fileMetadata, + chunk.getCommonPrefixes()); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + 
} + throw new S3Exception(e); + } + } + + public void delete(String key) throws IOException { + try { + s3Service.deleteObject(bucket, key); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void rename(String srcKey, String dstKey) throws IOException { + try { + s3Service.moveObject(bucket.getName(), srcKey, bucket.getName(), + new S3Object(dstKey), false); + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void purge(String prefix) throws IOException { + try { + S3Object[] objects = s3Service.listObjects(bucket, prefix, null); + for (int i = 0; i < objects.length; i++) { + s3Service.deleteObject(bucket, objects[i].getKey()); + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + } + + public void dump() throws IOException { + StringBuilder sb = new StringBuilder("S3 Native Filesystem, "); + sb.append(bucket.getName()).append("\n"); + try { + S3Object[] objects = s3Service.listObjects(bucket); + for (int i = 0; i < objects.length; i++) { + sb.append(objects[i].getKey()).append("\n"); + } + } catch (S3ServiceException e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new S3Exception(e); + } + System.out.println(sb); + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java b/src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java new file mode 100644 index 00000000000..eb0a6824869 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; + +/** + *

+ * <p>
+ * An abstraction for a key-based {@link File} store.
+ * </p>
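+ * <p>
+ * A sketch of paging through a listing via this interface (the
+ * continuation-key accessor on {@link PartialListing} and the page size of
+ * 1000 are assumptions used for illustration):
+ * </p>
+ * <pre>
+ *   String priorLastKey = null;
+ *   do {
+ *     PartialListing listing = store.list("dir1/", 1000, priorLastKey);
+ *     // consume the files and common prefixes in this page ...
+ *     priorLastKey = listing.getPriorLastKey();
+ *   } while (priorLastKey != null);
+ * </pre>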

+ */ +interface NativeFileSystemStore { + + void initialize(URI uri, Configuration conf) throws IOException; + + void storeFile(String key, File file, byte[] md5Hash) throws IOException; + void storeEmptyFile(String key) throws IOException; + + FileMetadata retrieveMetadata(String key) throws IOException; + InputStream retrieve(String key) throws IOException; + InputStream retrieve(String key, long byteRangeStart) throws IOException; + + PartialListing list(String prefix, int maxListingLength) throws IOException; + PartialListing list(String prefix, int maxListingLength, String priorLastKey) + throws IOException; + PartialListing listAll(String prefix, int maxListingLength, + String priorLastKey) throws IOException; + + void delete(String key) throws IOException; + + void rename(String srcKey, String dstKey) throws IOException; + + /** + * Delete all keys with the given prefix. Used for testing. + * @throws IOException + */ + void purge(String prefix) throws IOException; + + /** + * Diagnostic method to dump state to the console. + * @throws IOException + */ + void dump() throws IOException; +} diff --git a/src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java new file mode 100644 index 00000000000..7ec60655dd9 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java @@ -0,0 +1,578 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3native; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BufferedFSInputStream; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3.S3Exception; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.util.Progressable; + +/** + *
<p>
+ * A {@link FileSystem} for reading and writing files stored on + * Amazon S3. + * Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem} this implementation + * stores files on S3 in their + * native form so they can be read by other S3 tools. + *
</p>
+ * @see org.apache.hadoop.fs.s3.S3FileSystem + */ +public class NativeS3FileSystem extends FileSystem { + + public static final Log LOG = + LogFactory.getLog(NativeS3FileSystem.class); + + private static final String FOLDER_SUFFIX = "_$folder$"; + private static final long MAX_S3_FILE_SIZE = 5 * 1024 * 1024 * 1024L; + static final String PATH_DELIMITER = Path.SEPARATOR; + private static final int S3_MAX_LISTING_LENGTH = 1000; + + private class NativeS3FsInputStream extends FSInputStream { + + private InputStream in; + private final String key; + private long pos = 0; + + public NativeS3FsInputStream(InputStream in, String key) { + this.in = in; + this.key = key; + } + + public synchronized int read() throws IOException { + int result = in.read(); + if (result != -1) { + pos++; + } + if (statistics != null && result != -1) { + statistics.incrementBytesRead(1); + } + return result; + } + public synchronized int read(byte[] b, int off, int len) + throws IOException { + + int result = in.read(b, off, len); + if (result > 0) { + pos += result; + } + if (statistics != null && result > 0) { + statistics.incrementBytesRead(result); + } + return result; + } + + public void close() throws IOException { + in.close(); + } + + public synchronized void seek(long pos) throws IOException { + in.close(); + in = store.retrieve(key, pos); + this.pos = pos; + } + public synchronized long getPos() throws IOException { + return pos; + } + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + } + + private class NativeS3FsOutputStream extends OutputStream { + + private Configuration conf; + private String key; + private File backupFile; + private OutputStream backupStream; + private MessageDigest digest; + private boolean closed; + + public NativeS3FsOutputStream(Configuration conf, + NativeFileSystemStore store, String key, Progressable progress, + int bufferSize) throws IOException { + this.conf = conf; + this.key = key; + this.backupFile = newBackupFile(); + try { + this.digest = MessageDigest.getInstance("MD5"); + this.backupStream = new BufferedOutputStream(new DigestOutputStream( + new FileOutputStream(backupFile), this.digest)); + } catch (NoSuchAlgorithmException e) { + LOG.warn("Cannot load MD5 digest algorithm," + + "skipping message integrity check.", e); + this.backupStream = new BufferedOutputStream( + new FileOutputStream(backupFile)); + } + } + + private File newBackupFile() throws IOException { + File dir = new File(conf.get("fs.s3.buffer.dir")); + if (!dir.mkdirs() && !dir.exists()) { + throw new IOException("Cannot create S3 buffer directory: " + dir); + } + File result = File.createTempFile("output-", ".tmp", dir); + result.deleteOnExit(); + return result; + } + + @Override + public void flush() throws IOException { + backupStream.flush(); + } + + @Override + public synchronized void close() throws IOException { + if (closed) { + return; + } + + backupStream.close(); + + try { + byte[] md5Hash = digest == null ? 
null : digest.digest(); + store.storeFile(key, backupFile, md5Hash); + } finally { + if (!backupFile.delete()) { + LOG.warn("Could not delete temporary s3n file: " + backupFile); + } + super.close(); + closed = true; + } + + } + + @Override + public void write(int b) throws IOException { + backupStream.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + backupStream.write(b, off, len); + } + + + } + + private URI uri; + private NativeFileSystemStore store; + private Path workingDir; + + public NativeS3FileSystem() { + // set store in initialize() + } + + public NativeS3FileSystem(NativeFileSystemStore store) { + this.store = store; + } + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + if (store == null) { + store = createDefaultStore(conf); + } + store.initialize(uri, conf); + setConf(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + this.workingDir = + new Path("/user", System.getProperty("user.name")).makeQualified(this); + } + + private static NativeFileSystemStore createDefaultStore(Configuration conf) { + NativeFileSystemStore store = new Jets3tNativeFileSystemStore(); + + RetryPolicy basePolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep( + conf.getInt("fs.s3.maxRetries", 4), + conf.getLong("fs.s3.sleepTimeSeconds", 10), TimeUnit.SECONDS); + Map, RetryPolicy> exceptionToPolicyMap = + new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(IOException.class, basePolicy); + exceptionToPolicyMap.put(S3Exception.class, basePolicy); + + RetryPolicy methodPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + Map methodNameToPolicyMap = + new HashMap(); + methodNameToPolicyMap.put("storeFile", methodPolicy); + + return (NativeFileSystemStore) + RetryProxy.create(NativeFileSystemStore.class, store, + methodNameToPolicyMap); + } + + private static String pathToKey(Path path) { + if (!path.isAbsolute()) { + throw new IllegalArgumentException("Path must be absolute: " + path); + } + return path.toUri().getPath().substring(1); // remove initial slash + } + + private static Path keyToPath(String key) { + return new Path("/" + key); + } + + private Path makeAbsolute(Path path) { + if (path.isAbsolute()) { + return path; + } + return new Path(workingDir, path); + } + + /** This optional operation is not yet supported. 
*/ + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + throw new IOException("Not supported"); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + + if (exists(f) && !overwrite) { + throw new IOException("File already exists:"+f); + } + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + return new FSDataOutputStream(new NativeS3FsOutputStream(getConf(), store, + key, progress, bufferSize), statistics); + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + FileStatus status; + try { + status = getFileStatus(f); + } catch (FileNotFoundException e) { + return false; + } + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + if (status.isDir()) { + FileStatus[] contents = listStatus(f); + if (!recursive && contents.length > 0) { + throw new IOException("Directory " + f.toString() + " is not empty."); + } + for (FileStatus p : contents) { + if (!delete(p.getPath(), recursive)) { + return false; + } + } + store.delete(key + FOLDER_SUFFIX); + } else { + store.delete(key); + } + return true; + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + + if (key.length() == 0) { // root always exists + return newDirectory(absolutePath); + } + + FileMetadata meta = store.retrieveMetadata(key); + if (meta != null) { + return newFile(meta, absolutePath); + } + if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) { + return newDirectory(absolutePath); + } + + PartialListing listing = store.list(key, 1); + if (listing.getFiles().length > 0 || + listing.getCommonPrefixes().length > 0) { + return newDirectory(absolutePath); + } + + throw new FileNotFoundException(absolutePath + + ": No such file or directory."); + + } + + @Override + public URI getUri() { + return uri; + } + + /** + *
<p>
+ * If f is a file, this method will make a single call to S3. + * If f is a directory, this method will make a maximum of + * (n / 1000) + 2 calls to S3, where n is the total number of + * files and directories contained directly in f. + *
</p>
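+ * <p>
+ * For example, under this bound a directory containing 3,000 files and
+ * directories directly beneath it requires at most
+ * (3000 / 1000) + 2 = 5 calls to S3.
+ * </p>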
+ */ + @Override + public FileStatus[] listStatus(Path f) throws IOException { + + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + + if (key.length() > 0) { + FileMetadata meta = store.retrieveMetadata(key); + if (meta != null) { + return new FileStatus[] { newFile(meta, absolutePath) }; + } + } + + URI pathUri = absolutePath.toUri(); + Set status = new TreeSet(); + String priorLastKey = null; + do { + PartialListing listing = store.list(key, S3_MAX_LISTING_LENGTH, + priorLastKey); + for (FileMetadata fileMetadata : listing.getFiles()) { + Path subpath = keyToPath(fileMetadata.getKey()); + String relativePath = pathUri.relativize(subpath.toUri()).getPath(); + if (relativePath.endsWith(FOLDER_SUFFIX)) { + status.add(newDirectory(new Path(absolutePath, + relativePath.substring(0, + relativePath.indexOf(FOLDER_SUFFIX))))); + } else { + status.add(newFile(fileMetadata, subpath)); + } + } + for (String commonPrefix : listing.getCommonPrefixes()) { + Path subpath = keyToPath(commonPrefix); + String relativePath = pathUri.relativize(subpath.toUri()).getPath(); + status.add(newDirectory(new Path(absolutePath, relativePath))); + } + priorLastKey = listing.getPriorLastKey(); + } while (priorLastKey != null); + + if (status.isEmpty() && + store.retrieveMetadata(key + FOLDER_SUFFIX) == null) { + return null; + } + + return status.toArray(new FileStatus[0]); + } + + private FileStatus newFile(FileMetadata meta, Path path) { + return new FileStatus(meta.getLength(), false, 1, MAX_S3_FILE_SIZE, + meta.getLastModified(), path.makeQualified(this)); + } + + private FileStatus newDirectory(Path path) { + return new FileStatus(0, true, 1, MAX_S3_FILE_SIZE, 0, + path.makeQualified(this)); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + Path absolutePath = makeAbsolute(f); + List paths = new ArrayList(); + do { + paths.add(0, absolutePath); + absolutePath = absolutePath.getParent(); + } while (absolutePath != null); + + boolean result = true; + for (Path path : paths) { + result &= mkdir(path); + } + return result; + } + + private boolean mkdir(Path f) throws IOException { + try { + FileStatus fileStatus = getFileStatus(f); + if (!fileStatus.isDir()) { + throw new IOException(String.format( + "Can't make directory for path %s since it is a file.", f)); + + } + } catch (FileNotFoundException e) { + String key = pathToKey(f) + FOLDER_SUFFIX; + store.storeEmptyFile(key); + } + return true; + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + if (!exists(f)) { + throw new FileNotFoundException(f.toString()); + } + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + return new FSDataInputStream(new BufferedFSInputStream( + new NativeS3FsInputStream(store.retrieve(key), key), bufferSize)); + } + + // rename() and delete() use this method to ensure that the parent directory + // of the source does not vanish. 
+ private void createParent(Path path) throws IOException { + Path parent = path.getParent(); + if (parent != null) { + String key = pathToKey(makeAbsolute(parent)); + if (key.length() > 0) { + store.storeEmptyFile(key + FOLDER_SUFFIX); + } + } + } + + private boolean existsAndIsFile(Path f) throws IOException { + + Path absolutePath = makeAbsolute(f); + String key = pathToKey(absolutePath); + + if (key.length() == 0) { + return false; + } + + FileMetadata meta = store.retrieveMetadata(key); + if (meta != null) { + // S3 object with given key exists, so this is a file + return true; + } + + if (store.retrieveMetadata(key + FOLDER_SUFFIX) != null) { + // Signifies empty directory + return false; + } + + PartialListing listing = store.list(key, 1, null); + if (listing.getFiles().length > 0 || + listing.getCommonPrefixes().length > 0) { + // Non-empty directory + return false; + } + + throw new FileNotFoundException(absolutePath + + ": No such file or directory"); +} + + + @Override + public boolean rename(Path src, Path dst) throws IOException { + + String srcKey = pathToKey(makeAbsolute(src)); + + if (srcKey.length() == 0) { + // Cannot rename root of file system + return false; + } + + // Figure out the final destination + String dstKey; + try { + boolean dstIsFile = existsAndIsFile(dst); + if (dstIsFile) { + // Attempting to overwrite a file using rename() + return false; + } else { + // Move to within the existent directory + dstKey = pathToKey(makeAbsolute(new Path(dst, src.getName()))); + } + } catch (FileNotFoundException e) { + // dst doesn't exist, so we can proceed + dstKey = pathToKey(makeAbsolute(dst)); + try { + if (!getFileStatus(dst.getParent()).isDir()) { + return false; // parent dst is a file + } + } catch (FileNotFoundException ex) { + return false; // parent dst does not exist + } + } + + try { + boolean srcIsFile = existsAndIsFile(src); + if (srcIsFile) { + store.rename(srcKey, dstKey); + } else { + // Move the folder object + store.delete(srcKey + FOLDER_SUFFIX); + store.storeEmptyFile(dstKey + FOLDER_SUFFIX); + + // Move everything inside the folder + String priorLastKey = null; + do { + PartialListing listing = store.listAll(srcKey, S3_MAX_LISTING_LENGTH, + priorLastKey); + for (FileMetadata file : listing.getFiles()) { + store.rename(file.getKey(), dstKey + + file.getKey().substring(srcKey.length())); + } + priorLastKey = listing.getPriorLastKey(); + } while (priorLastKey != null); + } + + createParent(src); + return true; + + } catch (FileNotFoundException e) { + // Source file does not exist; + return false; + } + } + + + /** + * Set the working directory to the given directory. + */ + @Override + public void setWorkingDirectory(Path newDir) { + workingDir = newDir; + } + + @Override + public Path getWorkingDirectory() { + return workingDir; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/PartialListing.java b/src/java/org/apache/hadoop/fs/s3native/PartialListing.java new file mode 100644 index 00000000000..899758660d2 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/PartialListing.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +/** + *
<p>
+ * Holds information on a directory listing for a + * {@link NativeFileSystemStore}. + * This includes the {@link FileMetadata files} and directories + * (their names) contained in a directory. + *
</p>
+ *
<p>
+ * This listing may be returned in chunks, so a priorLastKey + * is provided so that the next chunk may be requested. + *
</p>
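+ * <p>
+ * A minimal paging sketch (the prefix and listing size are illustrative;
+ * it assumes an already-initialized {@link NativeFileSystemStore} named
+ * <code>store</code>):
+ * <pre>
+ *   String priorLastKey = null;
+ *   do {
+ *     PartialListing chunk = store.list("data/", 1000, priorLastKey);
+ *     for (FileMetadata file : chunk.getFiles()) {
+ *       System.out.println(file.getKey() + " " + file.getLength());
+ *     }
+ *     priorLastKey = chunk.getPriorLastKey();
+ *   } while (priorLastKey != null);
+ * </pre>
+ * </p>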
+ * @see NativeFileSystemStore#list(String, int, String) + */ +class PartialListing { + + private final String priorLastKey; + private final FileMetadata[] files; + private final String[] commonPrefixes; + + public PartialListing(String priorLastKey, FileMetadata[] files, + String[] commonPrefixes) { + this.priorLastKey = priorLastKey; + this.files = files; + this.commonPrefixes = commonPrefixes; + } + + public FileMetadata[] getFiles() { + return files; + } + + public String[] getCommonPrefixes() { + return commonPrefixes; + } + + public String getPriorLastKey() { + return priorLastKey; + } + +} diff --git a/src/java/org/apache/hadoop/fs/s3native/package.html b/src/java/org/apache/hadoop/fs/s3native/package.html new file mode 100644 index 00000000000..24b9b1df460 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/s3native/package.html @@ -0,0 +1,32 @@ + + + + + + +
<p>
+A distributed implementation of {@link +org.apache.hadoop.fs.FileSystem} for reading and writing files on +Amazon S3. +Unlike {@link org.apache.hadoop.fs.s3.S3FileSystem}, which is block-based, +this implementation stores +files on S3 in their native form for interoperability with other S3 tools. +
</p>
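+<p>
+A minimal usage sketch (it assumes this file system is registered under the
+<code>s3n:</code> scheme and that AWS credentials are already configured;
+the bucket and paths are illustrative):
+</p>
+<pre>
+  Configuration conf = new Configuration();
+  FileSystem fs = FileSystem.get(URI.create("s3n://mybucket/"), conf);
+
+  FSDataOutputStream out = fs.create(new Path("s3n://mybucket/data/part-0"));
+  out.write("hello".getBytes());
+  out.close();
+
+  FileStatus[] listing = fs.listStatus(new Path("s3n://mybucket/data"));
+</pre>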
+ + + diff --git a/src/java/org/apache/hadoop/fs/shell/Command.java b/src/java/org/apache/hadoop/fs/shell/Command.java new file mode 100644 index 00000000000..06883a2086f --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/Command.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +import java.io.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ipc.RemoteException; + +/** + * An abstract class for the execution of a file system command + */ +abstract public class Command extends Configured { + protected String[] args; + + /** Constructor */ + protected Command(Configuration conf) { + super(conf); + } + + /** Return the command's name excluding the leading character - */ + abstract public String getCommandName(); + + /** + * Execute the command on the input path + * + * @param path the input path + * @throws IOException if any error occurs + */ + abstract protected void run(Path path) throws IOException; + + /** + * For each source path, execute the command + * + * @return 0 if it runs successfully; -1 if it fails + */ + public int runAll() { + int exitCode = 0; + for (String src : args) { + try { + Path srcPath = new Path(src); + FileSystem fs = srcPath.getFileSystem(getConf()); + FileStatus[] statuses = fs.globStatus(srcPath); + if (statuses == null) { + System.err.println("Can not find listing for " + src); + exitCode = -1; + } else { + for(FileStatus s : statuses) { + run(s.getPath()); + } + } + } catch (RemoteException re) { + exitCode = -1; + String content = re.getLocalizedMessage(); + int eol = content.indexOf('\n'); + if (eol>=0) { + content = content.substring(0, eol); + } + System.err.println(getCommandName() + ": " + content); + } catch (IOException e) { + exitCode = -1; + System.err.println(getCommandName() + ": " + e.getLocalizedMessage()); + } + } + return exitCode; + } +} diff --git a/src/java/org/apache/hadoop/fs/shell/CommandFormat.java b/src/java/org/apache/hadoop/fs/shell/CommandFormat.java new file mode 100644 index 00000000000..c1d84d3670c --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/CommandFormat.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Parse the args of a command and check the format of args. + */ +public class CommandFormat { + final String name; + final int minPar, maxPar; + final Map options = new HashMap(); + + /** constructor */ + public CommandFormat(String n, int min, int max, String ... possibleOpt) { + name = n; + minPar = min; + maxPar = max; + for(String opt : possibleOpt) + options.put(opt, Boolean.FALSE); + } + + /** Parse parameters starting from the given position + * + * @param args an array of input arguments + * @param pos the position at which starts to parse + * @return a list of parameters + */ + public List parse(String[] args, int pos) { + List parameters = new ArrayList(); + for(; pos < args.length; pos++) { + if (args[pos].charAt(0) == '-' && args[pos].length() > 1) { + String opt = args[pos].substring(1); + if (options.containsKey(opt)) + options.put(opt, Boolean.TRUE); + else + throw new IllegalArgumentException("Illegal option " + args[pos]); + } + else + parameters.add(args[pos]); + } + int psize = parameters.size(); + if (psize < minPar || psize > maxPar) + throw new IllegalArgumentException("Illegal number of arguments"); + return parameters; + } + + /** Return if the option is set or not + * + * @param option String representation of an option + * @return true is the option is set; false otherwise + */ + public boolean getOpt(String option) { + return options.get(option); + } +} diff --git a/src/java/org/apache/hadoop/fs/shell/CommandUtils.java b/src/java/org/apache/hadoop/fs/shell/CommandUtils.java new file mode 100644 index 00000000000..2a1317ee6c0 --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/CommandUtils.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +final class CommandUtils { + static String formatDescription(String usage, String... 
desciptions) { + StringBuilder b = new StringBuilder(usage + ": " + desciptions[0]); + for(int i = 1; i < desciptions.length; i++) { + b.append("\n\t\t" + desciptions[i]); + } + return b.toString(); + } +} diff --git a/src/java/org/apache/hadoop/fs/shell/Count.java b/src/java/org/apache/hadoop/fs/shell/Count.java new file mode 100644 index 00000000000..abacb2a690d --- /dev/null +++ b/src/java/org/apache/hadoop/fs/shell/Count.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.shell; + +import java.io.*; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Count the number of directories, files, bytes, quota, and remaining quota. + */ +public class Count extends Command { + public static final String NAME = "count"; + public static final String USAGE = "-" + NAME + "[-q] "; + public static final String DESCRIPTION = CommandUtils.formatDescription(USAGE, + "Count the number of directories, files and bytes under the paths", + "that match the specified file pattern. The output columns are:", + "DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME or", + "QUOTA REMAINING_QUATA SPACE_QUOTA REMAINING_SPACE_QUOTA ", + " DIR_COUNT FILE_COUNT CONTENT_SIZE FILE_NAME"); + + private boolean qOption; + + /** Constructor + * + * @param cmd the count command + * @param pos the starting index of the arguments + */ + public Count(String[] cmd, int pos, Configuration conf) { + super(conf); + CommandFormat c = new CommandFormat(NAME, 1, Integer.MAX_VALUE, "q"); + List parameters = c.parse(cmd, pos); + this.args = parameters.toArray(new String[parameters.size()]); + if (this.args.length == 0) { // default path is the current working directory + this.args = new String[] {"."}; + } + this.qOption = c.getOpt("q") ? 
true: false; + } + + /** Check if a command is the count command + * + * @param cmd A string representation of a command starting with "-" + * @return true if this is a count command; false otherwise + */ + public static boolean matches(String cmd) { + return ("-" + NAME).equals(cmd); + } + + @Override + public String getCommandName() { + return NAME; + } + + @Override + protected void run(Path path) throws IOException { + FileSystem fs = path.getFileSystem(getConf()); + System.out.println(fs.getContentSummary(path).toString(qOption) + path); + } +} diff --git a/src/java/org/apache/hadoop/http/FilterContainer.java b/src/java/org/apache/hadoop/http/FilterContainer.java new file mode 100644 index 00000000000..40557c08d7a --- /dev/null +++ b/src/java/org/apache/hadoop/http/FilterContainer.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +import java.util.Map; + +/** + * A container class for javax.servlet.Filter. + */ +public interface FilterContainer { + /** + * Add a filter to the container. + * @param name Filter name + * @param classname Filter class name + * @param parameters a map from parameter names to initial values + */ + void addFilter(String name, String classname, Map parameters); + /** + * Add a global filter to the container. + * @param name filter name + * @param classname filter class name + * @param parameters a map from parameter names to initial values + */ + void addGlobalFilter(String name, String classname, Map parameters); +} diff --git a/src/java/org/apache/hadoop/http/FilterInitializer.java b/src/java/org/apache/hadoop/http/FilterInitializer.java new file mode 100644 index 00000000000..3f4765e29be --- /dev/null +++ b/src/java/org/apache/hadoop/http/FilterInitializer.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +/** + * Initialize a javax.servlet.Filter. 
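+ * <p>
+ * A minimal sketch of a subclass (the initializer, filter class, and
+ * parameter map are illustrative):
+ * <pre>
+ *   public class MyFilterInitializer extends FilterInitializer {
+ *     void initFilter(FilterContainer container) {
+ *       Map params = new HashMap();
+ *       params.put("enabled", "true");
+ *       container.addFilter("my-filter", MyFilter.class.getName(), params);
+ *     }
+ *   }
+ * </pre>
+ * </p>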
+ */ +public abstract class FilterInitializer { + /** + * Initialize a Filter to a FilterContainer. + * @param container The filter container + */ + abstract void initFilter(FilterContainer container); +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/http/HttpServer.java b/src/java/org/apache/hadoop/http/HttpServer.java new file mode 100644 index 00000000000..a739ba69ace --- /dev/null +++ b/src/java/org/apache/hadoop/http/HttpServer.java @@ -0,0 +1,519 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.http; + +import java.io.IOException; +import java.io.PrintWriter; +import java.net.BindException; +import java.net.InetSocketAddress; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.nio.channels.ServerSocketChannel; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.log.LogLevel; +import org.apache.hadoop.metrics.MetricsServlet; +import org.apache.hadoop.util.ReflectionUtils; + +import org.mortbay.jetty.Connector; +import org.mortbay.jetty.Handler; +import org.mortbay.jetty.Server; +import org.mortbay.jetty.handler.ContextHandlerCollection; +import org.mortbay.jetty.nio.SelectChannelConnector; +import org.mortbay.jetty.security.SslSocketConnector; +import org.mortbay.jetty.servlet.Context; +import org.mortbay.jetty.servlet.DefaultServlet; +import org.mortbay.jetty.servlet.FilterHolder; +import org.mortbay.jetty.servlet.FilterMapping; +import org.mortbay.jetty.servlet.ServletHandler; +import org.mortbay.jetty.servlet.ServletHolder; +import org.mortbay.jetty.webapp.WebAppContext; +import org.mortbay.thread.QueuedThreadPool; +import org.mortbay.util.MultiException; + +/** + * Create a Jetty embedded server to answer http requests. The primary goal + * is to serve up status information for the server. 
+ * There are three contexts: + * "/logs/" -> points to the log directory + * "/static/" -> points to common static files (src/webapps/static) + * "/" -> the jsp server code from (src/webapps/) + */ +public class HttpServer implements FilterContainer { + public static final Log LOG = LogFactory.getLog(HttpServer.class); + + static final String FILTER_INITIALIZER_PROPERTY + = "hadoop.http.filter.initializers"; + + protected final Server webServer; + protected final Connector listener; + protected final WebAppContext webAppContext; + protected final boolean findPort; + protected final Map defaultContexts = + new HashMap(); + protected final List filterNames = new ArrayList(); + private static final int MAX_RETRIES = 10; + + /** Same as this(name, bindAddress, port, findPort, null); */ + public HttpServer(String name, String bindAddress, int port, boolean findPort + ) throws IOException { + this(name, bindAddress, port, findPort, new Configuration()); + } + + /** + * Create a status server on the given port. + * The jsp scripts are taken from src/webapps/. + * @param name The name of the server + * @param port The port to use on the server + * @param findPort whether the server should start at the given port and + * increment by 1 until it finds a free port. + * @param conf Configuration + */ + public HttpServer(String name, String bindAddress, int port, + boolean findPort, Configuration conf) throws IOException { + webServer = new Server(); + this.findPort = findPort; + + listener = createBaseListener(conf); + listener.setHost(bindAddress); + listener.setPort(port); + webServer.addConnector(listener); + + webServer.setThreadPool(new QueuedThreadPool()); + + final String appDir = getWebAppsPath(); + ContextHandlerCollection contexts = new ContextHandlerCollection(); + webServer.setHandler(contexts); + + webAppContext = new WebAppContext(); + webAppContext.setContextPath("/"); + webAppContext.setWar(appDir + "/" + name); + webServer.addHandler(webAppContext); + + addDefaultApps(contexts, appDir); + + final FilterInitializer[] initializers = getFilterInitializers(conf); + if (initializers != null) { + for(FilterInitializer c : initializers) { + c.initFilter(this); + } + } + addDefaultServlets(); + } + + /** + * Create a required listener for the Jetty instance listening on the port + * provided. This wrapper and all subclasses must create at least one + * listener. + */ + protected Connector createBaseListener(Configuration conf) + throws IOException { + SelectChannelConnector ret = new SelectChannelConnector(); + ret.setLowResourceMaxIdleTime(10000); + ret.setAcceptQueueSize(128); + ret.setResolveNames(false); + ret.setUseDirectBuffers(false); + return ret; + } + + /** Get an array of FilterConfiguration specified in the conf */ + private static FilterInitializer[] getFilterInitializers(Configuration conf) { + if (conf == null) { + return null; + } + + Class[] classes = conf.getClasses(FILTER_INITIALIZER_PROPERTY); + if (classes == null) { + return null; + } + + FilterInitializer[] initializers = new FilterInitializer[classes.length]; + for(int i = 0; i < classes.length; i++) { + initializers[i] = (FilterInitializer)ReflectionUtils.newInstance( + classes[i], conf); + } + return initializers; + } + + /** + * Add default apps. + * @param appDir The application directory + * @throws IOException + */ + protected void addDefaultApps(ContextHandlerCollection parent, + final String appDir) throws IOException { + // set up the context for "/logs/" if "hadoop.log.dir" property is defined. 
+ String logDir = System.getProperty("hadoop.log.dir"); + if (logDir != null) { + Context logContext = new Context(parent, "/logs"); + logContext.setResourceBase(logDir); + logContext.addServlet(DefaultServlet.class, "/"); + defaultContexts.put(logContext, true); + } + // set up the context for "/static/*" + Context staticContext = new Context(parent, "/static"); + staticContext.setResourceBase(appDir + "/static"); + staticContext.addServlet(DefaultServlet.class, "/*"); + defaultContexts.put(staticContext, true); + } + + /** + * Add default servlets. + */ + protected void addDefaultServlets() { + // set up default servlets + addServlet("stacks", "/stacks", StackServlet.class); + addServlet("logLevel", "/logLevel", LogLevel.Servlet.class); + addServlet("metrics", "/metrics", MetricsServlet.class); + } + + public void addContext(Context ctxt, boolean isFiltered) + throws IOException { + webServer.addHandler(ctxt); + defaultContexts.put(ctxt, isFiltered); + } + + /** + * Add a context + * @param pathSpec The path spec for the context + * @param dir The directory containing the context + * @param isFiltered if true, the servlet is added to the filter path mapping + * @throws IOException + */ + protected void addContext(String pathSpec, String dir, boolean isFiltered) throws IOException { + if (0 == webServer.getHandlers().length) { + throw new RuntimeException("Couldn't find handler"); + } + WebAppContext webAppCtx = new WebAppContext(); + webAppCtx.setContextPath(pathSpec); + webAppCtx.setWar(dir); + addContext(webAppCtx, true); + } + + /** + * Set a value in the webapp context. These values are available to the jsp + * pages as "application.getAttribute(name)". + * @param name The name of the attribute + * @param value The value of the attribute + */ + public void setAttribute(String name, Object value) { + webAppContext.setAttribute(name, value); + } + + /** + * Add a servlet in the server. + * @param name The name of the servlet (can be passed as null) + * @param pathSpec The path spec for the servlet + * @param clazz The servlet class + */ + public void addServlet(String name, String pathSpec, + Class clazz) { + addInternalServlet(name, pathSpec, clazz); + addFilterPathMapping(pathSpec, webAppContext); + } + + /** + * Add an internal servlet in the server. 
+ * @param name The name of the servlet (can be passed as null) + * @param pathSpec The path spec for the servlet + * @param clazz The servlet class + * @deprecated this is a temporary method + */ + @Deprecated + public void addInternalServlet(String name, String pathSpec, + Class clazz) { + ServletHolder holder = new ServletHolder(clazz); + if (name != null) { + holder.setName(name); + } + webAppContext.addServlet(holder, pathSpec); + } + + /** {@inheritDoc} */ + public void addFilter(String name, String classname, + Map parameters) { + + final String[] USER_FACING_URLS = { "*.html", "*.jsp" }; + defineFilter(webAppContext, name, classname, parameters, USER_FACING_URLS); + final String[] ALL_URLS = { "/*" }; + for (Map.Entry e : defaultContexts.entrySet()) { + if (e.getValue()) { + Context ctx = e.getKey(); + defineFilter(ctx, name, classname, parameters, ALL_URLS); + LOG.info("Added filter " + name + " (class=" + classname + + ") to context " + ctx.getDisplayName()); + } + } + filterNames.add(name); + } + + /** {@inheritDoc} */ + public void addGlobalFilter(String name, String classname, + Map parameters) { + final String[] ALL_URLS = { "/*" }; + defineFilter(webAppContext, name, classname, parameters, ALL_URLS); + for (Context ctx : defaultContexts.keySet()) { + defineFilter(ctx, name, classname, parameters, ALL_URLS); + } + LOG.info("Added global filter" + name + " (class=" + classname + ")"); + } + + /** + * Define a filter for a context and set up default url mappings. + */ + protected void defineFilter(Context ctx, String name, + String classname, Map parameters, String[] urls) { + + FilterHolder holder = new FilterHolder(); + holder.setName(name); + holder.setClassName(classname); + holder.setInitParameters(parameters); + FilterMapping fmap = new FilterMapping(); + fmap.setPathSpecs(urls); + fmap.setDispatches(Handler.ALL); + fmap.setFilterName(name); + ServletHandler handler = ctx.getServletHandler(); + handler.addFilter(holder, fmap); + } + + /** + * Add the path spec to the filter path mapping. + * @param pathSpec The path spec + * @param webAppCtx The WebApplicationContext to add to + */ + protected void addFilterPathMapping(String pathSpec, + Context webAppCtx) { + ServletHandler handler = webAppCtx.getServletHandler(); + for(String name : filterNames) { + FilterMapping fmap = new FilterMapping(); + fmap.setPathSpec(pathSpec); + fmap.setFilterName(name); + fmap.setDispatches(Handler.ALL); + handler.addFilterMapping(fmap); + } + } + + /** + * Get the value in the webapp context. + * @param name The name of the attribute + * @return The value of the attribute + */ + public Object getAttribute(String name) { + return webAppContext.getAttribute(name); + } + + /** + * Get the pathname to the webapps files. + * @return the pathname as a URL + * @throws IOException if 'webapps' directory cannot be found on CLASSPATH. + */ + protected String getWebAppsPath() throws IOException { + URL url = getClass().getClassLoader().getResource("webapps"); + if (url == null) + throw new IOException("webapps not found in CLASSPATH"); + return url.toString(); + } + + /** + * Get the port that the server is on + * @return the port + */ + public int getPort() { + return webServer.getConnectors()[0].getLocalPort(); + } + + /** + * Set the min, max number of worker threads (simultaneous connections). 
+ */ + public void setThreads(int min, int max) { + QueuedThreadPool pool = (QueuedThreadPool) webServer.getThreadPool() ; + pool.setMinThreads(min); + pool.setMaxThreads(max); + } + + /** + * Configure an ssl listener on the server. + * @param addr address to listen on + * @param keystore location of the keystore + * @param storPass password for the keystore + * @param keyPass password for the key + * @deprecated Use {@link #addSslListener(InetSocketAddress, Configuration, boolean)} + */ + @Deprecated + public void addSslListener(InetSocketAddress addr, String keystore, + String storPass, String keyPass) throws IOException { + if (webServer.isStarted()) { + throw new IOException("Failed to add ssl listener"); + } + SslSocketConnector sslListener = new SslSocketConnector(); + sslListener.setHost(addr.getHostName()); + sslListener.setPort(addr.getPort()); + sslListener.setKeystore(keystore); + sslListener.setPassword(storPass); + sslListener.setKeyPassword(keyPass); + webServer.addConnector(sslListener); + } + + /** + * Configure an ssl listener on the server. + * @param addr address to listen on + * @param sslConf conf to retrieve ssl options + * @param needClientAuth whether client authentication is required + */ + public void addSslListener(InetSocketAddress addr, Configuration sslConf, + boolean needClientAuth) throws IOException { + if (webServer.isStarted()) { + throw new IOException("Failed to add ssl listener"); + } + if (needClientAuth) { + // setting up SSL truststore for authenticating clients + System.setProperty("javax.net.ssl.trustStore", sslConf.get( + "ssl.server.truststore.location", "")); + System.setProperty("javax.net.ssl.trustStorePassword", sslConf.get( + "ssl.server.truststore.password", "")); + System.setProperty("javax.net.ssl.trustStoreType", sslConf.get( + "ssl.server.truststore.type", "jks")); + } + SslSocketConnector sslListener = new SslSocketConnector(); + sslListener.setHost(addr.getHostName()); + sslListener.setPort(addr.getPort()); + sslListener.setKeystore(sslConf.get("ssl.server.keystore.location")); + sslListener.setPassword(sslConf.get("ssl.server.keystore.password", "")); + sslListener.setKeyPassword(sslConf.get("ssl.server.keystore.keypassword", "")); + sslListener.setKeystoreType(sslConf.get("ssl.server.keystore.type", "jks")); + sslListener.setNeedClientAuth(needClientAuth); + webServer.addConnector(sslListener); + } + + /** + * Start the server. Does not wait for the server to start. + */ + public void start() throws IOException { + try { + int port = 0; + int oriPort = listener.getPort(); // The original requested port + while (true) { + try { + port = webServer.getConnectors()[0].getLocalPort(); + LOG.info("Port returned by webServer.getConnectors()[0]." + + "getLocalPort() before open() is "+ port + + ". 
Opening the listener on " + oriPort); + listener.open(); + port = listener.getLocalPort(); + LOG.info("listener.getLocalPort() returned " + listener.getLocalPort() + + " webServer.getConnectors()[0].getLocalPort() returned " + + webServer.getConnectors()[0].getLocalPort()); + //Workaround to handle the problem reported in HADOOP-4744 + if (port < 0) { + Thread.sleep(100); + int numRetries = 1; + while (port < 0) { + LOG.warn("listener.getLocalPort returned " + port); + if (numRetries++ > MAX_RETRIES) { + throw new Exception(" listener.getLocalPort is returning " + + "less than 0 even after " +numRetries+" resets"); + } + for (int i = 0; i < 2; i++) { + LOG.info("Retrying listener.getLocalPort()"); + port = listener.getLocalPort(); + if (port > 0) { + break; + } + Thread.sleep(200); + } + if (port > 0) { + break; + } + LOG.info("Bouncing the listener"); + listener.close(); + Thread.sleep(1000); + listener.setPort(oriPort == 0 ? 0 : (oriPort += 1)); + listener.open(); + Thread.sleep(100); + port = listener.getLocalPort(); + } + } //Workaround end + LOG.info("Jetty bound to port " + port); + webServer.start(); + break; + } catch (IOException ex) { + // if this is a bind exception, + // then try the next port number. + if (ex instanceof BindException) { + if (!findPort) { + throw (BindException) ex; + } + } else { + LOG.info("HttpServer.start() threw a non Bind IOException"); + throw ex; + } + } catch (MultiException ex) { + LOG.info("HttpServer.start() threw a MultiException"); + throw ex; + } + listener.setPort((oriPort += 1)); + } + } catch (IOException e) { + throw e; + } catch (Exception e) { + throw new IOException("Problem starting http server", e); + } + } + + /** + * stop the server + */ + public void stop() throws Exception { + listener.close(); + webServer.stop(); + } + + public void join() throws InterruptedException { + webServer.join(); + } + + /** + * A very simple servlet to serve up a text representation of the current + * stack traces. It both returns the stacks to the caller and logs them. + * Currently the stack traces are done sequentially rather than exactly the + * same data. + */ + public static class StackServlet extends HttpServlet { + private static final long serialVersionUID = -6284183679759467039L; + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + + PrintWriter out = new PrintWriter(response.getOutputStream()); + ReflectionUtils.printThreadInfo(out, ""); + out.close(); + ReflectionUtils.logThreadInfo(LOG, "jsp requested", 1); + } + } +} diff --git a/src/java/org/apache/hadoop/io/AbstractMapWritable.java b/src/java/org/apache/hadoop/io/AbstractMapWritable.java new file mode 100644 index 00000000000..5829d4f1111 --- /dev/null +++ b/src/java/org/apache/hadoop/io/AbstractMapWritable.java @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** + * Abstract base class for MapWritable and SortedMapWritable + * + * Unlike org.apache.nutch.crawl.MapWritable, this class allows creation of + * MapWritable<Writable, MapWritable> so the CLASS_TO_ID and ID_TO_CLASS + * maps travel with the class instead of being static. + * + * Class ids range from 1 to 127 so there can be at most 127 distinct classes + * in any specific map instance. + */ +public abstract class AbstractMapWritable implements Writable, Configurable { + private AtomicReference conf; + + /* Class to id mappings */ + private Map classToIdMap = new ConcurrentHashMap(); + + /* Id to Class mappings */ + private Map idToClassMap = new ConcurrentHashMap(); + + /* The number of new classes (those not established by the constructor) */ + private volatile byte newClasses = 0; + + /** @return the number of known classes */ + byte getNewClasses() { + return newClasses; + } + + /** + * Used to add "predefined" classes and by Writable to copy "new" classes. + */ + private synchronized void addToMap(Class clazz, byte id) { + if (classToIdMap.containsKey(clazz)) { + byte b = classToIdMap.get(clazz); + if (b != id) { + throw new IllegalArgumentException ("Class " + clazz.getName() + + " already registered but maps to " + b + " and not " + id); + } + } + if (idToClassMap.containsKey(id)) { + Class c = idToClassMap.get(id); + if (!c.equals(clazz)) { + throw new IllegalArgumentException("Id " + id + " exists but maps to " + + c.getName() + " and not " + clazz.getName()); + } + } + classToIdMap.put(clazz, id); + idToClassMap.put(id, clazz); + } + + /** Add a Class to the maps if it is not already present. */ + protected synchronized void addToMap(Class clazz) { + if (classToIdMap.containsKey(clazz)) { + return; + } + if (newClasses + 1 > Byte.MAX_VALUE) { + throw new IndexOutOfBoundsException("adding an additional class would" + + " exceed the maximum number allowed"); + } + byte id = ++newClasses; + addToMap(clazz, id); + } + + /** @return the Class class for the specified id */ + protected Class getClass(byte id) { + return idToClassMap.get(id); + } + + /** @return the id for the specified Class */ + protected byte getId(Class clazz) { + return classToIdMap.containsKey(clazz) ? classToIdMap.get(clazz) : -1; + } + + /** Used by child copy constructors. */ + protected synchronized void copy(Writable other) { + if (other != null) { + try { + DataOutputBuffer out = new DataOutputBuffer(); + other.write(out); + DataInputBuffer in = new DataInputBuffer(); + in.reset(out.getData(), out.getLength()); + readFields(in); + + } catch (IOException e) { + throw new IllegalArgumentException("map cannot be copied: " + + e.getMessage()); + } + + } else { + throw new IllegalArgumentException("source map cannot be null"); + } + } + + /** constructor. 
*/ + protected AbstractMapWritable() { + this.conf = new AtomicReference(); + + addToMap(ArrayWritable.class, + Byte.valueOf(Integer.valueOf(-127).byteValue())); + addToMap(BooleanWritable.class, + Byte.valueOf(Integer.valueOf(-126).byteValue())); + addToMap(BytesWritable.class, + Byte.valueOf(Integer.valueOf(-125).byteValue())); + addToMap(FloatWritable.class, + Byte.valueOf(Integer.valueOf(-124).byteValue())); + addToMap(IntWritable.class, + Byte.valueOf(Integer.valueOf(-123).byteValue())); + addToMap(LongWritable.class, + Byte.valueOf(Integer.valueOf(-122).byteValue())); + addToMap(MapWritable.class, + Byte.valueOf(Integer.valueOf(-121).byteValue())); + addToMap(MD5Hash.class, + Byte.valueOf(Integer.valueOf(-120).byteValue())); + addToMap(NullWritable.class, + Byte.valueOf(Integer.valueOf(-119).byteValue())); + addToMap(ObjectWritable.class, + Byte.valueOf(Integer.valueOf(-118).byteValue())); + addToMap(SortedMapWritable.class, + Byte.valueOf(Integer.valueOf(-117).byteValue())); + addToMap(Text.class, + Byte.valueOf(Integer.valueOf(-116).byteValue())); + addToMap(TwoDArrayWritable.class, + Byte.valueOf(Integer.valueOf(-115).byteValue())); + + // UTF8 is deprecated so we don't support it + + addToMap(VIntWritable.class, + Byte.valueOf(Integer.valueOf(-114).byteValue())); + addToMap(VLongWritable.class, + Byte.valueOf(Integer.valueOf(-113).byteValue())); + + } + + /** @return the conf */ + public Configuration getConf() { + return conf.get(); + } + + /** @param conf the conf to set */ + public void setConf(Configuration conf) { + this.conf.set(conf); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + + // First write out the size of the class table and any classes that are + // "unknown" classes + + out.writeByte(newClasses); + + for (byte i = 1; i <= newClasses; i++) { + out.writeByte(i); + out.writeUTF(getClass(i).getName()); + } + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + + // Get the number of "unknown" classes + + newClasses = in.readByte(); + + // Then read in the class names and add them to our tables + + for (int i = 0; i < newClasses; i++) { + byte id = in.readByte(); + String className = in.readUTF(); + try { + addToMap(Class.forName(className), id); + + } catch (ClassNotFoundException e) { + throw new IOException("can't find class: " + className + " because "+ + e.getMessage()); + } + } + } +} diff --git a/src/java/org/apache/hadoop/io/ArrayFile.java b/src/java/org/apache/hadoop/io/ArrayFile.java new file mode 100644 index 00000000000..dafb6ae600e --- /dev/null +++ b/src/java/org/apache/hadoop/io/ArrayFile.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io; + +import java.io.*; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.*; +import org.apache.hadoop.io.SequenceFile.CompressionType; + + +/** A dense file-based mapping from integers to values. */ +public class ArrayFile extends MapFile { + + protected ArrayFile() {} // no public ctor + + /** Write a new array file. */ + public static class Writer extends MapFile.Writer { + private LongWritable count = new LongWritable(0); + + /** Create the named file for values of the named class. */ + public Writer(Configuration conf, FileSystem fs, + String file, Class valClass) + throws IOException { + super(conf, fs, file, LongWritable.class, valClass); + } + + /** Create the named file for values of the named class. */ + public Writer(Configuration conf, FileSystem fs, + String file, Class valClass, + CompressionType compress, Progressable progress) + throws IOException { + super(conf, fs, file, LongWritable.class, valClass, compress, progress); + } + + /** Append a value to the file. */ + public synchronized void append(Writable value) throws IOException { + super.append(count, value); // add to map + count.set(count.get()+1); // increment count + } + } + + /** Provide access to an existing array file. */ + public static class Reader extends MapFile.Reader { + private LongWritable key = new LongWritable(); + + /** Construct an array reader for the named file.*/ + public Reader(FileSystem fs, String file, Configuration conf) throws IOException { + super(fs, file, conf); + } + + /** Positions the reader before its nth value. */ + public synchronized void seek(long n) throws IOException { + key.set(n); + seek(key); + } + + /** Read and return the next value in the file. */ + public synchronized Writable next(Writable value) throws IOException { + return next(key, value) ? value : null; + } + + /** Returns the key associated with the most recent call to {@link + * #seek(long)}, {@link #next(Writable)}, or {@link + * #get(long,Writable)}. */ + public synchronized long key() throws IOException { + return key.get(); + } + + /** Return the nth value in the file. */ + public synchronized Writable get(long n, Writable value) + throws IOException { + key.set(n); + return get(key, value); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/ArrayWritable.java b/src/java/org/apache/hadoop/io/ArrayWritable.java new file mode 100644 index 00000000000..9c6643548a0 --- /dev/null +++ b/src/java/org/apache/hadoop/io/ArrayWritable.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.lang.reflect.Array; + +/** + * A Writable for arrays containing instances of a class. 
The elements of this + * writable must all be instances of the same class. If this writable will be + * the input for a Reducer, you will need to create a subclass that sets the + * value to be of the proper type. + * + * For example: + * + * public class IntArrayWritable extends ArrayWritable { + * public IntArrayWritable() { + * super(IntWritable.class); + * } + * } + * + */ +public class ArrayWritable implements Writable { + private Class valueClass; + private Writable[] values; + + public ArrayWritable(Class valueClass) { + if (valueClass == null) { + throw new IllegalArgumentException("null valueClass"); + } + this.valueClass = valueClass; + } + + public ArrayWritable(Class valueClass, Writable[] values) { + this(valueClass); + this.values = values; + } + + public ArrayWritable(String[] strings) { + this(UTF8.class, new Writable[strings.length]); + for (int i = 0; i < strings.length; i++) { + values[i] = new UTF8(strings[i]); + } + } + + public Class getValueClass() { + return valueClass; + } + + public String[] toStrings() { + String[] strings = new String[values.length]; + for (int i = 0; i < values.length; i++) { + strings[i] = values[i].toString(); + } + return strings; + } + + public Object toArray() { + Object result = Array.newInstance(valueClass, values.length); + for (int i = 0; i < values.length; i++) { + Array.set(result, i, values[i]); + } + return result; + } + + public void set(Writable[] values) { this.values = values; } + + public Writable[] get() { return values; } + + public void readFields(DataInput in) throws IOException { + values = new Writable[in.readInt()]; // construct values + for (int i = 0; i < values.length; i++) { + Writable value = WritableFactories.newInstance(valueClass); + value.readFields(in); // read a value + values[i] = value; // store it in values + } + } + + public void write(DataOutput out) throws IOException { + out.writeInt(values.length); // write values + for (int i = 0; i < values.length; i++) { + values[i].write(out); + } + } + +} + diff --git a/src/java/org/apache/hadoop/io/BinaryComparable.java b/src/java/org/apache/hadoop/io/BinaryComparable.java new file mode 100644 index 00000000000..0fb0882e4f7 --- /dev/null +++ b/src/java/org/apache/hadoop/io/BinaryComparable.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** + * Interface supported by {@link org.apache.hadoop.io.WritableComparable} + * types supporting ordering/permutation by a representative set of bytes. + */ +public abstract class BinaryComparable implements Comparable { + + /** + * Return n st bytes 0..n-1 from {#getBytes()} are valid. + */ + public abstract int getLength(); + + /** + * Return representative byte array for this instance. 
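The ArrayWritable javadoc above names IntArrayWritable as the kind of subclass to create when the array is used as Reducer input; below is a minimal sketch of that subclass together with the set()/get() round trip (the demo class is illustrative only).

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// The subclass fixes the element type so the framework can create instances
// with the no-argument constructor during deserialization.
public class IntArrayWritable extends ArrayWritable {
  public IntArrayWritable() {
    super(IntWritable.class);
  }
}

class IntArrayWritableDemo {
  public static void main(String[] args) {
    IntWritable[] values = { new IntWritable(1), new IntWritable(2), new IntWritable(3) };
    IntArrayWritable array = new IntArrayWritable();
    array.set(values);                       // store the backing array
    for (Writable w : array.get()) {         // read the elements back
      System.out.println(((IntWritable) w).get());
    }
  }
}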
+ */ + public abstract byte[] getBytes(); + + /** + * Compare bytes from {#getBytes()}. + * @see org.apache.hadoop.io.WritableComparator#compareBytes(byte[],int,int,byte[],int,int) + */ + public int compareTo(BinaryComparable other) { + if (this == other) + return 0; + return WritableComparator.compareBytes(getBytes(), 0, getLength(), + other.getBytes(), 0, other.getLength()); + } + + /** + * Compare bytes from {#getBytes()} to those provided. + */ + public int compareTo(byte[] other, int off, int len) { + return WritableComparator.compareBytes(getBytes(), 0, getLength(), + other, off, len); + } + + /** + * Return true if bytes from {#getBytes()} match. + */ + public boolean equals(Object other) { + if (!(other instanceof BinaryComparable)) + return false; + BinaryComparable that = (BinaryComparable)other; + if (this.getLength() != that.getLength()) + return false; + return this.compareTo(that) == 0; + } + + /** + * Return a hash of the bytes returned from {#getBytes()}. + * @see org.apache.hadoop.io.WritableComparator#hashBytes(byte[],int) + */ + public int hashCode() { + return WritableComparator.hashBytes(getBytes(), getLength()); + } + +} diff --git a/src/java/org/apache/hadoop/io/BloomMapFile.java b/src/java/org/apache/hadoop/io/BloomMapFile.java new file mode 100644 index 00000000000..aa616a4565d --- /dev/null +++ b/src/java/org/apache/hadoop/io/BloomMapFile.java @@ -0,0 +1,259 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.bloom.DynamicBloomFilter; +import org.apache.hadoop.util.bloom.Filter; +import org.apache.hadoop.util.bloom.Key; +import org.apache.hadoop.util.hash.Hash; + +/** + * This class extends {@link MapFile} and provides very much the same + * functionality. However, it uses dynamic Bloom filters to provide + * quick membership test for keys, and it offers a fast version of + * {@link Reader#get(WritableComparable, Writable)} operation, especially in + * case of sparsely populated MapFile-s. 
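To make the BloomMapFile description above concrete, here is a hedged usage sketch assuming the local file system and an illustrative /tmp path: keys are appended in sorted order as with any MapFile, and misses are usually answered by the in-memory filter without touching the index.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.BloomMapFile;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class BloomMapFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);      // any FileSystem works
    String dir = "/tmp/bloom-demo";                 // illustrative path

    BloomMapFile.Writer writer =
        new BloomMapFile.Writer(conf, fs, dir, Text.class, IntWritable.class);
    writer.append(new Text("alpha"), new IntWritable(1));   // keys in sorted order
    writer.append(new Text("beta"), new IntWritable(2));
    writer.close();                                  // also writes the "bloom" file

    BloomMapFile.Reader reader = new BloomMapFile.Reader(fs, dir, conf);
    IntWritable value = new IntWritable();
    // A key that was never appended is usually rejected by the filter alone.
    if (reader.probablyHasKey(new Text("gamma"))) {
      reader.get(new Text("gamma"), value);
    }
    System.out.println(reader.get(new Text("beta"), value)); // prints 2
    reader.close();
  }
}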
+ */ +public class BloomMapFile { + private static final Log LOG = LogFactory.getLog(BloomMapFile.class); + public static final String BLOOM_FILE_NAME = "bloom"; + public static final int HASH_COUNT = 5; + + public static void delete(FileSystem fs, String name) throws IOException { + Path dir = new Path(name); + Path data = new Path(dir, MapFile.DATA_FILE_NAME); + Path index = new Path(dir, MapFile.INDEX_FILE_NAME); + Path bloom = new Path(dir, BLOOM_FILE_NAME); + + fs.delete(data, true); + fs.delete(index, true); + fs.delete(bloom, true); + fs.delete(dir, true); + } + + public static class Writer extends MapFile.Writer { + private DynamicBloomFilter bloomFilter; + private int numKeys; + private int vectorSize; + private Key bloomKey = new Key(); + private DataOutputBuffer buf = new DataOutputBuffer(); + private FileSystem fs; + private Path dir; + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass, CompressionType compress, + CompressionCodec codec, Progressable progress) throws IOException { + super(conf, fs, dirName, keyClass, valClass, compress, codec, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass, CompressionType compress, + Progressable progress) throws IOException { + super(conf, fs, dirName, keyClass, valClass, compress, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass, CompressionType compress) + throws IOException { + super(conf, fs, dirName, keyClass, valClass, compress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + CompressionType compress, CompressionCodec codec, Progressable progress) + throws IOException { + super(conf, fs, dirName, comparator, valClass, compress, codec, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + CompressionType compress, Progressable progress) throws IOException { + super(conf, fs, dirName, comparator, valClass, compress, progress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, CompressionType compress) + throws IOException { + super(conf, fs, dirName, comparator, valClass, compress); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass) throws IOException { + super(conf, fs, dirName, comparator, valClass); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + Class valClass) throws IOException { + super(conf, fs, dirName, keyClass, valClass); + this.fs = fs; + this.dir = new Path(dirName); + initBloomFilter(conf); + } + + private synchronized void initBloomFilter(Configuration conf) { + numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); + // vector size should be -kn / (ln(1 - c^(1/k))) bits for + // single 
key, where is the number of hash functions, + // n is the number of keys and c is the desired + // max. error rate. + // Our desired error rate is by default 0.005, i.e. 0.5% + float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); + vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / + Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); + bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, + Hash.getHashType(conf), numKeys); + } + + @Override + public synchronized void append(WritableComparable key, Writable val) + throws IOException { + super.append(key, val); + buf.reset(); + key.write(buf); + bloomKey.set(buf.getData(), 1.0); + bloomFilter.add(bloomKey); + } + + @Override + public synchronized void close() throws IOException { + super.close(); + DataOutputStream out = fs.create(new Path(dir, BLOOM_FILE_NAME), true); + bloomFilter.write(out); + out.flush(); + out.close(); + } + + } + + public static class Reader extends MapFile.Reader { + private DynamicBloomFilter bloomFilter; + private DataOutputBuffer buf = new DataOutputBuffer(); + private Key bloomKey = new Key(); + + public Reader(FileSystem fs, String dirName, Configuration conf) + throws IOException { + super(fs, dirName, conf); + initBloomFilter(fs, dirName, conf); + } + + public Reader(FileSystem fs, String dirName, WritableComparator comparator, + Configuration conf, boolean open) throws IOException { + super(fs, dirName, comparator, conf, open); + initBloomFilter(fs, dirName, conf); + } + + public Reader(FileSystem fs, String dirName, WritableComparator comparator, + Configuration conf) throws IOException { + super(fs, dirName, comparator, conf); + initBloomFilter(fs, dirName, conf); + } + + private void initBloomFilter(FileSystem fs, String dirName, + Configuration conf) { + try { + DataInputStream in = fs.open(new Path(dirName, BLOOM_FILE_NAME)); + bloomFilter = new DynamicBloomFilter(); + bloomFilter.readFields(in); + in.close(); + } catch (IOException ioe) { + LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile."); + bloomFilter = null; + } + } + + /** + * Checks if this MapFile has the indicated key. The membership test is + * performed using a Bloom filter, so the result has always non-zero + * probability of false positives. + * @param key key to check + * @return false iff key doesn't exist, true if key probably exists. + * @throws IOException + */ + public boolean probablyHasKey(WritableComparable key) throws IOException { + if (bloomFilter == null) { + return true; + } + buf.reset(); + key.write(buf); + bloomKey.set(buf.getData(), 1.0); + return bloomFilter.membershipTest(bloomKey); + } + + /** + * Fast version of the + * {@link MapFile.Reader#get(WritableComparable, Writable)} method. First + * it checks the Bloom filter for the existence of the key, and only if + * present it performs the real get operation. This yields significant + * performance improvements for get operations on sparsely populated files. + */ + @Override + public synchronized Writable get(WritableComparable key, Writable val) + throws IOException { + if (!probablyHasKey(key)) { + return null; + } + return super.get(key, val); + } + + /** + * Retrieve the Bloom filter used by this instance of the Reader. 
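The sizing comment in initBloomFilter() above, vectorSize = -kn / ln(1 - c^(1/k)) with k hash functions, n keys and desired error rate c, works out to roughly 12.3 million bits (about 1.5 MB) for the defaults of 1,048,576 keys, k = 5 and c = 0.005. The snippet below simply repeats that arithmetic; the class name is invented.

public class BloomVectorSizeCheck {
  public static void main(String[] args) {
    int hashCount = 5;                 // HASH_COUNT
    int numKeys = 1024 * 1024;         // io.mapfile.bloom.size default
    float errorRate = 0.005f;          // io.mapfile.bloom.error.rate default

    // vectorSize = ceil(-k*n / ln(1 - c^(1/k))) bits, as in initBloomFilter()
    int vectorSize = (int) Math.ceil((double) (-hashCount * numKeys) /
        Math.log(1.0 - Math.pow(errorRate, 1.0 / hashCount)));

    // Roughly 12.3 million bits for the defaults, i.e. about 1.5 MB of filter.
    System.out.println(vectorSize);
  }
}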
+ * @return a Bloom filter (see {@link Filter}) + */ + public Filter getBloomFilter() { + return bloomFilter; + } + } +} diff --git a/src/java/org/apache/hadoop/io/BooleanWritable.java b/src/java/org/apache/hadoop/io/BooleanWritable.java new file mode 100644 index 00000000000..1ef1a294571 --- /dev/null +++ b/src/java/org/apache/hadoop/io/BooleanWritable.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** + * A WritableComparable for booleans. + */ +public class BooleanWritable implements WritableComparable { + private boolean value; + + /** + */ + public BooleanWritable() {}; + + /** + */ + public BooleanWritable(boolean value) { + set(value); + } + + /** + * Set the value of the BooleanWritable + */ + public void set(boolean value) { + this.value = value; + } + + /** + * Returns the value of the BooleanWritable + */ + public boolean get() { + return value; + } + + /** + */ + public void readFields(DataInput in) throws IOException { + value = in.readBoolean(); + } + + /** + */ + public void write(DataOutput out) throws IOException { + out.writeBoolean(value); + } + + /** + */ + public boolean equals(Object o) { + if (!(o instanceof BooleanWritable)) { + return false; + } + BooleanWritable other = (BooleanWritable) o; + return this.value == other.value; + } + + public int hashCode() { + return value ? 0 : 1; + } + + + + /** + */ + public int compareTo(Object o) { + boolean a = this.value; + boolean b = ((BooleanWritable) o).value; + return ((a == b) ? 0 : (a == false) ? -1 : 1); + } + + public String toString() { + return Boolean.toString(get()); + } + + /** + * A Comparator optimized for BooleanWritable. + */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(BooleanWritable.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return compareBytes(b1, s1, l1, b2, s2, l2); + } + } + + + static { + WritableComparator.define(BooleanWritable.class, new Comparator()); + } +} diff --git a/src/java/org/apache/hadoop/io/ByteWritable.java b/src/java/org/apache/hadoop/io/ByteWritable.java new file mode 100644 index 00000000000..f9bd2e8eb60 --- /dev/null +++ b/src/java/org/apache/hadoop/io/ByteWritable.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for a single byte. */ +public class ByteWritable implements WritableComparable { + private byte value; + + public ByteWritable() {} + + public ByteWritable(byte value) { set(value); } + + /** Set the value of this ByteWritable. */ + public void set(byte value) { this.value = value; } + + /** Return the value of this ByteWritable. */ + public byte get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = in.readByte(); + } + + public void write(DataOutput out) throws IOException { + out.writeByte(value); + } + + /** Returns true iff o is a ByteWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof ByteWritable)) { + return false; + } + ByteWritable other = (ByteWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)value; + } + + /** Compares two ByteWritables. */ + public int compareTo(Object o) { + int thisValue = this.value; + int thatValue = ((ByteWritable)o).value; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + + public String toString() { + return Byte.toString(value); + } + + /** A Comparator optimized for ByteWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(ByteWritable.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + byte thisValue = b1[s1]; + byte thatValue = b2[s2]; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + } + + static { // register this comparator + WritableComparator.define(ByteWritable.class, new Comparator()); + } +} + diff --git a/src/java/org/apache/hadoop/io/BytesWritable.java b/src/java/org/apache/hadoop/io/BytesWritable.java new file mode 100644 index 00000000000..9f6bbe0e46d --- /dev/null +++ b/src/java/org/apache/hadoop/io/BytesWritable.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A byte sequence that is usable as a key or value. 
+ * It is resizable and distinguishes between the size of the seqeunce and + * the current capacity. The hash function is the front of the md5 of the + * buffer. The sort order is the same as memcmp. + */ +public class BytesWritable extends BinaryComparable + implements WritableComparable { + private static final Log LOG = LogFactory.getLog(BytesWritable.class); + private static final int LENGTH_BYTES = 4; + private static final byte[] EMPTY_BYTES = {}; + + private int size; + private byte[] bytes; + + /** + * Create a zero-size sequence. + */ + public BytesWritable() {this(EMPTY_BYTES);} + + /** + * Create a BytesWritable using the byte array as the initial value. + * @param bytes This array becomes the backing storage for the object. + */ + public BytesWritable(byte[] bytes) { + this.bytes = bytes; + this.size = bytes.length; + } + + /** + * Get the data from the BytesWritable. + * @return The data is only valid between 0 and getLength() - 1. + */ + public byte[] getBytes() { + return bytes; + } + + /** + * Get the data from the BytesWritable. + * @deprecated Use {@link #getBytes()} instead. + */ + @Deprecated + public byte[] get() { + return getBytes(); + } + + /** + * Get the current size of the buffer. + */ + public int getLength() { + return size; + } + + /** + * Get the current size of the buffer. + * @deprecated Use {@link #getLength()} instead. + */ + @Deprecated + public int getSize() { + return getLength(); + } + + /** + * Change the size of the buffer. The values in the old range are preserved + * and any new values are undefined. The capacity is changed if it is + * necessary. + * @param size The new number of bytes + */ + public void setSize(int size) { + if (size > getCapacity()) { + setCapacity(size * 3 / 2); + } + this.size = size; + } + + /** + * Get the capacity, which is the maximum size that could handled without + * resizing the backing storage. + * @return The number of bytes + */ + public int getCapacity() { + return bytes.length; + } + + /** + * Change the capacity of the backing storage. + * The data is preserved. + * @param new_cap The new capacity in bytes. + */ + public void setCapacity(int new_cap) { + if (new_cap != getCapacity()) { + byte[] new_data = new byte[new_cap]; + if (new_cap < size) { + size = new_cap; + } + if (size != 0) { + System.arraycopy(bytes, 0, new_data, 0, size); + } + bytes = new_data; + } + } + + /** + * Set the BytesWritable to the contents of the given newData. + * @param newData the value to set this BytesWritable to. + */ + public void set(BytesWritable newData) { + set(newData.bytes, 0, newData.size); + } + + /** + * Set the value to a copy of the given byte range + * @param newData the new values to copy in + * @param offset the offset in newData to start at + * @param length the number of bytes to copy + */ + public void set(byte[] newData, int offset, int length) { + setSize(0); + setSize(length); + System.arraycopy(newData, offset, bytes, 0, size); + } + + // inherit javadoc + public void readFields(DataInput in) throws IOException { + setSize(0); // clear the old data + setSize(in.readInt()); + in.readFully(bytes, 0, size); + } + + // inherit javadoc + public void write(DataOutput out) throws IOException { + out.writeInt(size); + out.write(bytes, 0, size); + } + + public int hashCode() { + return super.hashCode(); + } + + /** + * Are the two byte sequences equal? 
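A small sketch of the size/capacity distinction described above: only the first getLength() bytes of getBytes() are valid, and setSize() grows the backing array to 1.5 times the requested size when it has to reallocate. The demo class is illustrative only.

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableDemo {
  public static void main(String[] args) {
    BytesWritable buf = new BytesWritable();
    byte[] payload = { 1, 2, 3, 4, 5 };

    buf.set(payload, 0, payload.length);     // copies the range into the buffer
    System.out.println(buf.getLength());     // 5: number of valid bytes
    System.out.println(buf.getCapacity());   // >= 5: backing array may be larger

    buf.setSize(2);                          // shrink the logical size, keep capacity
    System.out.println(buf);                 // "01 02": hex pairs up to getLength()
  }
}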
+ */ + public boolean equals(Object right_obj) { + if (right_obj instanceof BytesWritable) + return super.equals(right_obj); + return false; + } + + /** + * Generate the stream of bytes as hex pairs separated by ' '. + */ + public String toString() { + StringBuffer sb = new StringBuffer(3*size); + for (int idx = 0; idx < size; idx++) { + // if not the first, put a blank separator in + if (idx != 0) { + sb.append(' '); + } + String num = Integer.toHexString(0xff & bytes[idx]); + // if it is only one digit, add a leading 0. + if (num.length() < 2) { + sb.append('0'); + } + sb.append(num); + } + return sb.toString(); + } + + /** A Comparator optimized for BytesWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(BytesWritable.class); + } + + /** + * Compare the buffers in serialized form. + */ + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, + b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES); + } + } + + static { // register this comparator + WritableComparator.define(BytesWritable.class, new Comparator()); + } + +} diff --git a/src/java/org/apache/hadoop/io/Closeable.java b/src/java/org/apache/hadoop/io/Closeable.java new file mode 100644 index 00000000000..a0cf8a69441 --- /dev/null +++ b/src/java/org/apache/hadoop/io/Closeable.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** @deprecated use java.io.Closeable */ +@Deprecated +public interface Closeable extends java.io.Closeable{ +} diff --git a/src/java/org/apache/hadoop/io/CompressedWritable.java b/src/java/org/apache/hadoop/io/CompressedWritable.java new file mode 100644 index 00000000000..17aca07c4d8 --- /dev/null +++ b/src/java/org/apache/hadoop/io/CompressedWritable.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
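The Comparator-plus-static-registration pattern used by BooleanWritable, ByteWritable and BytesWritable above carries over to user-defined keys. A hedged sketch follows; the MilliTime type is invented for illustration and compares serialized values without deserializing them.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/** A WritableComparable for millisecond timestamps. */
public class MilliTime implements WritableComparable {
  private long millis;

  public void set(long millis) { this.millis = millis; }
  public long get() { return millis; }

  public void write(DataOutput out) throws IOException { out.writeLong(millis); }
  public void readFields(DataInput in) throws IOException { millis = in.readLong(); }

  public int compareTo(Object o) {
    long that = ((MilliTime) o).millis;
    return (millis < that ? -1 : (millis == that ? 0 : 1));
  }

  /** Compares the raw serialized form, like the optimized comparators above. */
  public static class Comparator extends WritableComparator {
    public Comparator() { super(MilliTime.class); }

    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      long thisValue = readLong(b1, s1);
      long thatValue = readLong(b2, s2);
      return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
    }
  }

  static {  // register this comparator
    WritableComparator.define(MilliTime.class, new Comparator());
  }
}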
+ */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.DataInputStream; +import java.io.ByteArrayOutputStream; +import java.io.ByteArrayInputStream; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; + +/** A base-class for Writables which store themselves compressed and lazily + * inflate on field access. This is useful for large objects whose fields are + * not be altered during a map or reduce operation: leaving the field data + * compressed makes copying the instance from one file to another much + * faster. */ +public abstract class CompressedWritable implements Writable { + // if non-null, the compressed field data of this instance. + private byte[] compressed; + + public CompressedWritable() {} + + public final void readFields(DataInput in) throws IOException { + compressed = new byte[in.readInt()]; + in.readFully(compressed, 0, compressed.length); + } + + /** Must be called by all methods which access fields to ensure that the data + * has been uncompressed. */ + protected void ensureInflated() { + if (compressed != null) { + try { + ByteArrayInputStream deflated = new ByteArrayInputStream(compressed); + DataInput inflater = + new DataInputStream(new InflaterInputStream(deflated)); + readFieldsCompressed(inflater); + compressed = null; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + /** Subclasses implement this instead of {@link #readFields(DataInput)}. */ + protected abstract void readFieldsCompressed(DataInput in) + throws IOException; + + public final void write(DataOutput out) throws IOException { + if (compressed == null) { + ByteArrayOutputStream deflated = new ByteArrayOutputStream(); + Deflater deflater = new Deflater(Deflater.BEST_SPEED); + DataOutputStream dout = + new DataOutputStream(new DeflaterOutputStream(deflated, deflater)); + writeCompressed(dout); + dout.close(); + deflater.end(); + compressed = deflated.toByteArray(); + } + out.writeInt(compressed.length); + out.write(compressed); + } + + /** Subclasses implement this instead of {@link #write(DataOutput)}. */ + protected abstract void writeCompressed(DataOutput out) throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/DataInputBuffer.java b/src/java/org/apache/hadoop/io/DataInputBuffer.java new file mode 100644 index 00000000000..71b98f81a39 --- /dev/null +++ b/src/java/org/apache/hadoop/io/DataInputBuffer.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A reusable {@link DataInput} implementation that reads from an in-memory + * buffer. + * + *
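A sketch of the CompressedWritable subclassing contract just shown: implement writeCompressed() and readFieldsCompressed() instead of write()/readFields(), and call ensureInflated() before touching fields. The BigRecord class and its single field are invented for illustration.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.CompressedWritable;

/** Keeps its field data deflated until something actually reads it. */
public class BigRecord extends CompressedWritable {
  private String body = "";

  public void setBody(String body) { this.body = body; }

  public String getBody() {
    ensureInflated();          // inflate lazily before the first field access
    return body;
  }

  @Override
  protected void writeCompressed(DataOutput out) throws IOException {
    out.writeUTF(body);        // routed through a DeflaterOutputStream by the base class
  }

  @Override
  protected void readFieldsCompressed(DataInput in) throws IOException {
    body = in.readUTF();       // read back through an InflaterInputStream
  }
}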

This saves memory over creating a new DataInputStream and + * ByteArrayInputStream each time data is read. + * + *

Typical usage is something like the following:

+ *
+ * DataInputBuffer buffer = new DataInputBuffer();
+ * while (... loop condition ...) {
+ *   byte[] data = ... get data ...;
+ *   int dataLength = ... get data length ...;
+ *   buffer.reset(data, dataLength);
+ *   ... read buffer using DataInput methods ...
+ * }
+ * 
+ * + */ +public class DataInputBuffer extends DataInputStream { + private static class Buffer extends ByteArrayInputStream { + public Buffer() { + super(new byte[] {}); + } + + public void reset(byte[] input, int start, int length) { + this.buf = input; + this.count = start+length; + this.mark = start; + this.pos = start; + } + + public byte[] getData() { return buf; } + public int getPosition() { return pos; } + public int getLength() { return count; } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public DataInputBuffer() { + this(new Buffer()); + } + + private DataInputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int length) { + buffer.reset(input, 0, length); + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int start, int length) { + buffer.reset(input, start, length); + } + + public byte[] getData() { + return buffer.getData(); + } + + /** Returns the current position in the input. */ + public int getPosition() { return buffer.getPosition(); } + + /** Returns the length of the input. */ + public int getLength() { return buffer.getLength(); } + +} diff --git a/src/java/org/apache/hadoop/io/DataOutputBuffer.java b/src/java/org/apache/hadoop/io/DataOutputBuffer.java new file mode 100644 index 00000000000..a7ad89839ea --- /dev/null +++ b/src/java/org/apache/hadoop/io/DataOutputBuffer.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A reusable {@link DataOutput} implementation that writes to an in-memory + * buffer. + * + *

This saves memory over creating a new DataOutputStream and + * ByteArrayOutputStream each time data is written. + * + *

Typical usage is something like the following:

+ *
+ * DataOutputBuffer buffer = new DataOutputBuffer();
+ * while (... loop condition ...) {
+ *   buffer.reset();
+ *   ... write buffer using DataOutput methods ...
+ *   byte[] data = buffer.getData();
+ *   int dataLength = buffer.getLength();
+ *   ... write data to its ultimate destination ...
+ * }
+ * 
+ * + */ +public class DataOutputBuffer extends DataOutputStream { + + private static class Buffer extends ByteArrayOutputStream { + public byte[] getData() { return buf; } + public int getLength() { return count; } + + public Buffer() { + super(); + } + + public Buffer(int size) { + super(size); + } + + public void write(DataInput in, int len) throws IOException { + int newcount = count + len; + if (newcount > buf.length) { + byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)]; + System.arraycopy(buf, 0, newbuf, 0, count); + buf = newbuf; + } + in.readFully(buf, count, len); + count = newcount; + } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public DataOutputBuffer() { + this(new Buffer()); + } + + public DataOutputBuffer(int size) { + this(new Buffer(size)); + } + + private DataOutputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Returns the current contents of the buffer. + * Data is only valid to {@link #getLength()}. + */ + public byte[] getData() { return buffer.getData(); } + + /** Returns the length of the valid data currently in the buffer. */ + public int getLength() { return buffer.getLength(); } + + /** Resets the buffer to empty. */ + public DataOutputBuffer reset() { + this.written = 0; + buffer.reset(); + return this; + } + + /** Writes bytes from a DataInput directly into the buffer. */ + public void write(DataInput in, int length) throws IOException { + buffer.write(in, length); + } + + /** Write to a file stream */ + public void writeTo(OutputStream out) throws IOException { + buffer.writeTo(out); + } +} diff --git a/src/java/org/apache/hadoop/io/DefaultStringifier.java b/src/java/org/apache/hadoop/io/DefaultStringifier.java new file mode 100644 index 00000000000..124a550942d --- /dev/null +++ b/src/java/org/apache/hadoop/io/DefaultStringifier.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.ArrayList; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; +import org.apache.hadoop.util.GenericsUtil; + +/** + * DefaultStringifier is the default implementation of the {@link Stringifier} + * interface which stringifies the objects using base64 encoding of the + * serialized version of the objects. The {@link Serializer} and + * {@link Deserializer} are obtained from the {@link SerializationFactory}. + *
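Tying the two reusable buffers together, here is a sketch of the reset-and-reuse loop from their usage notes above, with Text as the payload; the demo class is illustrative only.

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class BufferReuseDemo {
  public static void main(String[] args) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    DataInputBuffer in = new DataInputBuffer();
    Text scratch = new Text();

    String[] words = { "one", "two", "three" };
    for (String word : words) {
      out.reset();                                 // reuse the same backing array
      new Text(word).write(out);

      in.reset(out.getData(), out.getLength());    // only the first getLength() bytes are valid
      scratch.readFields(in);
      System.out.println(scratch);
    }
  }
}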
+ * DefaultStringifier offers convenience methods to store/load objects to/from + * the configuration. + * + * @param the class of the objects to stringify + */ +public class DefaultStringifier implements Stringifier { + + private static final String SEPARATOR = ","; + + private Serializer serializer; + + private Deserializer deserializer; + + private DataInputBuffer inBuf; + + private DataOutputBuffer outBuf; + + public DefaultStringifier(Configuration conf, Class c) { + + SerializationFactory factory = new SerializationFactory(conf); + this.serializer = factory.getSerializer(c); + this.deserializer = factory.getDeserializer(c); + this.inBuf = new DataInputBuffer(); + this.outBuf = new DataOutputBuffer(); + try { + serializer.open(outBuf); + deserializer.open(inBuf); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + public T fromString(String str) throws IOException { + try { + byte[] bytes = Base64.decodeBase64(str.getBytes("UTF-8")); + inBuf.reset(bytes, bytes.length); + T restored = deserializer.deserialize(null); + return restored; + } catch (UnsupportedCharsetException ex) { + throw new IOException(ex.toString()); + } + } + + public String toString(T obj) throws IOException { + outBuf.reset(); + serializer.serialize(obj); + byte[] buf = new byte[outBuf.getLength()]; + System.arraycopy(outBuf.getData(), 0, buf, 0, buf.length); + return new String(Base64.encodeBase64(buf)); + } + + public void close() throws IOException { + inBuf.close(); + outBuf.close(); + deserializer.close(); + serializer.close(); + } + + /** + * Stores the item in the configuration with the given keyName. + * + * @param the class of the item + * @param conf the configuration to store + * @param item the object to be stored + * @param keyName the name of the key to use + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. + */ + public static void store(Configuration conf, K item, String keyName) + throws IOException { + + DefaultStringifier stringifier = new DefaultStringifier(conf, + GenericsUtil.getClass(item)); + conf.set(keyName, stringifier.toString(item)); + stringifier.close(); + } + + /** + * Restores the object from the configuration. + * + * @param the class of the item + * @param conf the configuration to use + * @param keyName the name of the key to use + * @param itemClass the class of the item + * @return restored object + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. + */ + public static K load(Configuration conf, String keyName, + Class itemClass) throws IOException { + DefaultStringifier stringifier = new DefaultStringifier(conf, + itemClass); + try { + String itemStr = conf.get(keyName); + return stringifier.fromString(itemStr); + } finally { + stringifier.close(); + } + } + + /** + * Stores the array of items in the configuration with the given keyName. + * + * @param the class of the item + * @param conf the configuration to use + * @param items the objects to be stored + * @param keyName the name of the key to use + * @throws IndexOutOfBoundsException if the items array is empty + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. 
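A sketch of the store/load convenience described above; the key name is arbitrary, and a serialization that can handle Text (the default configuration provides the Writable one) is assumed to be in effect.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;

public class StringifierDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();

    // Serialize the object, base64-encode it, and stash it under a config key.
    DefaultStringifier.store(conf, new Text("hello"), "demo.stored.text");

    // Later (for example in a task), decode it back from the configuration.
    Text restored = DefaultStringifier.load(conf, "demo.stored.text", Text.class);
    System.out.println(restored);   // prints hello
  }
}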
+ */ + public static void storeArray(Configuration conf, K[] items, + String keyName) throws IOException { + + DefaultStringifier stringifier = new DefaultStringifier(conf, + GenericsUtil.getClass(items[0])); + try { + StringBuilder builder = new StringBuilder(); + for (K item : items) { + builder.append(stringifier.toString(item)).append(SEPARATOR); + } + conf.set(keyName, builder.toString()); + } + finally { + stringifier.close(); + } + } + + /** + * Restores the array of objects from the configuration. + * + * @param the class of the item + * @param conf the configuration to use + * @param keyName the name of the key to use + * @param itemClass the class of the item + * @return restored object + * @throws IOException : forwards Exceptions from the underlying + * {@link Serialization} classes. + */ + public static K[] loadArray(Configuration conf, String keyName, + Class itemClass) throws IOException { + DefaultStringifier stringifier = new DefaultStringifier(conf, + itemClass); + try { + String itemStr = conf.get(keyName); + ArrayList list = new ArrayList(); + String[] parts = itemStr.split(SEPARATOR); + + for (String part : parts) { + if (!part.equals("")) + list.add(stringifier.fromString(part)); + } + + return GenericsUtil.toArray(itemClass, list); + } + finally { + stringifier.close(); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/DeprecatedUTF8.java b/src/java/org/apache/hadoop/io/DeprecatedUTF8.java new file mode 100644 index 00000000000..b27973c180e --- /dev/null +++ b/src/java/org/apache/hadoop/io/DeprecatedUTF8.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link UTF8}. + * This class should be used only when it is absolutely necessary + * to use {@link UTF8}. The only difference is that using this class + * does not require "@SuppressWarning" annotation to avoid javac warning. + * Instead the deprecation is implied in the class name. + */ +@SuppressWarnings("deprecation") +public class DeprecatedUTF8 extends UTF8 { + + public DeprecatedUTF8() { + super(); + } + + /** Construct from a given string. */ + public DeprecatedUTF8(String string) { + super(string); + } + + /** Construct from a given string. */ + public DeprecatedUTF8(DeprecatedUTF8 utf8) { + super(utf8); + } + + /* The following two are the mostly commonly used methods. + * wrapping them so that editors do not complain about the deprecation. 
+ */ + + public static String readString(DataInput in) throws IOException { + return UTF8.readString(in); + } + + public static int writeString(DataOutput out, String s) throws IOException { + return UTF8.writeString(out, s); + } +} diff --git a/src/java/org/apache/hadoop/io/DoubleWritable.java b/src/java/org/apache/hadoop/io/DoubleWritable.java new file mode 100644 index 00000000000..fa6f3843bf3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/DoubleWritable.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Writable for Double values. + */ +public class DoubleWritable implements WritableComparable { + + private double value = 0.0; + + public DoubleWritable() { + + } + + public DoubleWritable(double value) { + set(value); + } + + public void readFields(DataInput in) throws IOException { + value = in.readDouble(); + } + + public void write(DataOutput out) throws IOException { + out.writeDouble(value); + } + + public void set(double value) { this.value = value; } + + public double get() { return value; } + + /** + * Returns true iff o is a DoubleWritable with the same value. + */ + public boolean equals(Object o) { + if (!(o instanceof DoubleWritable)) { + return false; + } + DoubleWritable other = (DoubleWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)Double.doubleToLongBits(value); + } + + public int compareTo(Object o) { + DoubleWritable other = (DoubleWritable)o; + return (value < other.value ? -1 : (value == other.value ? 0 : 1)); + } + + public String toString() { + return Double.toString(value); + } + + /** A Comparator optimized for DoubleWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(DoubleWritable.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + double thisValue = readDouble(b1, s1); + double thatValue = readDouble(b2, s2); + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + } + + static { // register this comparator + WritableComparator.define(DoubleWritable.class, new Comparator()); + } + +} + diff --git a/src/java/org/apache/hadoop/io/EnumSetWritable.java b/src/java/org/apache/hadoop/io/EnumSetWritable.java new file mode 100644 index 00000000000..7549dca2b6e --- /dev/null +++ b/src/java/org/apache/hadoop/io/EnumSetWritable.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** A Writable wrapper for EnumSet. */ +public class EnumSetWritable> implements Writable, + Configurable { + + private EnumSet value; + + private Class elementType; + + private Configuration conf; + + EnumSetWritable() { + } + + /** + * Construct a new EnumSetWritable. If the value argument is null or + * its size is zero, the elementType argument must not be null. If + * the argument value's size is bigger than zero, the argument + * elementType is not be used. + * + * @param value + * @param elementType + */ + public EnumSetWritable(EnumSet value, Class elementType) { + set(value, elementType); + } + + /** + * Construct a new EnumSetWritable. Argument value should not be null + * or empty. + * + * @param value + */ + public EnumSetWritable(EnumSet value) { + this(value, null); + } + + /** + * reset the EnumSetWritable with specified + * value and elementType. If the value argument + * is null or its size is zero, the elementType argument must not be + * null. If the argument value's size is bigger than zero, the + * argument elementType is not be used. + * + * @param value + * @param elementType + */ + public void set(EnumSet value, Class elementType) { + if ((value == null || value.size() == 0) + && (this.elementType == null && elementType == null)) { + throw new IllegalArgumentException( + "The EnumSet argument is null, or is an empty set but with no elementType provided."); + } + this.value = value; + if (value != null && value.size() > 0) { + Iterator iterator = value.iterator(); + this.elementType = iterator.next().getDeclaringClass(); + } else if (elementType != null) { + this.elementType = elementType; + } + } + + /** Return the value of this EnumSetWritable. 
*/ + public EnumSet get() { + return value; + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + public void readFields(DataInput in) throws IOException { + int length = in.readInt(); + if (length == -1) + this.value = null; + else if (length == 0) { + this.elementType = (Class) ObjectWritable.loadClass(conf, + WritableUtils.readString(in)); + this.value = EnumSet.noneOf(this.elementType); + } else { + E first = (E) ObjectWritable.readObject(in, conf); + this.value = (EnumSet) EnumSet.of(first); + for (int i = 1; i < length; i++) + this.value.add((E) ObjectWritable.readObject(in, conf)); + } + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + if (this.value == null) { + out.writeInt(-1); + WritableUtils.writeString(out, this.elementType.getName()); + } else { + Object[] array = this.value.toArray(); + int length = array.length; + out.writeInt(length); + if (length == 0) { + if (this.elementType == null) + throw new UnsupportedOperationException( + "Unable to serialize empty EnumSet with no element type provided."); + WritableUtils.writeString(out, this.elementType.getName()); + } + for (int i = 0; i < length; i++) { + ObjectWritable.writeObject(out, array[i], array[i].getClass(), conf); + } + } + } + + /** + * Returns true if o is an EnumSetWritable with the same value, + * or both are null. + */ + public boolean equals(Object o) { + if (o == null) { + throw new IllegalArgumentException("null argument passed in equal()."); + } + + if (!(o instanceof EnumSetWritable)) + return false; + + EnumSetWritable other = (EnumSetWritable) o; + + if (this == o || (this.value == other.value)) + return true; + if (this.value == null) // other.value must not be null if we reach here + return false; + + return this.value.equals(other.value); + } + + /** + * Returns the class of all the elements of the underlying EnumSetWriable. It + * may return null. + * + * @return the element class + */ + public Class getElementType() { + return elementType; + } + + /** {@inheritDoc} */ + public int hashCode() { + if (value == null) + return 0; + return (int) value.hashCode(); + } + + /** {@inheritDoc} */ + public String toString() { + if (value == null) + return "(null)"; + return value.toString(); + } + + /** {@inheritDoc} */ + @Override + public Configuration getConf() { + return this.conf; + } + + /** {@inheritDoc} */ + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + static { + WritableFactories.setFactory(EnumSetWritable.class, new WritableFactory() { + @SuppressWarnings("unchecked") + @Override + public Writable newInstance() { + return new EnumSetWritable(); + } + }); + } +} diff --git a/src/java/org/apache/hadoop/io/FloatWritable.java b/src/java/org/apache/hadoop/io/FloatWritable.java new file mode 100644 index 00000000000..484423f0b45 --- /dev/null +++ b/src/java/org/apache/hadoop/io/FloatWritable.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
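A round-trip sketch for the EnumSet wrapper above; the Weekday enum is invented, the element type is passed explicitly because, as write() shows, an empty set cannot be serialized without it, and a Configuration is supplied because readFields() resolves classes through it.

import java.io.IOException;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.EnumSetWritable;

public class EnumSetWritableDemo {
  enum Weekday { MON, TUE, WED, THU, FRI }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();

    EnumSetWritable<Weekday> days =
        new EnumSetWritable<Weekday>(EnumSet.of(Weekday.MON, Weekday.FRI), Weekday.class);

    DataOutputBuffer out = new DataOutputBuffer();
    days.write(out);

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());

    EnumSetWritable<Weekday> copy =
        new EnumSetWritable<Weekday>(EnumSet.allOf(Weekday.class));
    copy.setConf(conf);              // classes are loaded through the Configuration
    copy.readFields(in);
    System.out.println(copy.get());  // prints [MON, FRI]
  }
}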
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for floats. */ +public class FloatWritable implements WritableComparable { + private float value; + + public FloatWritable() {} + + public FloatWritable(float value) { set(value); } + + /** Set the value of this FloatWritable. */ + public void set(float value) { this.value = value; } + + /** Return the value of this FloatWritable. */ + public float get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = in.readFloat(); + } + + public void write(DataOutput out) throws IOException { + out.writeFloat(value); + } + + /** Returns true iff o is a FloatWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof FloatWritable)) + return false; + FloatWritable other = (FloatWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return Float.floatToIntBits(value); + } + + /** Compares two FloatWritables. */ + public int compareTo(Object o) { + float thisValue = this.value; + float thatValue = ((FloatWritable)o).value; + return (thisValue + * When two sequence files, which have same Key type but different Value + * types, are mapped out to reduce, multiple Value types is not allowed. + * In this case, this class can help you wrap instances with different types. + *

+ * + *

+ * Compared with ObjectWritable, this class is much more effective, + * because ObjectWritable will append the class declaration as a String + * into the output file in every Key-Value pair. + *

+ * + *

+ * Generic Writable implements {@link Configurable} interface, so that it will be + * configured by the framework. The configuration is passed to the wrapped objects + * implementing {@link Configurable} interface before deserialization. + *

+ * + * how to use it:
+ * 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ * 2. Implement the abstract method getTypes(), defining + * the classes which will be wrapped in GenericObject in the application. + * Attention: the classes defined in the getTypes() method must + * implement the Writable interface. + *

+ * + * The code looks like this: + *
+ * public class GenericObject extends GenericWritable {
+ * 
+ *   private static Class[] CLASSES = {
+ *               ClassType1.class, 
+ *               ClassType2.class,
+ *               ClassType3.class,
+ *               };
+ *
+ *   protected Class[] getTypes() {
+ *       return CLASSES;
+ *   }
+ *
+ * }
+ * 
+ * + * @since Nov 8, 2006 + */ +public abstract class GenericWritable implements Writable, Configurable { + + private static final byte NOT_SET = -1; + + private byte type = NOT_SET; + + private Writable instance; + + private Configuration conf = null; + + /** + * Set the instance that is wrapped. + * + * @param obj + */ + public void set(Writable obj) { + instance = obj; + Class instanceClazz = instance.getClass(); + Class[] clazzes = getTypes(); + for (int i = 0; i < clazzes.length; i++) { + Class clazz = clazzes[i]; + if (clazz.equals(instanceClazz)) { + type = (byte) i; + return; + } + } + throw new RuntimeException("The type of instance is: " + + instance.getClass() + ", which is NOT registered."); + } + + /** + * Return the wrapped instance. + */ + public Writable get() { + return instance; + } + + public String toString() { + return "GW[" + (instance != null ? ("class=" + instance.getClass().getName() + + ",value=" + instance.toString()) : "(null)") + "]"; + } + + public void readFields(DataInput in) throws IOException { + type = in.readByte(); + Class clazz = getTypes()[type & 0xff]; + try { + instance = ReflectionUtils.newInstance(clazz, conf); + } catch (Exception e) { + e.printStackTrace(); + throw new IOException("Cannot initialize the class: " + clazz); + } + instance.readFields(in); + } + + public void write(DataOutput out) throws IOException { + if (type == NOT_SET || instance == null) + throw new IOException("The GenericWritable has NOT been set correctly. type=" + + type + ", instance=" + instance); + out.writeByte(type); + instance.write(out); + } + + /** + * Return all classes that may be wrapped. Subclasses should implement this + * to return a constant array of classes. + */ + abstract protected Class[] getTypes(); + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + +} diff --git a/src/java/org/apache/hadoop/io/IOUtils.java b/src/java/org/apache/hadoop/io/IOUtils.java new file mode 100644 index 00000000000..44723f4c325 --- /dev/null +++ b/src/java/org/apache/hadoop/io/IOUtils.java @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.net.Socket; + +import org.apache.commons.logging.Log; + +import org.apache.hadoop.conf.Configuration; + +/** + * An utility class for I/O related functionality. + */ +public class IOUtils { + + /** + * Copies from one stream to another. + * @param in InputStrem to read from + * @param out OutputStream to write to + * @param buffSize the size of the buffer + * @param close whether or not close the InputStream and + * OutputStream at the end. The streams are closed in the finally clause. 
+ */ + public static void copyBytes(InputStream in, OutputStream out, int buffSize, boolean close) + throws IOException { + + PrintStream ps = out instanceof PrintStream ? (PrintStream)out : null; + byte buf[] = new byte[buffSize]; + try { + int bytesRead = in.read(buf); + while (bytesRead >= 0) { + out.write(buf, 0, bytesRead); + if ((ps != null) && ps.checkError()) { + throw new IOException("Unable to write to output stream."); + } + bytesRead = in.read(buf); + } + } finally { + if(close) { + out.close(); + in.close(); + } + } + } + + /** + * Copies from one stream to another. closes the input and output streams + * at the end. + * @param in InputStrem to read from + * @param out OutputStream to write to + * @param conf the Configuration object + */ + public static void copyBytes(InputStream in, OutputStream out, Configuration conf) + throws IOException { + copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), true); + } + + /** + * Copies from one stream to another. + * @param in InputStrem to read from + * @param out OutputStream to write to + * @param conf the Configuration object + * @param close whether or not close the InputStream and + * OutputStream at the end. The streams are closed in the finally clause. + */ + public static void copyBytes(InputStream in, OutputStream out, Configuration conf, boolean close) + throws IOException { + copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), close); + } + + /** Reads len bytes in a loop. + * @param in The InputStream to read from + * @param buf The buffer to fill + * @param off offset from the buffer + * @param len the length of bytes to read + * @throws IOException if it could not read requested number of bytes + * for any reason (including EOF) + */ + public static void readFully( InputStream in, byte buf[], + int off, int len ) throws IOException { + int toRead = len; + while ( toRead > 0 ) { + int ret = in.read( buf, off, toRead ); + if ( ret < 0 ) { + throw new IOException( "Premeture EOF from inputStream"); + } + toRead -= ret; + off += ret; + } + } + + /** Similar to readFully(). Skips bytes in a loop. + * @param in The InputStream to skip bytes from + * @param len number of bytes to skip. + * @throws IOException if it could not skip requested number of bytes + * for any reason (including EOF) + */ + public static void skipFully( InputStream in, long len ) throws IOException { + while ( len > 0 ) { + long ret = in.skip( len ); + if ( ret < 0 ) { + throw new IOException( "Premeture EOF from inputStream"); + } + len -= ret; + } + } + + /** + * Close the Closeable objects and ignore any {@link IOException} or + * null pointers. Must only be used for cleanup in exception handlers. + * @param log the log to record problems to at debug level. Can be null. + * @param closeables the objects to close + */ + public static void cleanup(Log log, java.io.Closeable... closeables) { + for(java.io.Closeable c : closeables) { + if (c != null) { + try { + c.close(); + } catch(IOException e) { + if (log != null && log.isDebugEnabled()) { + log.debug("Exception in closing " + c, e); + } + } + } + } + } + + /** + * Closes the stream ignoring {@link IOException}. + * Must only be called in cleaning up from exception handlers. 
+ * @param stream the Stream to close + */ + public static void closeStream( java.io.Closeable stream ) { + cleanup(null, stream); + } + + /** + * Closes the socket ignoring {@link IOException} + * @param sock the Socket to close + */ + public static void closeSocket( Socket sock ) { + // avoids try { close() } dance + if ( sock != null ) { + try { + sock.close(); + } catch ( IOException ignored ) { + } + } + } + + /** /dev/null of OutputStreams. + */ + public static class NullOutputStream extends OutputStream { + public void write(byte[] b, int off, int len) throws IOException { + } + + public void write(int b) throws IOException { + } + } +} diff --git a/src/java/org/apache/hadoop/io/InputBuffer.java b/src/java/org/apache/hadoop/io/InputBuffer.java new file mode 100644 index 00000000000..272a707738b --- /dev/null +++ b/src/java/org/apache/hadoop/io/InputBuffer.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + + +/** A reusable {@link InputStream} implementation that reads from an in-memory + * buffer. + * + *

+ * <p>This saves memory over creating a new InputStream and
+ * ByteArrayInputStream each time data is read.
+ *
+ * <p>Typical usage is something like the following:<pre>
+ *
+ * InputBuffer buffer = new InputBuffer();
+ * while (... loop condition ...) {
+ *   byte[] data = ... get data ...;
+ *   int dataLength = ... get data length ...;
+ *   buffer.reset(data, dataLength);
+ *   ... read buffer using InputStream methods ...
+ * }
+ * </pre>
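+ *
+ * <p>For instance, the "read buffer" step above can use any plain
+ * {@link InputStream} call (the byte values here are arbitrary):<pre>
+ *
+ * byte[] data = new byte[] { 1, 2, 3 };
+ * buffer.reset(data, data.length);
+ * int first = buffer.read();           // returns 1
+ * long skipped = buffer.skip(1);       // skips over the 2
+ * int third = buffer.read();           // returns 3
+ * </pre>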
+ * @see DataInputBuffer + * @see DataOutput + */ +public class InputBuffer extends FilterInputStream { + + private static class Buffer extends ByteArrayInputStream { + public Buffer() { + super(new byte[] {}); + } + + public void reset(byte[] input, int start, int length) { + this.buf = input; + this.count = start+length; + this.mark = start; + this.pos = start; + } + + public int getPosition() { return pos; } + public int getLength() { return count; } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public InputBuffer() { + this(new Buffer()); + } + + private InputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int length) { + buffer.reset(input, 0, length); + } + + /** Resets the data that the buffer reads. */ + public void reset(byte[] input, int start, int length) { + buffer.reset(input, start, length); + } + + /** Returns the current position in the input. */ + public int getPosition() { return buffer.getPosition(); } + + /** Returns the length of the input. */ + public int getLength() { return buffer.getLength(); } + +} diff --git a/src/java/org/apache/hadoop/io/IntWritable.java b/src/java/org/apache/hadoop/io/IntWritable.java new file mode 100644 index 00000000000..99875030a63 --- /dev/null +++ b/src/java/org/apache/hadoop/io/IntWritable.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for ints. */ +public class IntWritable implements WritableComparable { + private int value; + + public IntWritable() {} + + public IntWritable(int value) { set(value); } + + /** Set the value of this IntWritable. */ + public void set(int value) { this.value = value; } + + /** Return the value of this IntWritable. */ + public int get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = in.readInt(); + } + + public void write(DataOutput out) throws IOException { + out.writeInt(value); + } + + /** Returns true iff o is a IntWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof IntWritable)) + return false; + IntWritable other = (IntWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return value; + } + + /** Compares two IntWritables. */ + public int compareTo(Object o) { + int thisValue = this.value; + int thatValue = ((IntWritable)o).value; + return (thisValueo
is a LongWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof LongWritable)) + return false; + LongWritable other = (LongWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)value; + } + + /** Compares two LongWritables. */ + public int compareTo(Object o) { + long thisValue = this.value; + long thatValue = ((LongWritable)o).value; + return (thisValue { + public static final int MD5_LEN = 16; + + private static ThreadLocal DIGESTER_FACTORY = new ThreadLocal() { + protected MessageDigest initialValue() { + try { + return MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + }; + + private byte[] digest; + + /** Constructs an MD5Hash. */ + public MD5Hash() { + this.digest = new byte[MD5_LEN]; + } + + /** Constructs an MD5Hash from a hex string. */ + public MD5Hash(String hex) { + setDigest(hex); + } + + /** Constructs an MD5Hash with a specified value. */ + public MD5Hash(byte[] digest) { + if (digest.length != MD5_LEN) + throw new IllegalArgumentException("Wrong length: " + digest.length); + this.digest = digest; + } + + // javadoc from Writable + public void readFields(DataInput in) throws IOException { + in.readFully(digest); + } + + /** Constructs, reads and returns an instance. */ + public static MD5Hash read(DataInput in) throws IOException { + MD5Hash result = new MD5Hash(); + result.readFields(in); + return result; + } + + // javadoc from Writable + public void write(DataOutput out) throws IOException { + out.write(digest); + } + + /** Copy the contents of another instance into this instance. */ + public void set(MD5Hash that) { + System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN); + } + + /** Returns the digest bytes. */ + public byte[] getDigest() { return digest; } + + /** Construct a hash value for a byte array. */ + public static MD5Hash digest(byte[] data) { + return digest(data, 0, data.length); + } + + /** Construct a hash value for the content from the InputStream. */ + public static MD5Hash digest(InputStream in) throws IOException { + final byte[] buffer = new byte[4*1024]; + + final MessageDigest digester = DIGESTER_FACTORY.get(); + for(int n; (n = in.read(buffer)) != -1; ) { + digester.update(buffer, 0, n); + } + + return new MD5Hash(digester.digest()); + } + + /** Construct a hash value for a byte array. */ + public static MD5Hash digest(byte[] data, int start, int len) { + byte[] digest; + MessageDigest digester = DIGESTER_FACTORY.get(); + digester.update(data, start, len); + digest = digester.digest(); + return new MD5Hash(digest); + } + + /** Construct a hash value for a String. */ + public static MD5Hash digest(String string) { + return digest(UTF8.getBytes(string)); + } + + /** Construct a hash value for a String. */ + public static MD5Hash digest(UTF8 utf8) { + return digest(utf8.getBytes(), 0, utf8.getLength()); + } + + /** Construct a half-sized version of this MD5. Fits in a long **/ + public long halfDigest() { + long value = 0; + for (int i = 0; i < 8; i++) + value |= ((digest[i] & 0xffL) << (8*(7-i))); + return value; + } + + /** + * Return a 32-bit digest of the MD5. + * @return the first 4 bytes of the md5 + */ + public int quarterDigest() { + int value = 0; + for (int i = 0; i < 4; i++) + value |= ((digest[i] & 0xff) << (8*(3-i))); + return value; + } + + /** Returns true iff o is an MD5Hash whose digest contains the + * same values. 
*/ + public boolean equals(Object o) { + if (!(o instanceof MD5Hash)) + return false; + MD5Hash other = (MD5Hash)o; + return Arrays.equals(this.digest, other.digest); + } + + /** Returns a hash code value for this object. + * Only uses the first 4 bytes, since md5s are evenly distributed. + */ + public int hashCode() { + return quarterDigest(); + } + + + /** Compares this object with the specified object for order.*/ + public int compareTo(MD5Hash that) { + return WritableComparator.compareBytes(this.digest, 0, MD5_LEN, + that.digest, 0, MD5_LEN); + } + + /** A WritableComparator optimized for MD5Hash keys. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(MD5Hash.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return compareBytes(b1, s1, MD5_LEN, b2, s2, MD5_LEN); + } + } + + static { // register this comparator + WritableComparator.define(MD5Hash.class, new Comparator()); + } + + private static final char[] HEX_DIGITS = + {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + + /** Returns a string representation of this object. */ + public String toString() { + StringBuffer buf = new StringBuffer(MD5_LEN*2); + for (int i = 0; i < MD5_LEN; i++) { + int b = digest[i]; + buf.append(HEX_DIGITS[(b >> 4) & 0xf]); + buf.append(HEX_DIGITS[b & 0xf]); + } + return buf.toString(); + } + + /** Sets the digest value from a hex string. */ + public void setDigest(String hex) { + if (hex.length() != MD5_LEN*2) + throw new IllegalArgumentException("Wrong length: " + hex.length()); + byte[] digest = new byte[MD5_LEN]; + for (int i = 0; i < MD5_LEN; i++) { + int j = i << 1; + digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 | + charToNibble(hex.charAt(j+1))); + } + this.digest = digest; + } + + private static final int charToNibble(char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } else if (c >= 'a' && c <= 'f') { + return 0xa + (c - 'a'); + } else if (c >= 'A' && c <= 'F') { + return 0xA + (c - 'A'); + } else { + throw new RuntimeException("Not a hex character: " + c); + } + } + + +} diff --git a/src/java/org/apache/hadoop/io/MapFile.java b/src/java/org/apache/hadoop/io/MapFile.java new file mode 100644 index 00000000000..10598f0a42a --- /dev/null +++ b/src/java/org/apache/hadoop/io/MapFile.java @@ -0,0 +1,713 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io; + +import java.util.ArrayList; +import java.util.Arrays; +import java.io.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.DefaultCodec; + +/** A file-based map from keys to values. + * + *

A map is a directory containing two files, the data file, + * containing all keys and values in the map, and a smaller index + * file, containing a fraction of the keys. The fraction is determined by + * {@link Writer#getIndexInterval()}. + * + *

The index file is read entirely into memory. Thus key implementations + * should try to keep themselves small. + * + *
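+ *
+ * <p>A minimal sketch of writing and then probing a map, assuming an existing
+ * Configuration conf and FileSystem fs (the directory name and the key/value
+ * classes are arbitrary):<pre>
+ *
+ * MapFile.Writer writer =
+ *   new MapFile.Writer(conf, fs, "example.map", Text.class, LongWritable.class);
+ * writer.append(new Text("alpha"), new LongWritable(1));  // keys must be appended in sorted order
+ * writer.append(new Text("beta"), new LongWritable(2));
+ * writer.close();
+ *
+ * MapFile.Reader reader = new MapFile.Reader(fs, "example.map", conf);
+ * LongWritable value = new LongWritable();
+ * reader.get(new Text("beta"), value);                    // returns null if the key is absent
+ * reader.close();
+ * </pre>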

Map files are created by adding entries in-order. To maintain a large + * database, perform updates by copying the previous version of a database and + * merging in a sorted change list, to create a new version of the database in + * a new file. Sorting large change lists can be done with {@link + * SequenceFile.Sorter}. + */ +public class MapFile { + private static final Log LOG = LogFactory.getLog(MapFile.class); + + /** The name of the index file. */ + public static final String INDEX_FILE_NAME = "index"; + + /** The name of the data file. */ + public static final String DATA_FILE_NAME = "data"; + + protected MapFile() {} // no public ctor + + /** Writes a new map. */ + public static class Writer implements java.io.Closeable { + private SequenceFile.Writer data; + private SequenceFile.Writer index; + + final private static String INDEX_INTERVAL = "io.map.index.interval"; + private int indexInterval = 128; + + private long size; + private LongWritable position = new LongWritable(); + + // the following fields are used only for checking key order + private WritableComparator comparator; + private DataInputBuffer inBuf = new DataInputBuffer(); + private DataOutputBuffer outBuf = new DataOutputBuffer(); + private WritableComparable lastKey; + + /** What's the position (in bytes) we wrote when we got the last index */ + private long lastIndexPos = -1; + + /** + * What was size when we last wrote an index. Set to MIN_VALUE to ensure that + * we have an index at position zero -- midKey will throw an exception if this + * is not the case + */ + private long lastIndexKeyCount = Long.MIN_VALUE; + + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass) + throws IOException { + this(conf, fs, dirName, + WritableComparator.get(keyClass), valClass, + SequenceFile.getCompressionType(conf)); + } + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass, + CompressionType compress, Progressable progress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), valClass, + compress, progress); + } + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass, + CompressionType compress, CompressionCodec codec, + Progressable progress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), valClass, + compress, codec, progress); + } + + /** Create the named map for keys of the named class. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, Class valClass, + CompressionType compress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), valClass, compress); + } + + /** Create the named map using the named key comparator. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass) + throws IOException { + this(conf, fs, dirName, comparator, valClass, + SequenceFile.getCompressionType(conf)); + } + /** Create the named map using the named key comparator. 
*/ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + SequenceFile.CompressionType compress) + throws IOException { + this(conf, fs, dirName, comparator, valClass, compress, null); + } + /** Create the named map using the named key comparator. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + SequenceFile.CompressionType compress, + Progressable progress) + throws IOException { + this(conf, fs, dirName, comparator, valClass, + compress, new DefaultCodec(), progress); + } + /** Create the named map using the named key comparator. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, Class valClass, + SequenceFile.CompressionType compress, CompressionCodec codec, + Progressable progress) + throws IOException { + + this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval); + + this.comparator = comparator; + this.lastKey = comparator.newKey(); + + Path dir = new Path(dirName); + if (!fs.mkdirs(dir)) { + throw new IOException("Mkdirs failed to create directory " + dir.toString()); + } + Path dataFile = new Path(dir, DATA_FILE_NAME); + Path indexFile = new Path(dir, INDEX_FILE_NAME); + + Class keyClass = comparator.getKeyClass(); + this.data = + SequenceFile.createWriter + (fs, conf, dataFile, keyClass, valClass, compress, codec, progress); + this.index = + SequenceFile.createWriter + (fs, conf, indexFile, keyClass, LongWritable.class, + CompressionType.BLOCK, progress); + } + + /** The number of entries that are added before an index entry is added.*/ + public int getIndexInterval() { return indexInterval; } + + /** Sets the index interval. + * @see #getIndexInterval() + */ + public void setIndexInterval(int interval) { indexInterval = interval; } + + /** Sets the index interval and stores it in conf + * @see #getIndexInterval() + */ + public static void setIndexInterval(Configuration conf, int interval) { + conf.setInt(INDEX_INTERVAL, interval); + } + + /** Close the map. */ + public synchronized void close() throws IOException { + data.close(); + index.close(); + } + + /** Append a key/value pair to the map. The key must be greater or equal + * to the previous key added to the map. */ + public synchronized void append(WritableComparable key, Writable val) + throws IOException { + + checkKey(key); + + long pos = data.getLength(); + // Only write an index if we've changed positions. In a block compressed + // file, this means we write an entry at the start of each block + if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) { + position.set(pos); // point to current eof + index.append(key, position); + lastIndexPos = pos; + lastIndexKeyCount = size; + } + + data.append(key, val); // append key/value to data + size++; + } + + private void checkKey(WritableComparable key) throws IOException { + // check that keys are well-ordered + if (size != 0 && comparator.compare(lastKey, key) > 0) + throw new IOException("key out of order: "+key+" after "+lastKey); + + // update lastKey with a copy of key by writing and reading + outBuf.reset(); + key.write(outBuf); // write new key + + inBuf.reset(outBuf.getData(), outBuf.getLength()); + lastKey.readFields(inBuf); // read into lastKey + } + + } + + /** Provide access to an existing map. */ + public static class Reader implements java.io.Closeable { + + /** Number of index entries to skip between each entry. Zero by default. 
+ * Setting this to values larger than zero can facilitate opening large map + * files using less memory. */ + private int INDEX_SKIP = 0; + + private WritableComparator comparator; + + private WritableComparable nextKey; + private long seekPosition = -1; + private int seekIndex = -1; + private long firstPosition; + + // the data, on disk + private SequenceFile.Reader data; + private SequenceFile.Reader index; + + // whether the index Reader was closed + private boolean indexClosed = false; + + // the index, in memory + private int count = -1; + private WritableComparable[] keys; + private long[] positions; + + /** Returns the class of keys in this file. */ + public Class getKeyClass() { return data.getKeyClass(); } + + /** Returns the class of values in this file. */ + public Class getValueClass() { return data.getValueClass(); } + + /** Construct a map reader for the named map.*/ + public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException { + this(fs, dirName, null, conf); + INDEX_SKIP = conf.getInt("io.map.index.skip", 0); + } + + /** Construct a map reader for the named map using the named comparator.*/ + public Reader(FileSystem fs, String dirName, WritableComparator comparator, Configuration conf) + throws IOException { + this(fs, dirName, comparator, conf, true); + } + + /** + * Hook to allow subclasses to defer opening streams until further + * initialization is complete. + * @see #createDataFileReader(FileSystem, Path, Configuration) + */ + protected Reader(FileSystem fs, String dirName, + WritableComparator comparator, Configuration conf, boolean open) + throws IOException { + + if (open) { + open(fs, dirName, comparator, conf); + } + } + + protected synchronized void open(FileSystem fs, String dirName, + WritableComparator comparator, Configuration conf) throws IOException { + Path dir = new Path(dirName); + Path dataFile = new Path(dir, DATA_FILE_NAME); + Path indexFile = new Path(dir, INDEX_FILE_NAME); + + // open the data + this.data = createDataFileReader(fs, dataFile, conf); + this.firstPosition = data.getPosition(); + + if (comparator == null) + this.comparator = WritableComparator.get(data.getKeyClass().asSubclass(WritableComparable.class)); + else + this.comparator = comparator; + + // open the index + this.index = new SequenceFile.Reader(fs, indexFile, conf); + } + + /** + * Override this method to specialize the type of + * {@link SequenceFile.Reader} returned. + */ + protected SequenceFile.Reader createDataFileReader(FileSystem fs, + Path dataFile, Configuration conf) throws IOException { + return new SequenceFile.Reader(fs, dataFile, conf); + } + + private void readIndex() throws IOException { + // read the index entirely into memory + if (this.keys != null) + return; + this.count = 0; + this.positions = new long[1024]; + + try { + int skip = INDEX_SKIP; + LongWritable position = new LongWritable(); + WritableComparable lastKey = null; + long lastIndex = -1; + ArrayList keyBuilder = new ArrayList(1024); + while (true) { + WritableComparable k = comparator.newKey(); + + if (!index.next(k, position)) + break; + + // check order to make sure comparator is compatible + if (lastKey != null && comparator.compare(lastKey, k) > 0) + throw new IOException("key out of order: "+k+" after "+lastKey); + lastKey = k; + if (skip > 0) { + skip--; + continue; // skip this entry + } else { + skip = INDEX_SKIP; // reset skip + } + + // don't read an index that is the same as the previous one. 
Block + // compressed map files used to do this (multiple entries would point + // at the same block) + if (position.get() == lastIndex) + continue; + + if (count == positions.length) { + positions = Arrays.copyOf(positions, positions.length * 2); + } + + keyBuilder.add(k); + positions[count] = position.get(); + count++; + } + + this.keys = keyBuilder.toArray(new WritableComparable[count]); + positions = Arrays.copyOf(positions, count); + } catch (EOFException e) { + LOG.warn("Unexpected EOF reading " + index + + " at entry #" + count + ". Ignoring."); + } finally { + indexClosed = true; + index.close(); + } + } + + /** Re-positions the reader before its first key. */ + public synchronized void reset() throws IOException { + data.seek(firstPosition); + } + + /** Get the key at approximately the middle of the file. Or null if the + * file is empty. + */ + public synchronized WritableComparable midKey() throws IOException { + + readIndex(); + if (count == 0) { + return null; + } + + return keys[(count - 1) / 2]; + } + + /** Reads the final key from the file. + * + * @param key key to read into + */ + public synchronized void finalKey(WritableComparable key) + throws IOException { + + long originalPosition = data.getPosition(); // save position + try { + readIndex(); // make sure index is valid + if (count > 0) { + data.seek(positions[count-1]); // skip to last indexed entry + } else { + reset(); // start at the beginning + } + while (data.next(key)) {} // scan to eof + + } finally { + data.seek(originalPosition); // restore position + } + } + + /** Positions the reader at the named key, or if none such exists, at the + * first entry after the named key. Returns true iff the named key exists + * in this map. + */ + public synchronized boolean seek(WritableComparable key) throws IOException { + return seekInternal(key) == 0; + } + + /** + * Positions the reader at the named key, or if none such exists, at the + * first entry after the named key. + * + * @return 0 - exact match found + * < 0 - positioned at next record + * 1 - no more records in file + */ + private synchronized int seekInternal(WritableComparable key) + throws IOException { + return seekInternal(key, false); + } + + /** + * Positions the reader at the named key, or if none such exists, at the + * key that falls just before or just after dependent on how the + * before parameter is set. + * + * @param before - IF true, and key does not exist, position + * file at entry that falls just before key. Otherwise, + * position file at record that sorts just after. 
+ * @return 0 - exact match found + * < 0 - positioned at next record + * 1 - no more records in file + */ + private synchronized int seekInternal(WritableComparable key, + final boolean before) + throws IOException { + readIndex(); // make sure index is read + + if (seekIndex != -1 // seeked before + && seekIndex+1 < count + && comparator.compare(key, keys[seekIndex+1])<0 // before next indexed + && comparator.compare(key, nextKey) + >= 0) { // but after last seeked + // do nothing + } else { + seekIndex = binarySearch(key); + if (seekIndex < 0) // decode insertion point + seekIndex = -seekIndex-2; + + if (seekIndex == -1) // belongs before first entry + seekPosition = firstPosition; // use beginning of file + else + seekPosition = positions[seekIndex]; // else use index + } + data.seek(seekPosition); + + if (nextKey == null) + nextKey = comparator.newKey(); + + // If we're looking for the key before, we need to keep track + // of the position we got the current key as well as the position + // of the key before it. + long prevPosition = -1; + long curPosition = seekPosition; + + while (data.next(nextKey)) { + int c = comparator.compare(key, nextKey); + if (c <= 0) { // at or beyond desired + if (before && c != 0) { + if (prevPosition == -1) { + // We're on the first record of this index block + // and we've already passed the search key. Therefore + // we must be at the beginning of the file, so seek + // to the beginning of this block and return c + data.seek(curPosition); + } else { + // We have a previous record to back up to + data.seek(prevPosition); + data.next(nextKey); + // now that we've rewound, the search key must be greater than this key + return 1; + } + } + return c; + } + if (before) { + prevPosition = curPosition; + curPosition = data.getPosition(); + } + } + + return 1; + } + + private int binarySearch(WritableComparable key) { + int low = 0; + int high = count-1; + + while (low <= high) { + int mid = (low + high) >>> 1; + WritableComparable midVal = keys[mid]; + int cmp = comparator.compare(midVal, key); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. + } + + /** Read the next key/value pair in the map into key and + * val. Returns true if such a pair exists and false when at + * the end of the map */ + public synchronized boolean next(WritableComparable key, Writable val) + throws IOException { + return data.next(key, val); + } + + /** Return the value for the named key, or null if none exists. */ + public synchronized Writable get(WritableComparable key, Writable val) + throws IOException { + if (seek(key)) { + data.getCurrentValue(val); + return val; + } else + return null; + } + + /** + * Finds the record that is the closest match to the specified key. + * Returns key or if it does not exist, at the first entry + * after the named key. + * +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof. + */ + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val) + throws IOException { + return getClosest(key, val, false); + } + + /** + * Finds the record that is the closest match to the specified key. + * + * @param key - key that we're trying to find + * @param val - data value if key is found + * @param before - IF true, and key does not exist, return + * the first entry that falls just before the key. 
Otherwise, + * return the record that sorts just after. + * @return - the key that was the closest match or null if eof. + */ + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val, final boolean before) + throws IOException { + + int c = seekInternal(key, before); + + // If we didn't get an exact match, and we ended up in the wrong + // direction relative to the query key, return null since we + // must be at the beginning or end of the file. + if ((!before && c > 0) || + (before && c < 0)) { + return null; + } + + data.getCurrentValue(val); + return nextKey; + } + + /** Close the map. */ + public synchronized void close() throws IOException { + if (!indexClosed) { + index.close(); + } + data.close(); + } + + } + + /** Renames an existing map directory. */ + public static void rename(FileSystem fs, String oldName, String newName) + throws IOException { + Path oldDir = new Path(oldName); + Path newDir = new Path(newName); + if (!fs.rename(oldDir, newDir)) { + throw new IOException("Could not rename " + oldDir + " to " + newDir); + } + } + + /** Deletes the named map file. */ + public static void delete(FileSystem fs, String name) throws IOException { + Path dir = new Path(name); + Path data = new Path(dir, DATA_FILE_NAME); + Path index = new Path(dir, INDEX_FILE_NAME); + + fs.delete(data, true); + fs.delete(index, true); + fs.delete(dir, true); + } + + /** + * This method attempts to fix a corrupt MapFile by re-creating its index. + * @param fs filesystem + * @param dir directory containing the MapFile data and index + * @param keyClass key class (has to be a subclass of Writable) + * @param valueClass value class (has to be a subclass of Writable) + * @param dryrun do not perform any changes, just report what needs to be done + * @return number of valid entries in this MapFile, or -1 if no fixing was needed + * @throws Exception + */ + public static long fix(FileSystem fs, Path dir, + Class keyClass, + Class valueClass, boolean dryrun, + Configuration conf) throws Exception { + String dr = (dryrun ? "[DRY RUN ] " : ""); + Path data = new Path(dir, DATA_FILE_NAME); + Path index = new Path(dir, INDEX_FILE_NAME); + int indexInterval = 128; + if (!fs.exists(data)) { + // there's nothing we can do to fix this! + throw new Exception(dr + "Missing data file in " + dir + ", impossible to fix this."); + } + if (fs.exists(index)) { + // no fixing needed + return -1; + } + SequenceFile.Reader dataReader = new SequenceFile.Reader(fs, data, conf); + if (!dataReader.getKeyClass().equals(keyClass)) { + throw new Exception(dr + "Wrong key class in " + dir + ", expected" + keyClass.getName() + + ", got " + dataReader.getKeyClass().getName()); + } + if (!dataReader.getValueClass().equals(valueClass)) { + throw new Exception(dr + "Wrong value class in " + dir + ", expected" + valueClass.getName() + + ", got " + dataReader.getValueClass().getName()); + } + long cnt = 0L; + Writable key = ReflectionUtils.newInstance(keyClass, conf); + Writable value = ReflectionUtils.newInstance(valueClass, conf); + SequenceFile.Writer indexWriter = null; + if (!dryrun) indexWriter = SequenceFile.createWriter(fs, conf, index, keyClass, LongWritable.class); + try { + long pos = 0L; + LongWritable position = new LongWritable(); + while(dataReader.next(key, value)) { + cnt++; + if (cnt % indexInterval == 0) { + position.set(pos); + if (!dryrun) indexWriter.append(key, position); + } + pos = dataReader.getPosition(); + } + } catch(Throwable t) { + // truncated data file. swallow it. 
+ } + dataReader.close(); + if (!dryrun) indexWriter.close(); + return cnt; + } + + + public static void main(String[] args) throws Exception { + String usage = "Usage: MapFile inFile outFile"; + + if (args.length != 2) { + System.err.println(usage); + System.exit(-1); + } + + String in = args[0]; + String out = args[1]; + + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.getLocal(conf); + MapFile.Reader reader = new MapFile.Reader(fs, in, conf); + MapFile.Writer writer = + new MapFile.Writer(conf, fs, out, + reader.getKeyClass().asSubclass(WritableComparable.class), + reader.getValueClass()); + + WritableComparable key = + ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class), conf); + Writable value = + ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf); + + while (reader.next(key, value)) // copy all entries + writer.append(key, value); + + writer.close(); + } + +} diff --git a/src/java/org/apache/hadoop/io/MapWritable.java b/src/java/org/apache/hadoop/io/MapWritable.java new file mode 100644 index 00000000000..66c493be2c3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/MapWritable.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A Writable Map. + */ +public class MapWritable extends AbstractMapWritable + implements Map { + + private Map instance; + + /** Default constructor. */ + public MapWritable() { + super(); + this.instance = new HashMap(); + } + + /** + * Copy constructor. 
+ * + * @param other the map to copy from + */ + public MapWritable(MapWritable other) { + this(); + copy(other); + } + + /** {@inheritDoc} */ + public void clear() { + instance.clear(); + } + + /** {@inheritDoc} */ + public boolean containsKey(Object key) { + return instance.containsKey(key); + } + + /** {@inheritDoc} */ + public boolean containsValue(Object value) { + return instance.containsValue(value); + } + + /** {@inheritDoc} */ + public Set> entrySet() { + return instance.entrySet(); + } + + /** {@inheritDoc} */ + public Writable get(Object key) { + return instance.get(key); + } + + /** {@inheritDoc} */ + public boolean isEmpty() { + return instance.isEmpty(); + } + + /** {@inheritDoc} */ + public Set keySet() { + return instance.keySet(); + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + public Writable put(Writable key, Writable value) { + addToMap(key.getClass()); + addToMap(value.getClass()); + return instance.put(key, value); + } + + /** {@inheritDoc} */ + public void putAll(Map t) { + for (Map.Entry e: t.entrySet()) { + put(e.getKey(), e.getValue()); + } + } + + /** {@inheritDoc} */ + public Writable remove(Object key) { + return instance.remove(key); + } + + /** {@inheritDoc} */ + public int size() { + return instance.size(); + } + + /** {@inheritDoc} */ + public Collection values() { + return instance.values(); + } + + // Writable + + /** {@inheritDoc} */ + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + // Write out the number of entries in the map + + out.writeInt(instance.size()); + + // Then write out each key/value pair + + for (Map.Entry e: instance.entrySet()) { + out.writeByte(getId(e.getKey().getClass())); + e.getKey().write(out); + out.writeByte(getId(e.getValue().getClass())); + e.getValue().write(out); + } + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + // First clear the map. Otherwise we will just accumulate + // entries every time this method is called. + this.instance.clear(); + + // Read the number of entries in the map + + int entries = in.readInt(); + + // Then read each key/value pair + + for (int i = 0; i < entries; i++) { + Writable key = (Writable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + key.readFields(in); + + Writable value = (Writable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + value.readFields(in); + instance.put(key, value); + } + } +} diff --git a/src/java/org/apache/hadoop/io/MultipleIOException.java b/src/java/org/apache/hadoop/io/MultipleIOException.java new file mode 100644 index 00000000000..eea6b556d74 --- /dev/null +++ b/src/java/org/apache/hadoop/io/MultipleIOException.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.IOException; +import java.util.List; + +/** Encapsulate a list of {@link IOException} into an {@link IOException} */ +public class MultipleIOException extends IOException { + /** Require by {@link java.io.Serializable} */ + private static final long serialVersionUID = 1L; + + private final List exceptions; + + /** Constructor is private, use {@link #createIOException(List)}. */ + private MultipleIOException(List exceptions) { + super(exceptions.size() + " exceptions " + exceptions); + this.exceptions = exceptions; + } + + /** @return the underlying exceptions */ + public List getExceptions() {return exceptions;} + + /** A convenient method to create an {@link IOException}. */ + public static IOException createIOException(List exceptions) { + if (exceptions == null || exceptions.isEmpty()) { + return null; + } + if (exceptions.size() == 1) { + return exceptions.get(0); + } + return new MultipleIOException(exceptions); + } +} diff --git a/src/java/org/apache/hadoop/io/NullWritable.java b/src/java/org/apache/hadoop/io/NullWritable.java new file mode 100644 index 00000000000..1df85c84fa9 --- /dev/null +++ b/src/java/org/apache/hadoop/io/NullWritable.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** Singleton Writable with no data. */ +public class NullWritable implements WritableComparable { + + private static final NullWritable THIS = new NullWritable(); + + private NullWritable() {} // no public ctor + + /** Returns the single instance of this class. */ + public static NullWritable get() { return THIS; } + + public String toString() { + return "(null)"; + } + + public int hashCode() { return 0; } + public int compareTo(Object other) { + if (!(other instanceof NullWritable)) { + throw new ClassCastException("can't compare " + other.getClass().getName() + + " to NullWritable"); + } + return 0; + } + public boolean equals(Object other) { return other instanceof NullWritable; } + public void readFields(DataInput in) throws IOException {} + public void write(DataOutput out) throws IOException {} + + /** A Comparator "optimized" for NullWritable. */ + public static class Comparator extends WritableComparator { + public Comparator() { + super(NullWritable.class); + } + + /** + * Compare the buffers in serialized form. 
+ */ + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + assert 0 == l1; + assert 0 == l2; + return 0; + } + } + + static { // register this comparator + WritableComparator.define(NullWritable.class, new Comparator()); + } +} + diff --git a/src/java/org/apache/hadoop/io/ObjectWritable.java b/src/java/org/apache/hadoop/io/ObjectWritable.java new file mode 100644 index 00000000000..df1c44bb2ac --- /dev/null +++ b/src/java/org/apache/hadoop/io/ObjectWritable.java @@ -0,0 +1,273 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.lang.reflect.Array; + +import java.io.*; +import java.util.*; + +import org.apache.hadoop.conf.*; + +/** A polymorphic Writable that writes an instance with it's class name. + * Handles arrays, strings and primitive types without a Writable wrapper. + */ +public class ObjectWritable implements Writable, Configurable { + + private Class declaredClass; + private Object instance; + private Configuration conf; + + public ObjectWritable() {} + + public ObjectWritable(Object instance) { + set(instance); + } + + public ObjectWritable(Class declaredClass, Object instance) { + this.declaredClass = declaredClass; + this.instance = instance; + } + + /** Return the instance, or null if none. */ + public Object get() { return instance; } + + /** Return the class this is meant to be. */ + public Class getDeclaredClass() { return declaredClass; } + + /** Reset the instance. 
*/ + public void set(Object instance) { + this.declaredClass = instance.getClass(); + this.instance = instance; + } + + public String toString() { + return "OW[class=" + declaredClass + ",value=" + instance + "]"; + } + + + public void readFields(DataInput in) throws IOException { + readObject(in, this, this.conf); + } + + public void write(DataOutput out) throws IOException { + writeObject(out, instance, declaredClass, conf); + } + + private static final Map> PRIMITIVE_NAMES = new HashMap>(); + static { + PRIMITIVE_NAMES.put("boolean", Boolean.TYPE); + PRIMITIVE_NAMES.put("byte", Byte.TYPE); + PRIMITIVE_NAMES.put("char", Character.TYPE); + PRIMITIVE_NAMES.put("short", Short.TYPE); + PRIMITIVE_NAMES.put("int", Integer.TYPE); + PRIMITIVE_NAMES.put("long", Long.TYPE); + PRIMITIVE_NAMES.put("float", Float.TYPE); + PRIMITIVE_NAMES.put("double", Double.TYPE); + PRIMITIVE_NAMES.put("void", Void.TYPE); + } + + private static class NullInstance extends Configured implements Writable { + private Class declaredClass; + public NullInstance() { super(null); } + public NullInstance(Class declaredClass, Configuration conf) { + super(conf); + this.declaredClass = declaredClass; + } + public void readFields(DataInput in) throws IOException { + String className = UTF8.readString(in); + declaredClass = PRIMITIVE_NAMES.get(className); + if (declaredClass == null) { + try { + declaredClass = getConf().getClassByName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e.toString()); + } + } + } + public void write(DataOutput out) throws IOException { + UTF8.writeString(out, declaredClass.getName()); + } + } + + /** Write a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. */ + public static void writeObject(DataOutput out, Object instance, + Class declaredClass, + Configuration conf) throws IOException { + + if (instance == null) { // null + instance = new NullInstance(declaredClass, conf); + declaredClass = Writable.class; + } + + UTF8.writeString(out, declaredClass.getName()); // always write declared + + if (declaredClass.isArray()) { // array + int length = Array.getLength(instance); + out.writeInt(length); + for (int i = 0; i < length; i++) { + writeObject(out, Array.get(instance, i), + declaredClass.getComponentType(), conf); + } + + } else if (declaredClass == String.class) { // String + UTF8.writeString(out, (String)instance); + + } else if (declaredClass.isPrimitive()) { // primitive type + + if (declaredClass == Boolean.TYPE) { // boolean + out.writeBoolean(((Boolean)instance).booleanValue()); + } else if (declaredClass == Character.TYPE) { // char + out.writeChar(((Character)instance).charValue()); + } else if (declaredClass == Byte.TYPE) { // byte + out.writeByte(((Byte)instance).byteValue()); + } else if (declaredClass == Short.TYPE) { // short + out.writeShort(((Short)instance).shortValue()); + } else if (declaredClass == Integer.TYPE) { // int + out.writeInt(((Integer)instance).intValue()); + } else if (declaredClass == Long.TYPE) { // long + out.writeLong(((Long)instance).longValue()); + } else if (declaredClass == Float.TYPE) { // float + out.writeFloat(((Float)instance).floatValue()); + } else if (declaredClass == Double.TYPE) { // double + out.writeDouble(((Double)instance).doubleValue()); + } else if (declaredClass == Void.TYPE) { // void + } else { + throw new IllegalArgumentException("Not a primitive: "+declaredClass); + } + } else if (declaredClass.isEnum()) { // enum + UTF8.writeString(out, ((Enum)instance).name()); + } else 
if (Writable.class.isAssignableFrom(declaredClass)) { // Writable + UTF8.writeString(out, instance.getClass().getName()); + ((Writable)instance).write(out); + + } else { + throw new IOException("Can't write: "+instance+" as "+declaredClass); + } + } + + + /** Read a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. */ + public static Object readObject(DataInput in, Configuration conf) + throws IOException { + return readObject(in, null, conf); + } + + /** Read a {@link Writable}, {@link String}, primitive type, or an array of + * the preceding. */ + @SuppressWarnings("unchecked") + public static Object readObject(DataInput in, ObjectWritable objectWritable, Configuration conf) + throws IOException { + String className = UTF8.readString(in); + Class declaredClass = PRIMITIVE_NAMES.get(className); + if (declaredClass == null) { + declaredClass = loadClass(conf, className); + } + + Object instance; + + if (declaredClass.isPrimitive()) { // primitive types + + if (declaredClass == Boolean.TYPE) { // boolean + instance = Boolean.valueOf(in.readBoolean()); + } else if (declaredClass == Character.TYPE) { // char + instance = Character.valueOf(in.readChar()); + } else if (declaredClass == Byte.TYPE) { // byte + instance = Byte.valueOf(in.readByte()); + } else if (declaredClass == Short.TYPE) { // short + instance = Short.valueOf(in.readShort()); + } else if (declaredClass == Integer.TYPE) { // int + instance = Integer.valueOf(in.readInt()); + } else if (declaredClass == Long.TYPE) { // long + instance = Long.valueOf(in.readLong()); + } else if (declaredClass == Float.TYPE) { // float + instance = Float.valueOf(in.readFloat()); + } else if (declaredClass == Double.TYPE) { // double + instance = Double.valueOf(in.readDouble()); + } else if (declaredClass == Void.TYPE) { // void + instance = null; + } else { + throw new IllegalArgumentException("Not a primitive: "+declaredClass); + } + + } else if (declaredClass.isArray()) { // array + int length = in.readInt(); + instance = Array.newInstance(declaredClass.getComponentType(), length); + for (int i = 0; i < length; i++) { + Array.set(instance, i, readObject(in, conf)); + } + + } else if (declaredClass == String.class) { // String + instance = UTF8.readString(in); + } else if (declaredClass.isEnum()) { // enum + instance = Enum.valueOf((Class) declaredClass, UTF8.readString(in)); + } else { // Writable + Class instanceClass = null; + String str = UTF8.readString(in); + instanceClass = loadClass(conf, str); + + Writable writable = WritableFactories.newInstance(instanceClass, conf); + writable.readFields(in); + instance = writable; + + if (instanceClass == NullInstance.class) { // null + declaredClass = ((NullInstance)instance).declaredClass; + instance = null; + } + } + + if (objectWritable != null) { // store values + objectWritable.declaredClass = declaredClass; + objectWritable.instance = instance; + } + + return instance; + + } + + /** + * Find and load the class with given name className by first finding + * it in the specified conf. If the specified conf is null, + * try load it directly. 
+ */ + public static Class loadClass(Configuration conf, String className) { + Class declaredClass = null; + try { + if (conf != null) + declaredClass = conf.getClassByName(className); + else + declaredClass = Class.forName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException("readObject can't find class " + className, + e); + } + return declaredClass; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + public Configuration getConf() { + return this.conf; + } + +} diff --git a/src/java/org/apache/hadoop/io/OutputBuffer.java b/src/java/org/apache/hadoop/io/OutputBuffer.java new file mode 100644 index 00000000000..943cb52dbce --- /dev/null +++ b/src/java/org/apache/hadoop/io/OutputBuffer.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A reusable {@link OutputStream} implementation that writes to an in-memory + * buffer. + * + *

+ * <p>This saves memory over creating a new OutputStream and
+ * ByteArrayOutputStream each time data is written.
+ *
+ * <p>Typical usage is something like the following:<pre>
+ *
+ * OutputBuffer buffer = new OutputBuffer();
+ * while (... loop condition ...) {
+ *   buffer.reset();
+ *   ... write buffer using OutputStream methods ...
+ *   byte[] data = buffer.getData();
+ *   int dataLength = buffer.getLength();
+ *   ... write data to its ultimate destination ...
+ * }
+ * </pre>
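+ *
+ * <p>The captured bytes can be handed straight to an {@link InputBuffer}
+ * (a sketch; the single byte written here is arbitrary):<pre>
+ *
+ * OutputBuffer out = new OutputBuffer();
+ * out.write(42);                           // any OutputStream call lands in the buffer
+ * InputBuffer in = new InputBuffer();
+ * in.reset(out.getData(), out.getLength());
+ * int b = in.read();                       // reads back 42
+ * </pre>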
+ * @see DataOutputBuffer + * @see InputBuffer + */ +public class OutputBuffer extends FilterOutputStream { + + private static class Buffer extends ByteArrayOutputStream { + public byte[] getData() { return buf; } + public int getLength() { return count; } + public void reset() { count = 0; } + + public void write(InputStream in, int len) throws IOException { + int newcount = count + len; + if (newcount > buf.length) { + byte newbuf[] = new byte[Math.max(buf.length << 1, newcount)]; + System.arraycopy(buf, 0, newbuf, 0, count); + buf = newbuf; + } + IOUtils.readFully(in, buf, count, len); + count = newcount; + } + } + + private Buffer buffer; + + /** Constructs a new empty buffer. */ + public OutputBuffer() { + this(new Buffer()); + } + + private OutputBuffer(Buffer buffer) { + super(buffer); + this.buffer = buffer; + } + + /** Returns the current contents of the buffer. + * Data is only valid to {@link #getLength()}. + */ + public byte[] getData() { return buffer.getData(); } + + /** Returns the length of the valid data currently in the buffer. */ + public int getLength() { return buffer.getLength(); } + + /** Resets the buffer to empty. */ + public OutputBuffer reset() { + buffer.reset(); + return this; + } + + /** Writes bytes from a InputStream directly into the buffer. */ + public void write(InputStream in, int length) throws IOException { + buffer.write(in, length); + } +} diff --git a/src/java/org/apache/hadoop/io/RawComparator.java b/src/java/org/apache/hadoop/io/RawComparator.java new file mode 100644 index 00000000000..4efbb7acfc0 --- /dev/null +++ b/src/java/org/apache/hadoop/io/RawComparator.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.util.Comparator; + +import org.apache.hadoop.io.serializer.DeserializerComparator; + +/** + *

+ * A {@link Comparator} that operates directly on byte representations of
+ * objects.
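As a hedged illustration (assuming the generic form RawComparator<T>; not part of this patch), a comparator over the raw 4-byte big-endian encoding used by IntWritable could be written as:

    // Illustrative sketch: compares IntWritable keys directly on their serialized
    // bytes (4-byte big-endian), never deserializing them.
    public class IntRawComparator implements RawComparator<IntWritable> {
      public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // l1 and l2 are both 4 for IntWritable; decode before comparing so that
        // negative values order numerically rather than lexicographically.
        int i1 = ((b1[s1] & 0xff) << 24) | ((b1[s1 + 1] & 0xff) << 16)
               | ((b1[s1 + 2] & 0xff) << 8) | (b1[s1 + 3] & 0xff);
        int i2 = ((b2[s2] & 0xff) << 24) | ((b2[s2 + 1] & 0xff) << 16)
               | ((b2[s2 + 2] & 0xff) << 8) | (b2[s2 + 3] & 0xff);
        return (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1);
      }
      public int compare(IntWritable a, IntWritable b) {   // Comparator contract
        return a.compareTo(b);
      }
    }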

+ * @param + * @see DeserializerComparator + */ +public interface RawComparator extends Comparator { + + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2); + +} diff --git a/src/java/org/apache/hadoop/io/SequenceFile.java b/src/java/org/apache/hadoop/io/SequenceFile.java new file mode 100644 index 00000000000..c2ee9540978 --- /dev/null +++ b/src/java/org/apache/hadoop/io/SequenceFile.java @@ -0,0 +1,3244 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.util.*; +import java.rmi.server.UID; +import java.security.MessageDigest; +import org.apache.commons.logging.*; +import org.apache.hadoop.fs.*; +import org.apache.hadoop.io.compress.CodecPool; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.Progress; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.hadoop.util.MergeSort; +import org.apache.hadoop.util.PriorityQueue; + +/** + * SequenceFiles are flat files consisting of binary key/value + * pairs. + * + *

+ * SequenceFile provides {@link Writer}, {@link Reader} and
+ * {@link Sorter} classes for writing, reading and sorting respectively.
+ * + * There are three SequenceFile Writers based on the + * {@link CompressionType} used to compress key/value pairs: + *
    + *
+ *   1. Writer : Uncompressed records.
+ *   2. RecordCompressWriter : Record-compressed files, only compress
+ *      values.
+ *   3. BlockCompressWriter : Block-compressed files, both keys &
+ *      values are collected in 'blocks' separately and compressed.
+ *      The size of the 'block' is configurable.
+ *
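For illustration (a hypothetical sketch with a made-up path, not part of the patch): the CompressionType handed to the createWriter factories described below selects among these three writer variants.

    // NONE -> Writer, RECORD -> RecordCompressWriter, BLOCK -> BlockCompressWriter
    SequenceFile.Writer writer = SequenceFile.createWriter(
        fs, conf, new Path("/tmp/example-block.seq"),   // hypothetical path
        Text.class, Text.class,
        SequenceFile.CompressionType.BLOCK,
        new DefaultCodec());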

+ * The actual compression algorithm used to compress key and/or values can be
+ * specified by using the appropriate {@link CompressionCodec}.
+ *
+ * The recommended way is to use the static createWriter methods
+ * provided by the SequenceFile to choose the preferred format.
+ *
+ * The {@link Reader} acts as the bridge and can read any of the above
+ * SequenceFile formats.
+ *
+ * SequenceFile Formats
+ *
+ * Essentially there are 3 different formats for SequenceFiles
+ * depending on the CompressionType specified. All of them share a
+ * common header described below.
+ *
+ *   - version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ *     version number (e.g. SEQ4 or SEQ6)
+ *   - keyClassName - key class
+ *   - valueClassName - value class
+ *   - compression - A boolean which specifies if compression is turned on for
+ *     keys/values in this file.
+ *   - blockCompression - A boolean which specifies if block-compression is
+ *     turned on for keys/values in this file.
+ *   - compression codec - CompressionCodec class which is used for
+ *     compression of keys and/or values (if compression is enabled).
+ *   - metadata - {@link Metadata} for this file.
+ *   - sync - A sync marker to denote end of the header.
+ *
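The header fields listed above surface through the Reader accessors; a small sketch, assuming fs, file and conf are already set up (illustrative only, not part of the patch):

    // Hypothetical sketch: inspect the header of an existing SequenceFile.
    SequenceFile.Reader r = new SequenceFile.Reader(fs, file, conf);
    System.out.println("key class        : " + r.getKeyClassName());
    System.out.println("value class      : " + r.getValueClassName());
    System.out.println("compressed       : " + r.isCompressed());
    System.out.println("block compressed : " + r.isBlockCompressed());
    System.out.println("codec            : " + r.getCompressionCodec());
    System.out.println("metadata         : " + r.getMetadata());
    r.close();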
+ * Uncompressed SequenceFile Format
+ *   - Header
+ *   - Record
+ *       - Record length
+ *       - Key length
+ *       - Key
+ *       - Value
+ *   - A sync-marker every few 100 bytes or so.
+ *
+ * Record-Compressed SequenceFile Format
+ *   - Header
+ *   - Record
+ *       - Record length
+ *       - Key length
+ *       - Key
+ *       - Compressed Value
+ *   - A sync-marker every few 100 bytes or so.
+ *
+ * Block-Compressed SequenceFile Format
+ *   - Header
+ *   - Record Block
+ *       - Compressed key-lengths block-size
+ *       - Compressed key-lengths block
+ *       - Compressed keys block-size
+ *       - Compressed keys block
+ *       - Compressed value-lengths block-size
+ *       - Compressed value-lengths block
+ *       - Compressed values block-size
+ *       - Compressed values block
+ *   - A sync-marker every few 100 bytes or so.
+ *

+ * The compressed blocks of key lengths and value lengths consist of the
+ * actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ * format.
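Tying the formats together, a minimal end-to-end sketch (paths and data are hypothetical; the usual org.apache.hadoop.conf, fs and io imports are assumed) that writes a record-compressed file through the createWriter factory and reads it back with the format-agnostic Reader:

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("/tmp/example.seq");                 // hypothetical path

    // Write 100 key/value pairs with record compression.
    SequenceFile.Writer writer = SequenceFile.createWriter(
        fs, conf, file, IntWritable.class, Text.class,
        SequenceFile.CompressionType.RECORD, new DefaultCodec());
    try {
      for (int i = 0; i < 100; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    } finally {
      writer.close();
    }

    // Read the pairs back; the Reader handles any of the three formats.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      reader.close();
    }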

+ * + * @see CompressionCodec + */ +public class SequenceFile { + private static final Log LOG = LogFactory.getLog(SequenceFile.class); + + private SequenceFile() {} // no public ctor + + private static final byte BLOCK_COMPRESS_VERSION = (byte)4; + private static final byte CUSTOM_COMPRESS_VERSION = (byte)5; + private static final byte VERSION_WITH_METADATA = (byte)6; + private static byte[] VERSION = new byte[] { + (byte)'S', (byte)'E', (byte)'Q', VERSION_WITH_METADATA + }; + + private static final int SYNC_ESCAPE = -1; // "length" of sync entries + private static final int SYNC_HASH_SIZE = 16; // number of bytes in hash + private static final int SYNC_SIZE = 4+SYNC_HASH_SIZE; // escape + hash + + /** The number of bytes between sync points.*/ + public static final int SYNC_INTERVAL = 100*SYNC_SIZE; + + /** + * The compression type used to compress key/value pairs in the + * {@link SequenceFile}. + * + * @see SequenceFile.Writer + */ + public static enum CompressionType { + /** Do not compress records. */ + NONE, + /** Compress values only, each separately. */ + RECORD, + /** Compress sequences of records together in blocks. */ + BLOCK + } + + /** + * Get the compression type for the reduce outputs + * @param job the job config to look in + * @return the kind of compression to use + * @deprecated Use + * {@link org.apache.hadoop.mapred.SequenceFileOutputFormat#getOutputCompressionType(org.apache.hadoop.mapred.JobConf)} + * to get {@link CompressionType} for job-outputs. + */ + @Deprecated + static public CompressionType getCompressionType(Configuration job) { + String name = job.get("io.seqfile.compression.type"); + return name == null ? CompressionType.RECORD : + CompressionType.valueOf(name); + } + + /** + * Set the compression type for sequence files. + * @param job the configuration to modify + * @param val the new compression type (none, block, record) + * @deprecated Use the one of the many SequenceFile.createWriter methods to specify + * the {@link CompressionType} while creating the {@link SequenceFile} or + * {@link org.apache.hadoop.mapred.SequenceFileOutputFormat#setOutputCompressionType(org.apache.hadoop.mapred.JobConf, org.apache.hadoop.io.SequenceFile.CompressionType)} + * to specify the {@link CompressionType} for job-outputs. + * or + */ + @Deprecated + static public void setCompressionType(Configuration job, + CompressionType val) { + job.set("io.seqfile.compression.type", val.toString()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass) + throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + getCompressionType(conf)); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @return Returns the handle to the constructed SequenceFile Writer. 
+ * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionType compressionType) + throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, new DefaultCodec(), null, new Metadata()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param progress The Progressable object to track progress. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionType compressionType, + Progressable progress) throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, new DefaultCodec(), progress, new Metadata()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + CompressionType compressionType, CompressionCodec codec) + throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, codec, null, new Metadata()); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param progress The Progressable object to track progress. + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + CompressionType compressionType, CompressionCodec codec, + Progressable progress, Metadata metadata) throws IOException { + return createWriter(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + compressionType, codec, progress, metadata); + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param bufferSize buffer size for the underlaying outputstream. 
+ * @param replication replication factor for the file. + * @param blockSize block size for the file. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param progress The Progressable object to track progress. + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, int bufferSize, + short replication, long blockSize, + CompressionType compressionType, CompressionCodec codec, + Progressable progress, Metadata metadata) throws IOException { + if ((codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (compressionType == CompressionType.NONE) { + writer = new Writer(fs, conf, name, keyClass, valClass, + bufferSize, replication, blockSize, + progress, metadata); + } else if (compressionType == CompressionType.RECORD) { + writer = new RecordCompressWriter(fs, conf, name, keyClass, valClass, + bufferSize, replication, blockSize, + codec, progress, metadata); + } else if (compressionType == CompressionType.BLOCK){ + writer = new BlockCompressWriter(fs, conf, name, keyClass, valClass, + bufferSize, replication, blockSize, + codec, progress, metadata); + } + + return writer; + } + + /** + * Construct the preferred type of SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param name The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param progress The Progressable object to track progress. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + CompressionType compressionType, CompressionCodec codec, + Progressable progress) throws IOException { + Writer writer = createWriter(fs, conf, name, keyClass, valClass, + compressionType, codec, progress, new Metadata()); + return writer; + } + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param out The stream on top which the writer is to be constructed. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compress Compress data? + * @param blockCompress Compress blocks? + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. 
+ * @throws IOException + */ + private static Writer + createWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, boolean compress, boolean blockCompress, + CompressionCodec codec, Metadata metadata) + throws IOException { + if (codec != null && (codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (!compress) { + writer = new Writer(conf, out, keyClass, valClass, metadata); + } else if (compress && !blockCompress) { + writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } else { + writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } + + return writer; + } + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param fs The configured filesystem. + * @param conf The configuration. + * @param file The name of the file. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compress Compress data? + * @param blockCompress Compress blocks? + * @param codec The compression codec. + * @param progress + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + private static Writer + createWriter(FileSystem fs, Configuration conf, Path file, + Class keyClass, Class valClass, + boolean compress, boolean blockCompress, + CompressionCodec codec, Progressable progress, Metadata metadata) + throws IOException { + if (codec != null && (codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (!compress) { + writer = new Writer(fs, conf, file, keyClass, valClass, progress, metadata); + } else if (compress && !blockCompress) { + writer = new RecordCompressWriter(fs, conf, file, keyClass, valClass, + codec, progress, metadata); + } else { + writer = new BlockCompressWriter(fs, conf, file, keyClass, valClass, + codec, progress, metadata); + } + + return writer; +} + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param conf The configuration. + * @param out The stream on top which the writer is to be constructed. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @param metadata The metadata of the file. + * @return Returns the handle to the constructed SequenceFile Writer. 
+ * @throws IOException + */ + public static Writer + createWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionType compressionType, + CompressionCodec codec, Metadata metadata) + throws IOException { + if ((codec instanceof GzipCodec) && + !NativeCodeLoader.isNativeCodeLoaded() && + !ZlibFactory.isNativeZlibLoaded(conf)) { + throw new IllegalArgumentException("SequenceFile doesn't work with " + + "GzipCodec without native-hadoop code!"); + } + + Writer writer = null; + + if (compressionType == CompressionType.NONE) { + writer = new Writer(conf, out, keyClass, valClass, metadata); + } else if (compressionType == CompressionType.RECORD) { + writer = new RecordCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } else if (compressionType == CompressionType.BLOCK){ + writer = new BlockCompressWriter(conf, out, keyClass, valClass, codec, metadata); + } + + return writer; + } + + /** + * Construct the preferred type of 'raw' SequenceFile Writer. + * @param conf The configuration. + * @param out The stream on top which the writer is to be constructed. + * @param keyClass The 'key' type. + * @param valClass The 'value' type. + * @param compressionType The compression type. + * @param codec The compression codec. + * @return Returns the handle to the constructed SequenceFile Writer. + * @throws IOException + */ + public static Writer + createWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionType compressionType, + CompressionCodec codec) + throws IOException { + Writer writer = createWriter(conf, out, keyClass, valClass, compressionType, + codec, new Metadata()); + return writer; + } + + + /** The interface to 'raw' values of SequenceFiles. */ + public static interface ValueBytes { + + /** Writes the uncompressed bytes to the outStream. + * @param outStream : Stream to write uncompressed bytes into. + * @throws IOException + */ + public void writeUncompressedBytes(DataOutputStream outStream) + throws IOException; + + /** Write compressed bytes to outStream. + * Note: that it will NOT compress the bytes if they are not compressed. + * @param outStream : Stream to write compressed bytes into. + */ + public void writeCompressedBytes(DataOutputStream outStream) + throws IllegalArgumentException, IOException; + + /** + * Size of stored data. 
+ */ + public int getSize(); + } + + private static class UncompressedBytes implements ValueBytes { + private int dataSize; + private byte[] data; + + private UncompressedBytes() { + data = null; + dataSize = 0; + } + + private void reset(DataInputStream in, int length) throws IOException { + if (data == null) { + data = new byte[length]; + } else if (length > data.length) { + data = new byte[Math.max(length, data.length * 2)]; + } + dataSize = -1; + in.readFully(data, 0, length); + dataSize = length; + } + + public int getSize() { + return dataSize; + } + + public void writeUncompressedBytes(DataOutputStream outStream) + throws IOException { + outStream.write(data, 0, dataSize); + } + + public void writeCompressedBytes(DataOutputStream outStream) + throws IllegalArgumentException, IOException { + throw + new IllegalArgumentException("UncompressedBytes cannot be compressed!"); + } + + } // UncompressedBytes + + private static class CompressedBytes implements ValueBytes { + private int dataSize; + private byte[] data; + DataInputBuffer rawData = null; + CompressionCodec codec = null; + CompressionInputStream decompressedStream = null; + + private CompressedBytes(CompressionCodec codec) { + data = null; + dataSize = 0; + this.codec = codec; + } + + private void reset(DataInputStream in, int length) throws IOException { + if (data == null) { + data = new byte[length]; + } else if (length > data.length) { + data = new byte[Math.max(length, data.length * 2)]; + } + dataSize = -1; + in.readFully(data, 0, length); + dataSize = length; + } + + public int getSize() { + return dataSize; + } + + public void writeUncompressedBytes(DataOutputStream outStream) + throws IOException { + if (decompressedStream == null) { + rawData = new DataInputBuffer(); + decompressedStream = codec.createInputStream(rawData); + } else { + decompressedStream.resetState(); + } + rawData.reset(data, 0, dataSize); + + byte[] buffer = new byte[8192]; + int bytesRead = 0; + while ((bytesRead = decompressedStream.read(buffer, 0, 8192)) != -1) { + outStream.write(buffer, 0, bytesRead); + } + } + + public void writeCompressedBytes(DataOutputStream outStream) + throws IllegalArgumentException, IOException { + outStream.write(data, 0, dataSize); + } + + } // CompressedBytes + + /** + * The class encapsulating with the metadata of a file. + * The metadata of a file is a list of attribute name/value + * pairs of Text type. 
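A hedged sketch of the Metadata facility described above: user-defined Text name/value pairs are attached at write time and read back from the Reader (the attribute names, and the fs/conf/file variables, are assumptions, not part of the patch):

    SequenceFile.Metadata meta = new SequenceFile.Metadata();
    meta.set(new Text("creator"), new Text("example-job"));   // made-up attribute
    meta.set(new Text("schema"), new Text("int->string"));    // made-up attribute

    SequenceFile.Writer w = SequenceFile.createWriter(
        fs, conf, file, IntWritable.class, Text.class,
        SequenceFile.CompressionType.BLOCK, new DefaultCodec(),
        null /* no Progressable */, meta);
    w.close();

    SequenceFile.Reader r = new SequenceFile.Reader(fs, file, conf);
    Text creator = r.getMetadata().get(new Text("creator"));  // "example-job"
    r.close();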
+ * + */ + public static class Metadata implements Writable { + + private TreeMap theMetadata; + + public Metadata() { + this(new TreeMap()); + } + + public Metadata(TreeMap arg) { + if (arg == null) { + this.theMetadata = new TreeMap(); + } else { + this.theMetadata = arg; + } + } + + public Text get(Text name) { + return this.theMetadata.get(name); + } + + public void set(Text name, Text value) { + this.theMetadata.put(name, value); + } + + public TreeMap getMetadata() { + return new TreeMap(this.theMetadata); + } + + public void write(DataOutput out) throws IOException { + out.writeInt(this.theMetadata.size()); + Iterator> iter = + this.theMetadata.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry en = iter.next(); + en.getKey().write(out); + en.getValue().write(out); + } + } + + public void readFields(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz < 0) throw new IOException("Invalid size: " + sz + " for file metadata object"); + this.theMetadata = new TreeMap(); + for (int i = 0; i < sz; i++) { + Text key = new Text(); + Text val = new Text(); + key.readFields(in); + val.readFields(in); + this.theMetadata.put(key, val); + } + } + + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass() != this.getClass()) { + return false; + } else { + return equals((Metadata)other); + } + } + + public boolean equals(Metadata other) { + if (other == null) return false; + if (this.theMetadata.size() != other.theMetadata.size()) { + return false; + } + Iterator> iter1 = + this.theMetadata.entrySet().iterator(); + Iterator> iter2 = + other.theMetadata.entrySet().iterator(); + while (iter1.hasNext() && iter2.hasNext()) { + Map.Entry en1 = iter1.next(); + Map.Entry en2 = iter2.next(); + if (!en1.getKey().equals(en2.getKey())) { + return false; + } + if (!en1.getValue().equals(en2.getValue())) { + return false; + } + } + if (iter1.hasNext() || iter2.hasNext()) { + return false; + } + return true; + } + + public int hashCode() { + assert false : "hashCode not designed"; + return 42; // any arbitrary constant will do + } + + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("size: ").append(this.theMetadata.size()).append("\n"); + Iterator> iter = + this.theMetadata.entrySet().iterator(); + while (iter.hasNext()) { + Map.Entry en = iter.next(); + sb.append("\t").append(en.getKey().toString()).append("\t").append(en.getValue().toString()); + sb.append("\n"); + } + return sb.toString(); + } + } + + /** Write key/value pairs to a sequence-format file. */ + public static class Writer implements java.io.Closeable { + Configuration conf; + FSDataOutputStream out; + boolean ownOutputStream = true; + DataOutputBuffer buffer = new DataOutputBuffer(); + + Class keyClass; + Class valClass; + + private boolean compress; + CompressionCodec codec = null; + CompressionOutputStream deflateFilter = null; + DataOutputStream deflateOut = null; + Metadata metadata = null; + Compressor compressor = null; + + protected Serializer keySerializer; + protected Serializer uncompressedValSerializer; + protected Serializer compressedValSerializer; + + // Insert a globally unique 16-byte value every few entries, so that one + // can seek into the middle of a file and then synchronize with record + // starts and ends by scanning for this value. 
+ long lastSyncPos; // position of last sync + byte[] sync; // 16 random bytes + { + try { + MessageDigest digester = MessageDigest.getInstance("MD5"); + long time = System.currentTimeMillis(); + digester.update((new UID()+"@"+time).getBytes()); + sync = digester.digest(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** Implicit constructor: needed for the period of transition!*/ + Writer() + {} + + /** Create the named file. */ + public Writer(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass) + throws IOException { + this(fs, conf, name, keyClass, valClass, null, new Metadata()); + } + + /** Create the named file with write-progress reporter. */ + public Writer(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + Progressable progress, Metadata metadata) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), + progress, metadata); + } + + /** Create the named file with write-progress reporter. */ + public Writer(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + int bufferSize, short replication, long blockSize, + Progressable progress, Metadata metadata) + throws IOException { + init(name, conf, + fs.create(name, true, bufferSize, replication, blockSize, progress), + keyClass, valClass, false, null, metadata); + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Write to an arbitrary stream using a specified buffer size. */ + private Writer(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, Metadata metadata) + throws IOException { + this.ownOutputStream = false; + init(null, conf, out, keyClass, valClass, false, null, metadata); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Write the initial part of file header. */ + void initializeFileHeader() + throws IOException{ + out.write(VERSION); + } + + /** Write the final part of file header. */ + void finalizeFileHeader() + throws IOException{ + out.write(sync); // write the sync bytes + out.flush(); // flush header + } + + boolean isCompressed() { return compress; } + boolean isBlockCompressed() { return false; } + + /** Write and flush the file header. */ + void writeFileHeader() + throws IOException { + Text.writeString(out, keyClass.getName()); + Text.writeString(out, valClass.getName()); + + out.writeBoolean(this.isCompressed()); + out.writeBoolean(this.isBlockCompressed()); + + if (this.isCompressed()) { + Text.writeString(out, (codec.getClass()).getName()); + } + this.metadata.write(out); + } + + /** Initialize. 
*/ + @SuppressWarnings("unchecked") + void init(Path name, Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, + boolean compress, CompressionCodec codec, Metadata metadata) + throws IOException { + this.conf = conf; + this.out = out; + this.keyClass = keyClass; + this.valClass = valClass; + this.compress = compress; + this.codec = codec; + this.metadata = metadata; + SerializationFactory serializationFactory = new SerializationFactory(conf); + this.keySerializer = serializationFactory.getSerializer(keyClass); + this.keySerializer.open(buffer); + this.uncompressedValSerializer = serializationFactory.getSerializer(valClass); + this.uncompressedValSerializer.open(buffer); + if (this.codec != null) { + ReflectionUtils.setConf(this.codec, this.conf); + this.compressor = CodecPool.getCompressor(this.codec); + this.deflateFilter = this.codec.createOutputStream(buffer, compressor); + this.deflateOut = + new DataOutputStream(new BufferedOutputStream(deflateFilter)); + this.compressedValSerializer = serializationFactory.getSerializer(valClass); + this.compressedValSerializer.open(deflateOut); + } + } + + /** Returns the class of keys in this file. */ + public Class getKeyClass() { return keyClass; } + + /** Returns the class of values in this file. */ + public Class getValueClass() { return valClass; } + + /** Returns the compression codec of data in this file. */ + public CompressionCodec getCompressionCodec() { return codec; } + + /** create a sync point */ + public void sync() throws IOException { + if (sync != null && lastSyncPos != out.getPos()) { + out.writeInt(SYNC_ESCAPE); // mark the start of the sync + out.write(sync); // write sync + lastSyncPos = out.getPos(); // update lastSyncPos + } + } + + /** Returns the configuration of this file. */ + Configuration getConf() { return conf; } + + /** Close the file. */ + public synchronized void close() throws IOException { + keySerializer.close(); + uncompressedValSerializer.close(); + if (compressedValSerializer != null) { + compressedValSerializer.close(); + } + + CodecPool.returnCompressor(compressor); + compressor = null; + + if (out != null) { + + // Close the underlying stream iff we own it... + if (ownOutputStream) { + out.close(); + } else { + out.flush(); + } + out = null; + } + } + + synchronized void checkAndWriteSync() throws IOException { + if (sync != null && + out.getPos() >= lastSyncPos+SYNC_INTERVAL) { // time to emit sync + sync(); + } + } + + /** Append a key/value pair. */ + public synchronized void append(Writable key, Writable val) + throws IOException { + append((Object) key, (Object) val); + } + + /** Append a key/value pair. 
*/ + @SuppressWarnings("unchecked") + public synchronized void append(Object key, Object val) + throws IOException { + if (key.getClass() != keyClass) + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + if (val.getClass() != valClass) + throw new IOException("wrong value class: "+val.getClass().getName() + +" is not "+valClass); + + buffer.reset(); + + // Append the 'key' + keySerializer.serialize(key); + int keyLength = buffer.getLength(); + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + key); + + // Append the 'value' + if (compress) { + deflateFilter.resetState(); + compressedValSerializer.serialize(val); + deflateOut.flush(); + deflateFilter.finish(); + } else { + uncompressedValSerializer.serialize(val); + } + + // Write the record out + checkAndWriteSync(); // sync + out.writeInt(buffer.getLength()); // total record length + out.writeInt(keyLength); // key portion length + out.write(buffer.getData(), 0, buffer.getLength()); // data + } + + public synchronized void appendRaw(byte[] keyData, int keyOffset, + int keyLength, ValueBytes val) throws IOException { + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + keyLength); + + int valLength = val.getSize(); + + checkAndWriteSync(); + + out.writeInt(keyLength+valLength); // total record length + out.writeInt(keyLength); // key portion length + out.write(keyData, keyOffset, keyLength); // key + val.writeUncompressedBytes(out); // value + } + + /** Returns the current length of the output file. + * + *

This always returns a synchronized position. In other words, + * immediately after calling {@link SequenceFile.Reader#seek(long)} with a position + * returned by this method, {@link SequenceFile.Reader#next(Writable)} may be called. However + * the key may be earlier in the file than key last written when this + * method was called (e.g., with block-compression, it may be the first key + * in the block that was being written when this method was called). + */ + public synchronized long getLength() throws IOException { + return out.getPos(); + } + + } // class Writer + + /** Write key/compressed-value pairs to a sequence-format file. */ + static class RecordCompressWriter extends Writer { + + /** Create the named file. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec) + throws IOException { + this(conf, fs.create(name), keyClass, valClass, codec, new Metadata()); + } + + /** Create the named file with write-progress reporter. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, + progress, metadata); + } + + /** Create the named file with write-progress reporter. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + int bufferSize, short replication, long blockSize, + CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + super.init(name, conf, + fs.create(name, true, bufferSize, replication, blockSize, progress), + keyClass, valClass, true, codec, metadata); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Create the named file with write-progress reporter. */ + public RecordCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress) + throws IOException { + this(fs, conf, name, keyClass, valClass, codec, progress, new Metadata()); + } + + /** Write to an arbitrary stream using a specified buffer size. */ + private RecordCompressWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionCodec codec, Metadata metadata) + throws IOException { + this.ownOutputStream = false; + super.init(null, conf, out, keyClass, valClass, true, codec, metadata); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + + } + + boolean isCompressed() { return true; } + boolean isBlockCompressed() { return false; } + + /** Append a key/value pair. 
*/ + @SuppressWarnings("unchecked") + public synchronized void append(Object key, Object val) + throws IOException { + if (key.getClass() != keyClass) + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + if (val.getClass() != valClass) + throw new IOException("wrong value class: "+val.getClass().getName() + +" is not "+valClass); + + buffer.reset(); + + // Append the 'key' + keySerializer.serialize(key); + int keyLength = buffer.getLength(); + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + key); + + // Compress 'value' and append it + deflateFilter.resetState(); + compressedValSerializer.serialize(val); + deflateOut.flush(); + deflateFilter.finish(); + + // Write the record out + checkAndWriteSync(); // sync + out.writeInt(buffer.getLength()); // total record length + out.writeInt(keyLength); // key portion length + out.write(buffer.getData(), 0, buffer.getLength()); // data + } + + /** Append a key/value pair. */ + public synchronized void appendRaw(byte[] keyData, int keyOffset, + int keyLength, ValueBytes val) throws IOException { + + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + keyLength); + + int valLength = val.getSize(); + + checkAndWriteSync(); // sync + out.writeInt(keyLength+valLength); // total record length + out.writeInt(keyLength); // key portion length + out.write(keyData, keyOffset, keyLength); // 'key' data + val.writeCompressedBytes(out); // 'value' data + } + + } // RecordCompressionWriter + + /** Write compressed key/value blocks to a sequence-format file. */ + static class BlockCompressWriter extends Writer { + + private int noBufferedRecords = 0; + + private DataOutputBuffer keyLenBuffer = new DataOutputBuffer(); + private DataOutputBuffer keyBuffer = new DataOutputBuffer(); + + private DataOutputBuffer valLenBuffer = new DataOutputBuffer(); + private DataOutputBuffer valBuffer = new DataOutputBuffer(); + + private int compressionBlockSize; + + /** Create the named file. */ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, + null, new Metadata()); + } + + /** Create the named file with write-progress reporter. */ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + this(fs, conf, name, keyClass, valClass, + fs.getConf().getInt("io.file.buffer.size", 4096), + fs.getDefaultReplication(), fs.getDefaultBlockSize(), codec, + progress, metadata); + } + + /** Create the named file with write-progress reporter. */ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, + int bufferSize, short replication, long blockSize, + CompressionCodec codec, + Progressable progress, Metadata metadata) + throws IOException { + super.init(name, conf, + fs.create(name, true, bufferSize, replication, blockSize, progress), + keyClass, valClass, true, codec, metadata); + init(conf.getInt("io.seqfile.compress.blocksize", 1000000)); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + /** Create the named file with write-progress reporter. 
*/ + public BlockCompressWriter(FileSystem fs, Configuration conf, Path name, + Class keyClass, Class valClass, CompressionCodec codec, + Progressable progress) + throws IOException { + this(fs, conf, name, keyClass, valClass, codec, progress, new Metadata()); + } + + /** Write to an arbitrary stream using a specified buffer size. */ + private BlockCompressWriter(Configuration conf, FSDataOutputStream out, + Class keyClass, Class valClass, CompressionCodec codec, Metadata metadata) + throws IOException { + this.ownOutputStream = false; + super.init(null, conf, out, keyClass, valClass, true, codec, metadata); + init(1000000); + + initializeFileHeader(); + writeFileHeader(); + finalizeFileHeader(); + } + + boolean isCompressed() { return true; } + boolean isBlockCompressed() { return true; } + + /** Initialize */ + void init(int compressionBlockSize) throws IOException { + this.compressionBlockSize = compressionBlockSize; + keySerializer.close(); + keySerializer.open(keyBuffer); + uncompressedValSerializer.close(); + uncompressedValSerializer.open(valBuffer); + } + + /** Workhorse to check and write out compressed data/lengths */ + private synchronized + void writeBuffer(DataOutputBuffer uncompressedDataBuffer) + throws IOException { + deflateFilter.resetState(); + buffer.reset(); + deflateOut.write(uncompressedDataBuffer.getData(), 0, + uncompressedDataBuffer.getLength()); + deflateOut.flush(); + deflateFilter.finish(); + + WritableUtils.writeVInt(out, buffer.getLength()); + out.write(buffer.getData(), 0, buffer.getLength()); + } + + /** Compress and flush contents to dfs */ + public synchronized void sync() throws IOException { + if (noBufferedRecords > 0) { + super.sync(); + + // No. of records + WritableUtils.writeVInt(out, noBufferedRecords); + + // Write 'keys' and lengths + writeBuffer(keyLenBuffer); + writeBuffer(keyBuffer); + + // Write 'values' and lengths + writeBuffer(valLenBuffer); + writeBuffer(valBuffer); + + // Flush the file-stream + out.flush(); + + // Reset internal states + keyLenBuffer.reset(); + keyBuffer.reset(); + valLenBuffer.reset(); + valBuffer.reset(); + noBufferedRecords = 0; + } + + } + + /** Close the file. */ + public synchronized void close() throws IOException { + if (out != null) { + sync(); + } + super.close(); + } + + /** Append a key/value pair. */ + @SuppressWarnings("unchecked") + public synchronized void append(Object key, Object val) + throws IOException { + if (key.getClass() != keyClass) + throw new IOException("wrong key class: "+key+" is not "+keyClass); + if (val.getClass() != valClass) + throw new IOException("wrong value class: "+val+" is not "+valClass); + + // Save key/value into respective buffers + int oldKeyLength = keyBuffer.getLength(); + keySerializer.serialize(key); + int keyLength = keyBuffer.getLength() - oldKeyLength; + if (keyLength < 0) + throw new IOException("negative length keys not allowed: " + key); + WritableUtils.writeVInt(keyLenBuffer, keyLength); + + int oldValLength = valBuffer.getLength(); + uncompressedValSerializer.serialize(val); + int valLength = valBuffer.getLength() - oldValLength; + WritableUtils.writeVInt(valLenBuffer, valLength); + + // Added another key/value pair + ++noBufferedRecords; + + // Compress and flush? + int currentBlockSize = keyBuffer.getLength() + valBuffer.getLength(); + if (currentBlockSize >= compressionBlockSize) { + sync(); + } + } + + /** Append a key/value pair. 
*/ + public synchronized void appendRaw(byte[] keyData, int keyOffset, + int keyLength, ValueBytes val) throws IOException { + + if (keyLength < 0) + throw new IOException("negative length keys not allowed"); + + int valLength = val.getSize(); + + // Save key/value data in relevant buffers + WritableUtils.writeVInt(keyLenBuffer, keyLength); + keyBuffer.write(keyData, keyOffset, keyLength); + WritableUtils.writeVInt(valLenBuffer, valLength); + val.writeUncompressedBytes(valBuffer); + + // Added another key/value pair + ++noBufferedRecords; + + // Compress and flush? + int currentBlockSize = keyBuffer.getLength() + valBuffer.getLength(); + if (currentBlockSize >= compressionBlockSize) { + sync(); + } + } + + } // BlockCompressionWriter + + /** Reads key/value pairs from a sequence-format file. */ + public static class Reader implements java.io.Closeable { + private Path file; + private FSDataInputStream in; + private DataOutputBuffer outBuf = new DataOutputBuffer(); + + private byte version; + + private String keyClassName; + private String valClassName; + private Class keyClass; + private Class valClass; + + private CompressionCodec codec = null; + private Metadata metadata = null; + + private byte[] sync = new byte[SYNC_HASH_SIZE]; + private byte[] syncCheck = new byte[SYNC_HASH_SIZE]; + private boolean syncSeen; + + private long end; + private int keyLength; + private int recordLength; + + private boolean decompress; + private boolean blockCompressed; + + private Configuration conf; + + private int noBufferedRecords = 0; + private boolean lazyDecompress = true; + private boolean valuesDecompressed = true; + + private int noBufferedKeys = 0; + private int noBufferedValues = 0; + + private DataInputBuffer keyLenBuffer = null; + private CompressionInputStream keyLenInFilter = null; + private DataInputStream keyLenIn = null; + private Decompressor keyLenDecompressor = null; + private DataInputBuffer keyBuffer = null; + private CompressionInputStream keyInFilter = null; + private DataInputStream keyIn = null; + private Decompressor keyDecompressor = null; + + private DataInputBuffer valLenBuffer = null; + private CompressionInputStream valLenInFilter = null; + private DataInputStream valLenIn = null; + private Decompressor valLenDecompressor = null; + private DataInputBuffer valBuffer = null; + private CompressionInputStream valInFilter = null; + private DataInputStream valIn = null; + private Decompressor valDecompressor = null; + + private Deserializer keyDeserializer; + private Deserializer valDeserializer; + + /** Open the named file. */ + public Reader(FileSystem fs, Path file, Configuration conf) + throws IOException { + this(fs, file, conf.getInt("io.file.buffer.size", 4096), conf, false); + } + + private Reader(FileSystem fs, Path file, int bufferSize, + Configuration conf, boolean tempReader) throws IOException { + this(fs, file, bufferSize, 0, fs.getFileStatus(file).getLen(), conf, tempReader); + } + + private Reader(FileSystem fs, Path file, int bufferSize, long start, + long length, Configuration conf, boolean tempReader) + throws IOException { + this.file = file; + this.in = openFile(fs, file, bufferSize, length); + this.conf = conf; + boolean succeeded = false; + try { + seek(start); + this.end = in.getPos() + length; + init(tempReader); + succeeded = true; + } finally { + if (!succeeded) { + IOUtils.cleanup(LOG, in); + } + } + } + + /** + * Override this method to specialize the type of + * {@link FSDataInputStream} returned. 
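Since openFile is protected precisely so that subclasses can specialize how the input stream is obtained, a hypothetical override might look like this (the subclass name and any customization are illustrative only):

    // Sketch: a Reader subclass that hooks openFile, e.g. to wrap or instrument
    // the underlying stream; the default behaviour is just fs.open(file, bufferSize).
    class InstrumentedReader extends SequenceFile.Reader {
      InstrumentedReader(FileSystem fs, Path file, Configuration conf)
          throws IOException {
        super(fs, file, conf);
      }
      @Override
      protected FSDataInputStream openFile(FileSystem fs, Path file,
                                           int bufferSize, long length)
          throws IOException {
        // Customize here (wrap, count bytes, log, ...); this sketch keeps the default.
        return fs.open(file, bufferSize);
      }
    }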
+ */ + protected FSDataInputStream openFile(FileSystem fs, Path file, + int bufferSize, long length) throws IOException { + return fs.open(file, bufferSize); + } + + /** + * Initialize the {@link Reader} + * @param tmpReader true if we are constructing a temporary + * reader {@link SequenceFile.Sorter.cloneFileAttributes}, + * and hence do not initialize every component; + * false otherwise. + * @throws IOException + */ + private void init(boolean tempReader) throws IOException { + byte[] versionBlock = new byte[VERSION.length]; + in.readFully(versionBlock); + + if ((versionBlock[0] != VERSION[0]) || + (versionBlock[1] != VERSION[1]) || + (versionBlock[2] != VERSION[2])) + throw new IOException(file + " not a SequenceFile"); + + // Set 'version' + version = versionBlock[3]; + if (version > VERSION[3]) + throw new VersionMismatchException(VERSION[3], version); + + if (version < BLOCK_COMPRESS_VERSION) { + UTF8 className = new UTF8(); + + className.readFields(in); + keyClassName = className.toString(); // key class name + + className.readFields(in); + valClassName = className.toString(); // val class name + } else { + keyClassName = Text.readString(in); + valClassName = Text.readString(in); + } + + if (version > 2) { // if version > 2 + this.decompress = in.readBoolean(); // is compressed? + } else { + decompress = false; + } + + if (version >= BLOCK_COMPRESS_VERSION) { // if version >= 4 + this.blockCompressed = in.readBoolean(); // is block-compressed? + } else { + blockCompressed = false; + } + + // if version >= 5 + // setup the compression codec + if (decompress) { + if (version >= CUSTOM_COMPRESS_VERSION) { + String codecClassname = Text.readString(in); + try { + Class codecClass + = conf.getClassByName(codecClassname).asSubclass(CompressionCodec.class); + this.codec = ReflectionUtils.newInstance(codecClass, conf); + } catch (ClassNotFoundException cnfe) { + throw new IllegalArgumentException("Unknown codec: " + + codecClassname, cnfe); + } + } else { + codec = new DefaultCodec(); + ((Configurable)codec).setConf(conf); + } + } + + this.metadata = new Metadata(); + if (version >= VERSION_WITH_METADATA) { // if version >= 6 + this.metadata.readFields(in); + } + + if (version > 1) { // if version > 1 + in.readFully(sync); // read sync bytes + } + + // Initialize... 
*not* if this we are constructing a temporary Reader + if (!tempReader) { + valBuffer = new DataInputBuffer(); + if (decompress) { + valDecompressor = CodecPool.getDecompressor(codec); + valInFilter = codec.createInputStream(valBuffer, valDecompressor); + valIn = new DataInputStream(valInFilter); + } else { + valIn = valBuffer; + } + + if (blockCompressed) { + keyLenBuffer = new DataInputBuffer(); + keyBuffer = new DataInputBuffer(); + valLenBuffer = new DataInputBuffer(); + + keyLenDecompressor = CodecPool.getDecompressor(codec); + keyLenInFilter = codec.createInputStream(keyLenBuffer, + keyLenDecompressor); + keyLenIn = new DataInputStream(keyLenInFilter); + + keyDecompressor = CodecPool.getDecompressor(codec); + keyInFilter = codec.createInputStream(keyBuffer, keyDecompressor); + keyIn = new DataInputStream(keyInFilter); + + valLenDecompressor = CodecPool.getDecompressor(codec); + valLenInFilter = codec.createInputStream(valLenBuffer, + valLenDecompressor); + valLenIn = new DataInputStream(valLenInFilter); + } + + SerializationFactory serializationFactory = + new SerializationFactory(conf); + this.keyDeserializer = + getDeserializer(serializationFactory, getKeyClass()); + if (!blockCompressed) { + this.keyDeserializer.open(valBuffer); + } else { + this.keyDeserializer.open(keyIn); + } + this.valDeserializer = + getDeserializer(serializationFactory, getValueClass()); + this.valDeserializer.open(valIn); + } + } + + @SuppressWarnings("unchecked") + private Deserializer getDeserializer(SerializationFactory sf, Class c) { + return sf.getDeserializer(c); + } + + /** Close the file. */ + public synchronized void close() throws IOException { + // Return the decompressors to the pool + CodecPool.returnDecompressor(keyLenDecompressor); + CodecPool.returnDecompressor(keyDecompressor); + CodecPool.returnDecompressor(valLenDecompressor); + CodecPool.returnDecompressor(valDecompressor); + keyLenDecompressor = keyDecompressor = null; + valLenDecompressor = valDecompressor = null; + + if (keyDeserializer != null) { + keyDeserializer.close(); + } + if (valDeserializer != null) { + valDeserializer.close(); + } + + // Close the input-stream + in.close(); + } + + /** Returns the name of the key class. */ + public String getKeyClassName() { + return keyClassName; + } + + /** Returns the class of keys in this file. */ + public synchronized Class getKeyClass() { + if (null == keyClass) { + try { + keyClass = WritableName.getClass(getKeyClassName(), conf); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return keyClass; + } + + /** Returns the name of the value class. */ + public String getValueClassName() { + return valClassName; + } + + /** Returns the class of values in this file. */ + public synchronized Class getValueClass() { + if (null == valClass) { + try { + valClass = WritableName.getClass(getValueClassName(), conf); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return valClass; + } + + /** Returns true if values are compressed. */ + public boolean isCompressed() { return decompress; } + + /** Returns true if records are block-compressed. */ + public boolean isBlockCompressed() { return blockCompressed; } + + /** Returns the compression codec of data in this file. */ + public CompressionCodec getCompressionCodec() { return codec; } + + /** Returns the metadata object of the file */ + public Metadata getMetadata() { + return this.metadata; + } + + /** Returns the configuration used for this file. 
*/ + Configuration getConf() { return conf; } + + /** Read a compressed buffer */ + private synchronized void readBuffer(DataInputBuffer buffer, + CompressionInputStream filter) throws IOException { + // Read data into a temporary buffer + DataOutputBuffer dataBuffer = new DataOutputBuffer(); + + try { + int dataBufferLength = WritableUtils.readVInt(in); + dataBuffer.write(in, dataBufferLength); + + // Set up 'buffer' connected to the input-stream + buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength()); + } finally { + dataBuffer.close(); + } + + // Reset the codec + filter.resetState(); + } + + /** Read the next 'compressed' block */ + private synchronized void readBlock() throws IOException { + // Check if we need to throw away a whole block of + // 'values' due to 'lazy decompression' + if (lazyDecompress && !valuesDecompressed) { + in.seek(WritableUtils.readVInt(in)+in.getPos()); + in.seek(WritableUtils.readVInt(in)+in.getPos()); + } + + // Reset internal states + noBufferedKeys = 0; noBufferedValues = 0; noBufferedRecords = 0; + valuesDecompressed = false; + + //Process sync + if (sync != null) { + in.readInt(); + in.readFully(syncCheck); // read syncCheck + if (!Arrays.equals(sync, syncCheck)) // check it + throw new IOException("File is corrupt!"); + } + syncSeen = true; + + // Read number of records in this block + noBufferedRecords = WritableUtils.readVInt(in); + + // Read key lengths and keys + readBuffer(keyLenBuffer, keyLenInFilter); + readBuffer(keyBuffer, keyInFilter); + noBufferedKeys = noBufferedRecords; + + // Read value lengths and values + if (!lazyDecompress) { + readBuffer(valLenBuffer, valLenInFilter); + readBuffer(valBuffer, valInFilter); + noBufferedValues = noBufferedRecords; + valuesDecompressed = true; + } + } + + /** + * Position valLenIn/valIn to the 'value' + * corresponding to the 'current' key + */ + private synchronized void seekToCurrentValue() throws IOException { + if (!blockCompressed) { + if (decompress) { + valInFilter.resetState(); + } + valBuffer.reset(); + } else { + // Check if this is the first value in the 'block' to be read + if (lazyDecompress && !valuesDecompressed) { + // Read the value lengths and values + readBuffer(valLenBuffer, valLenInFilter); + readBuffer(valBuffer, valInFilter); + noBufferedValues = noBufferedRecords; + valuesDecompressed = true; + } + + // Calculate the no. of bytes to skip + // Note: 'current' key has already been read! + int skipValBytes = 0; + int currentKey = noBufferedKeys + 1; + for (int i=noBufferedValues; i > currentKey; --i) { + skipValBytes += WritableUtils.readVInt(valLenIn); + --noBufferedValues; + } + + // Skip to the 'val' corresponding to 'current' key + if (skipValBytes > 0) { + if (valIn.skipBytes(skipValBytes) != skipValBytes) { + throw new IOException("Failed to seek to " + currentKey + + "(th) value!"); + } + } + } + } + + /** + * Get the 'value' corresponding to the last read 'key'. + * @param val : The 'value' to be read. 
+ * @throws IOException + */ + public synchronized void getCurrentValue(Writable val) + throws IOException { + if (val instanceof Configurable) { + ((Configurable) val).setConf(this.conf); + } + + // Position stream to 'current' value + seekToCurrentValue(); + + if (!blockCompressed) { + val.readFields(valIn); + + if (valIn.read() > 0) { + LOG.info("available bytes: " + valIn.available()); + throw new IOException(val+" read "+(valBuffer.getPosition()-keyLength) + + " bytes, should read " + + (valBuffer.getLength()-keyLength)); + } + } else { + // Get the value + int valLength = WritableUtils.readVInt(valLenIn); + val.readFields(valIn); + + // Read another compressed 'value' + --noBufferedValues; + + // Sanity check + if (valLength < 0) { + LOG.debug(val + " is a zero-length value"); + } + } + + } + + /** + * Get the 'value' corresponding to the last read 'key'. + * @param val : The 'value' to be read. + * @throws IOException + */ + public synchronized Object getCurrentValue(Object val) + throws IOException { + if (val instanceof Configurable) { + ((Configurable) val).setConf(this.conf); + } + + // Position stream to 'current' value + seekToCurrentValue(); + + if (!blockCompressed) { + val = deserializeValue(val); + + if (valIn.read() > 0) { + LOG.info("available bytes: " + valIn.available()); + throw new IOException(val+" read "+(valBuffer.getPosition()-keyLength) + + " bytes, should read " + + (valBuffer.getLength()-keyLength)); + } + } else { + // Get the value + int valLength = WritableUtils.readVInt(valLenIn); + val = deserializeValue(val); + + // Read another compressed 'value' + --noBufferedValues; + + // Sanity check + if (valLength < 0) { + LOG.debug(val + " is a zero-length value"); + } + } + return val; + + } + + @SuppressWarnings("unchecked") + private Object deserializeValue(Object val) throws IOException { + return valDeserializer.deserialize(val); + } + + /** Read the next key in the file into key, skipping its + * value. True if another entry exists, and false at end of file. */ + public synchronized boolean next(Writable key) throws IOException { + if (key.getClass() != getKeyClass()) + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + + if (!blockCompressed) { + outBuf.reset(); + + keyLength = next(outBuf); + if (keyLength < 0) + return false; + + valBuffer.reset(outBuf.getData(), outBuf.getLength()); + + key.readFields(valBuffer); + valBuffer.mark(0); + if (valBuffer.getPosition() != keyLength) + throw new IOException(key + " read " + valBuffer.getPosition() + + " bytes, should read " + keyLength); + } else { + //Reset syncSeen + syncSeen = false; + + if (noBufferedKeys == 0) { + try { + readBlock(); + } catch (EOFException eof) { + return false; + } + } + + int keyLength = WritableUtils.readVInt(keyLenIn); + + // Sanity check + if (keyLength < 0) { + return false; + } + + //Read another compressed 'key' + key.readFields(keyIn); + --noBufferedKeys; + } + + return true; + } + + /** Read the next key/value pair in the file into key and + * val. Returns true if such a pair exists and false when at + * end of file */ + public synchronized boolean next(Writable key, Writable val) + throws IOException { + if (val.getClass() != getValueClass()) + throw new IOException("wrong value class: "+val+" is not "+valClass); + + boolean more = next(key); + + if (more) { + getCurrentValue(val); + } + + return more; + } + + /** + * Read and return the next record length, potentially skipping over + * a sync block. 
+ * @return the length of the next record or -1 if there is no next record + * @throws IOException + */ + private synchronized int readRecordLength() throws IOException { + if (in.getPos() >= end) { + return -1; + } + int length = in.readInt(); + if (version > 1 && sync != null && + length == SYNC_ESCAPE) { // process a sync entry + in.readFully(syncCheck); // read syncCheck + if (!Arrays.equals(sync, syncCheck)) // check it + throw new IOException("File is corrupt!"); + syncSeen = true; + if (in.getPos() >= end) { + return -1; + } + length = in.readInt(); // re-read length + } else { + syncSeen = false; + } + + return length; + } + + /** Read the next key/value pair in the file into buffer. + * Returns the length of the key read, or -1 if at end of file. The length + * of the value may be computed by calling buffer.getLength() before and + * after calls to this method. */ + /** @deprecated Call {@link #nextRaw(DataOutputBuffer,SequenceFile.ValueBytes)}. */ + public synchronized int next(DataOutputBuffer buffer) throws IOException { + // Unsupported for block-compressed sequence files + if (blockCompressed) { + throw new IOException("Unsupported call for block-compressed" + + " SequenceFiles - use SequenceFile.Reader.next(DataOutputStream, ValueBytes)"); + } + try { + int length = readRecordLength(); + if (length == -1) { + return -1; + } + int keyLength = in.readInt(); + buffer.write(in, length); + return keyLength; + } catch (ChecksumException e) { // checksum failure + handleChecksumException(e); + return next(buffer); + } + } + + public ValueBytes createValueBytes() { + ValueBytes val = null; + if (!decompress || blockCompressed) { + val = new UncompressedBytes(); + } else { + val = new CompressedBytes(codec); + } + return val; + } + + /** + * Read 'raw' records. + * @param key - The buffer into which the key is read + * @param val - The 'raw' value + * @return Returns the total record length or -1 for end of file + * @throws IOException + */ + public synchronized int nextRaw(DataOutputBuffer key, ValueBytes val) + throws IOException { + if (!blockCompressed) { + int length = readRecordLength(); + if (length == -1) { + return -1; + } + int keyLength = in.readInt(); + int valLength = length - keyLength; + key.write(in, keyLength); + if (decompress) { + CompressedBytes value = (CompressedBytes)val; + value.reset(in, valLength); + } else { + UncompressedBytes value = (UncompressedBytes)val; + value.reset(in, valLength); + } + + return length; + } else { + //Reset syncSeen + syncSeen = false; + + // Read 'key' + if (noBufferedKeys == 0) { + if (in.getPos() >= end) + return -1; + + try { + readBlock(); + } catch (EOFException eof) { + return -1; + } + } + int keyLength = WritableUtils.readVInt(keyLenIn); + if (keyLength < 0) { + throw new IOException("zero length key found!"); + } + key.write(keyIn, keyLength); + --noBufferedKeys; + + // Read raw 'value' + seekToCurrentValue(); + int valLength = WritableUtils.readVInt(valLenIn); + UncompressedBytes rawValue = (UncompressedBytes)val; + rawValue.reset(valIn, valLength); + --noBufferedValues; + + return (keyLength+valLength); + } + + } + + /** + * Read 'raw' keys. 
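A sketch of driving the raw interface above to copy records between SequenceFiles without deserializing keys or values (reuses the conf/fs setup from the earlier sketch; src and dst are hypothetical paths). The ValueBytes produced by the reader must be compatible with the writer's compression layout, so the simple safe case is an uncompressed destination:

    SequenceFile.Reader in = new SequenceFile.Reader(fs, src, conf);
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, dst,
        in.getKeyClass(), in.getValueClass(), SequenceFile.CompressionType.NONE);
    DataOutputBuffer rawKey = new DataOutputBuffer();
    SequenceFile.ValueBytes rawValue = in.createValueBytes();
    while (in.nextRaw(rawKey, rawValue) != -1) {        // -1 signals end of file
      out.appendRaw(rawKey.getData(), 0, rawKey.getLength(), rawValue);
      rawKey.reset();                                   // nextRaw() appends into the key buffer
    }
    out.close();
    in.close();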
+ * @param key - The buffer into which the key is read + * @return Returns the key length or -1 for end of file + * @throws IOException + */ + public synchronized int nextRawKey(DataOutputBuffer key) + throws IOException { + if (!blockCompressed) { + recordLength = readRecordLength(); + if (recordLength == -1) { + return -1; + } + keyLength = in.readInt(); + key.write(in, keyLength); + return keyLength; + } else { + //Reset syncSeen + syncSeen = false; + + // Read 'key' + if (noBufferedKeys == 0) { + if (in.getPos() >= end) + return -1; + + try { + readBlock(); + } catch (EOFException eof) { + return -1; + } + } + int keyLength = WritableUtils.readVInt(keyLenIn); + if (keyLength < 0) { + throw new IOException("zero length key found!"); + } + key.write(keyIn, keyLength); + --noBufferedKeys; + + return keyLength; + } + + } + + /** Read the next key in the file, skipping its + * value. Return null at end of file. */ + public synchronized Object next(Object key) throws IOException { + if (key != null && key.getClass() != getKeyClass()) { + throw new IOException("wrong key class: "+key.getClass().getName() + +" is not "+keyClass); + } + + if (!blockCompressed) { + outBuf.reset(); + + keyLength = next(outBuf); + if (keyLength < 0) + return null; + + valBuffer.reset(outBuf.getData(), outBuf.getLength()); + + key = deserializeKey(key); + valBuffer.mark(0); + if (valBuffer.getPosition() != keyLength) + throw new IOException(key + " read " + valBuffer.getPosition() + + " bytes, should read " + keyLength); + } else { + //Reset syncSeen + syncSeen = false; + + if (noBufferedKeys == 0) { + try { + readBlock(); + } catch (EOFException eof) { + return null; + } + } + + int keyLength = WritableUtils.readVInt(keyLenIn); + + // Sanity check + if (keyLength < 0) { + return null; + } + + //Read another compressed 'key' + key = deserializeKey(key); + --noBufferedKeys; + } + + return key; + } + + @SuppressWarnings("unchecked") + private Object deserializeKey(Object key) throws IOException { + return keyDeserializer.deserialize(key); + } + + /** + * Read 'raw' values. + * @param val - The 'raw' value + * @return Returns the value length + * @throws IOException + */ + public synchronized int nextRawValue(ValueBytes val) + throws IOException { + + // Position stream to current value + seekToCurrentValue(); + + if (!blockCompressed) { + int valLength = recordLength - keyLength; + if (decompress) { + CompressedBytes value = (CompressedBytes)val; + value.reset(in, valLength); + } else { + UncompressedBytes value = (UncompressedBytes)val; + value.reset(in, valLength); + } + + return valLength; + } else { + int valLength = WritableUtils.readVInt(valLenIn); + UncompressedBytes rawValue = (UncompressedBytes)val; + rawValue.reset(valIn, valLength); + --noBufferedValues; + return valLength; + } + + } + + private void handleChecksumException(ChecksumException e) + throws IOException { + if (this.conf.getBoolean("io.skip.checksum.errors", false)) { + LOG.warn("Bad checksum at "+getPosition()+". Skipping entries."); + sync(getPosition()+this.conf.getInt("io.bytes.per.checksum", 512)); + } else { + throw e; + } + } + + /** disables sync. often invoked for tmp files */ + synchronized void ignoreSync() { + sync = null; + } + + /** Set the current byte position in the input file. + * + *
<p>
The position passed must be a position returned by {@link + * SequenceFile.Writer#getLength()} when writing this file. To seek to an arbitrary + * position, use {@link SequenceFile.Reader#sync(long)}. + */ + public synchronized void seek(long position) throws IOException { + in.seek(position); + if (blockCompressed) { // trigger block read + noBufferedKeys = 0; + valuesDecompressed = true; + } + } + + /** Seek to the next sync mark past a given position.*/ + public synchronized void sync(long position) throws IOException { + if (position+SYNC_SIZE >= end) { + seek(end); + return; + } + + try { + seek(position+4); // skip escape + in.readFully(syncCheck); + int syncLen = sync.length; + for (int i = 0; in.getPos() < end; i++) { + int j = 0; + for (; j < syncLen; j++) { + if (sync[j] != syncCheck[(i+j)%syncLen]) + break; + } + if (j == syncLen) { + in.seek(in.getPos() - SYNC_SIZE); // position before sync + return; + } + syncCheck[i%syncLen] = in.readByte(); + } + } catch (ChecksumException e) { // checksum failure + handleChecksumException(e); + } + } + + /** Returns true iff the previous call to next passed a sync mark.*/ + public synchronized boolean syncSeen() { return syncSeen; } + + /** Return the current byte position in the input file. */ + public synchronized long getPosition() throws IOException { + return in.getPos(); + } + + /** Returns the name of the file. */ + public String toString() { + return file.toString(); + } + + } + + /** Sorts key/value pairs in a sequence-format file. + * + *
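A sketch of the seek/sync contract above for reading an arbitrary byte range of a file (start and end are hypothetical offsets): sync(start) advances to the first sync mark after start, after which records can be read until the position passes end, the pattern used by split-based readers:

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    reader.sync(start);                                 // position at the next sync mark after 'start'
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.getPosition() < end && reader.next(key, value)) {
      // process (key, value)
    }
    reader.close();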
<p>
For best performance, applications should make sure that the {@link + * Writable#readFields(DataInput)} implementation of their keys is + * very efficient. In particular, it should avoid allocating memory. + */ + public static class Sorter { + + private RawComparator comparator; + + private MergeSort mergeSort; //the implementation of merge sort + + private Path[] inFiles; // when merging or sorting + + private Path outFile; + + private int memory; // bytes + private int factor; // merged per pass + + private FileSystem fs = null; + + private Class keyClass; + private Class valClass; + + private Configuration conf; + private Metadata metadata; + + private Progressable progressable = null; + + /** Sort and merge files containing the named classes. */ + public Sorter(FileSystem fs, Class keyClass, + Class valClass, Configuration conf) { + this(fs, WritableComparator.get(keyClass), keyClass, valClass, conf); + } + + /** Sort and merge using an arbitrary {@link RawComparator}. */ + public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, + Class valClass, Configuration conf) { + this(fs, comparator, keyClass, valClass, conf, new Metadata()); + } + + /** Sort and merge using an arbitrary {@link RawComparator}. */ + public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, + Class valClass, Configuration conf, Metadata metadata) { + this.fs = fs; + this.comparator = comparator; + this.keyClass = keyClass; + this.valClass = valClass; + this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024; + this.factor = conf.getInt("io.sort.factor", 100); + this.conf = conf; + this.metadata = metadata; + } + + /** Set the number of streams to merge at once.*/ + public void setFactor(int factor) { this.factor = factor; } + + /** Get the number of streams to merge at once.*/ + public int getFactor() { return factor; } + + /** Set the total amount of buffer memory, in bytes.*/ + public void setMemory(int memory) { this.memory = memory; } + + /** Get the total amount of buffer memory, in bytes.*/ + public int getMemory() { return memory; } + + /** Set the progressable object in order to report progress. */ + public void setProgressable(Progressable progressable) { + this.progressable = progressable; + } + + /** + * Perform a file sort from a set of input files into an output file. + * @param inFiles the files to be sorted + * @param outFile the sorted output file + * @param deleteInput should the input files be deleted as they are read? + */ + public void sort(Path[] inFiles, Path outFile, + boolean deleteInput) throws IOException { + if (fs.exists(outFile)) { + throw new IOException("already exists: " + outFile); + } + + this.inFiles = inFiles; + this.outFile = outFile; + + int segments = sortPass(deleteInput); + if (segments > 1) { + mergePass(outFile.getParent()); + } + } + + /** + * Perform a file sort from a set of input files and return an iterator. + * @param inFiles the files to be sorted + * @param tempDir the directory where temp files are created during sort + * @param deleteInput should the input files be deleted as they are read? 
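A minimal sketch of constructing and running the Sorter described above (input and sortedOut are hypothetical paths; the Text/IntWritable key and value classes are assumptions):

    SequenceFile.Sorter sorter =
        new SequenceFile.Sorter(fs, Text.class, IntWritable.class, conf);
    sorter.setFactor(100);                               // streams merged per pass (io.sort.factor)
    sorter.setMemory(64 * 1024 * 1024);                  // sort buffer in bytes (io.sort.mb)
    sorter.sort(new Path[] { input }, sortedOut, false); // false = keep the input files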
+ * @return iterator the RawKeyValueIterator + */ + public RawKeyValueIterator sortAndIterate(Path[] inFiles, Path tempDir, + boolean deleteInput) throws IOException { + Path outFile = new Path(tempDir + Path.SEPARATOR + "all.2"); + if (fs.exists(outFile)) { + throw new IOException("already exists: " + outFile); + } + this.inFiles = inFiles; + //outFile will basically be used as prefix for temp files in the cases + //where sort outputs multiple sorted segments. For the single segment + //case, the outputFile itself will contain the sorted data for that + //segment + this.outFile = outFile; + + int segments = sortPass(deleteInput); + if (segments > 1) + return merge(outFile.suffix(".0"), outFile.suffix(".0.index"), + tempDir); + else if (segments == 1) + return merge(new Path[]{outFile}, true, tempDir); + else return null; + } + + /** + * The backwards compatible interface to sort. + * @param inFile the input file to sort + * @param outFile the sorted output file + */ + public void sort(Path inFile, Path outFile) throws IOException { + sort(new Path[]{inFile}, outFile, false); + } + + private int sortPass(boolean deleteInput) throws IOException { + LOG.debug("running sort pass"); + SortPass sortPass = new SortPass(); // make the SortPass + sortPass.setProgressable(progressable); + mergeSort = new MergeSort(sortPass.new SeqFileComparator()); + try { + return sortPass.run(deleteInput); // run it + } finally { + sortPass.close(); // close it + } + } + + private class SortPass { + private int memoryLimit = memory/4; + private int recordLimit = 1000000; + + private DataOutputBuffer rawKeys = new DataOutputBuffer(); + private byte[] rawBuffer; + + private int[] keyOffsets = new int[1024]; + private int[] pointers = new int[keyOffsets.length]; + private int[] pointersCopy = new int[keyOffsets.length]; + private int[] keyLengths = new int[keyOffsets.length]; + private ValueBytes[] rawValues = new ValueBytes[keyOffsets.length]; + + private ArrayList segmentLengths = new ArrayList(); + + private Reader in = null; + private FSDataOutputStream out = null; + private FSDataOutputStream indexOut = null; + private Path outName; + + private Progressable progressable = null; + + public int run(boolean deleteInput) throws IOException { + int segments = 0; + int currentFile = 0; + boolean atEof = (currentFile >= inFiles.length); + boolean isCompressed = false; + boolean isBlockCompressed = false; + CompressionCodec codec = null; + segmentLengths.clear(); + if (atEof) { + return 0; + } + + // Initialize + in = new Reader(fs, inFiles[currentFile], conf); + isCompressed = in.isCompressed(); + isBlockCompressed = in.isBlockCompressed(); + codec = in.getCompressionCodec(); + + for (int i=0; i < rawValues.length; ++i) { + rawValues[i] = null; + } + + while (!atEof) { + int count = 0; + int bytesProcessed = 0; + rawKeys.reset(); + while (!atEof && + bytesProcessed < memoryLimit && count < recordLimit) { + + // Read a record into buffer + // Note: Attempt to re-use 'rawValue' as far as possible + int keyOffset = rawKeys.getLength(); + ValueBytes rawValue = + (count == keyOffsets.length || rawValues[count] == null) ? 
+ in.createValueBytes() : + rawValues[count]; + int recordLength = in.nextRaw(rawKeys, rawValue); + if (recordLength == -1) { + in.close(); + if (deleteInput) { + fs.delete(inFiles[currentFile], true); + } + currentFile += 1; + atEof = currentFile >= inFiles.length; + if (!atEof) { + in = new Reader(fs, inFiles[currentFile], conf); + } else { + in = null; + } + continue; + } + + int keyLength = rawKeys.getLength() - keyOffset; + + if (count == keyOffsets.length) + grow(); + + keyOffsets[count] = keyOffset; // update pointers + pointers[count] = count; + keyLengths[count] = keyLength; + rawValues[count] = rawValue; + + bytesProcessed += recordLength; + count++; + } + + // buffer is full -- sort & flush it + LOG.debug("flushing segment " + segments); + rawBuffer = rawKeys.getData(); + sort(count); + // indicate we're making progress + if (progressable != null) { + progressable.progress(); + } + flush(count, bytesProcessed, isCompressed, isBlockCompressed, codec, + segments==0 && atEof); + segments++; + } + return segments; + } + + public void close() throws IOException { + if (in != null) { + in.close(); + } + if (out != null) { + out.close(); + } + if (indexOut != null) { + indexOut.close(); + } + } + + private void grow() { + int newLength = keyOffsets.length * 3 / 2; + keyOffsets = grow(keyOffsets, newLength); + pointers = grow(pointers, newLength); + pointersCopy = new int[newLength]; + keyLengths = grow(keyLengths, newLength); + rawValues = grow(rawValues, newLength); + } + + private int[] grow(int[] old, int newLength) { + int[] result = new int[newLength]; + System.arraycopy(old, 0, result, 0, old.length); + return result; + } + + private ValueBytes[] grow(ValueBytes[] old, int newLength) { + ValueBytes[] result = new ValueBytes[newLength]; + System.arraycopy(old, 0, result, 0, old.length); + for (int i=old.length; i < newLength; ++i) { + result[i] = null; + } + return result; + } + + private void flush(int count, int bytesProcessed, boolean isCompressed, + boolean isBlockCompressed, CompressionCodec codec, boolean done) + throws IOException { + if (out == null) { + outName = done ? outFile : outFile.suffix(".0"); + out = fs.create(outName); + if (!done) { + indexOut = fs.create(outName.suffix(".index")); + } + } + + long segmentStart = out.getPos(); + Writer writer = createWriter(conf, out, keyClass, valClass, + isCompressed, isBlockCompressed, codec, + done ? 
metadata : new Metadata()); + + if (!done) { + writer.sync = null; // disable sync on temp files + } + + for (int i = 0; i < count; i++) { // write in sorted order + int p = pointers[i]; + writer.appendRaw(rawBuffer, keyOffsets[p], keyLengths[p], rawValues[p]); + } + writer.close(); + + if (!done) { + // Save the segment length + WritableUtils.writeVLong(indexOut, segmentStart); + WritableUtils.writeVLong(indexOut, (out.getPos()-segmentStart)); + indexOut.flush(); + } + } + + private void sort(int count) { + System.arraycopy(pointers, 0, pointersCopy, 0, count); + mergeSort.mergeSort(pointersCopy, pointers, 0, count); + } + class SeqFileComparator implements Comparator { + public int compare(IntWritable I, IntWritable J) { + return comparator.compare(rawBuffer, keyOffsets[I.get()], + keyLengths[I.get()], rawBuffer, + keyOffsets[J.get()], keyLengths[J.get()]); + } + } + + /** set the progressable object in order to report progress */ + public void setProgressable(Progressable progressable) + { + this.progressable = progressable; + } + + } // SequenceFile.Sorter.SortPass + + /** The interface to iterate over raw keys/values of SequenceFiles. */ + public static interface RawKeyValueIterator { + /** Gets the current raw key + * @return DataOutputBuffer + * @throws IOException + */ + DataOutputBuffer getKey() throws IOException; + /** Gets the current raw value + * @return ValueBytes + * @throws IOException + */ + ValueBytes getValue() throws IOException; + /** Sets up the current key and value (for getKey and getValue) + * @return true if there exists a key/value, false otherwise + * @throws IOException + */ + boolean next() throws IOException; + /** closes the iterator so that the underlying streams can be closed + * @throws IOException + */ + void close() throws IOException; + /** Gets the Progress object; this has a float (0.0 - 1.0) + * indicating the bytes processed by the iterator so far + */ + Progress getProgress(); + } + + /** + * Merges the list of segments of type SegmentDescriptor + * @param segments the list of SegmentDescriptors + * @param tmpDir the directory to write temporary files into + * @return RawKeyValueIterator + * @throws IOException + */ + public RawKeyValueIterator merge(List segments, + Path tmpDir) + throws IOException { + // pass in object to report progress, if present + MergeQueue mQueue = new MergeQueue(segments, tmpDir, progressable); + return mQueue.merge(); + } + + /** + * Merges the contents of files passed in Path[] using a max factor value + * that is already set + * @param inNames the array of path names + * @param deleteInputs true if the input files should be deleted when + * unnecessary + * @param tmpDir the directory to write temporary files into + * @return RawKeyValueIteratorMergeQueue + * @throws IOException + */ + public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, + Path tmpDir) + throws IOException { + return merge(inNames, deleteInputs, + (inNames.length < factor) ? 
inNames.length : factor, + tmpDir); + } + + /** + * Merges the contents of files passed in Path[] + * @param inNames the array of path names + * @param deleteInputs true if the input files should be deleted when + * unnecessary + * @param factor the factor that will be used as the maximum merge fan-in + * @param tmpDir the directory to write temporary files into + * @return RawKeyValueIteratorMergeQueue + * @throws IOException + */ + public RawKeyValueIterator merge(Path [] inNames, boolean deleteInputs, + int factor, Path tmpDir) + throws IOException { + //get the segments from inNames + ArrayList a = new ArrayList (); + for (int i = 0; i < inNames.length; i++) { + SegmentDescriptor s = new SegmentDescriptor(0, + fs.getFileStatus(inNames[i]).getLen(), inNames[i]); + s.preserveInput(!deleteInputs); + s.doSync(); + a.add(s); + } + this.factor = factor; + MergeQueue mQueue = new MergeQueue(a, tmpDir, progressable); + return mQueue.merge(); + } + + /** + * Merges the contents of files passed in Path[] + * @param inNames the array of path names + * @param tempDir the directory for creating temp files during merge + * @param deleteInputs true if the input files should be deleted when + * unnecessary + * @return RawKeyValueIteratorMergeQueue + * @throws IOException + */ + public RawKeyValueIterator merge(Path [] inNames, Path tempDir, + boolean deleteInputs) + throws IOException { + //outFile will basically be used as prefix for temp files for the + //intermediate merge outputs + this.outFile = new Path(tempDir + Path.SEPARATOR + "merged"); + //get the segments from inNames + ArrayList a = new ArrayList (); + for (int i = 0; i < inNames.length; i++) { + SegmentDescriptor s = new SegmentDescriptor(0, + fs.getFileStatus(inNames[i]).getLen(), inNames[i]); + s.preserveInput(!deleteInputs); + s.doSync(); + a.add(s); + } + factor = (inNames.length < factor) ? inNames.length : factor; + // pass in object to report progress, if present + MergeQueue mQueue = new MergeQueue(a, tempDir, progressable); + return mQueue.merge(); + } + + /** + * Clones the attributes (like compression of the input file and creates a + * corresponding Writer + * @param inputFile the path of the input file whose attributes should be + * cloned + * @param outputFile the path of the output file + * @param prog the Progressable to report status during the file write + * @return Writer + * @throws IOException + */ + public Writer cloneFileAttributes(Path inputFile, Path outputFile, + Progressable prog) + throws IOException { + FileSystem srcFileSys = inputFile.getFileSystem(conf); + Reader reader = new Reader(srcFileSys, inputFile, 4096, conf, true); + boolean compress = reader.isCompressed(); + boolean blockCompress = reader.isBlockCompressed(); + CompressionCodec codec = reader.getCompressionCodec(); + reader.close(); + + Writer writer = createWriter(outputFile.getFileSystem(conf), conf, + outputFile, keyClass, valClass, compress, + blockCompress, codec, prog, + new Metadata()); + return writer; + } + + /** + * Writes records from RawKeyValueIterator into a file represented by the + * passed writer + * @param records the RawKeyValueIterator + * @param writer the Writer created earlier + * @throws IOException + */ + public void writeFile(RawKeyValueIterator records, Writer writer) + throws IOException { + while(records.next()) { + writer.appendRaw(records.getKey().getData(), 0, + records.getKey().getLength(), records.getValue()); + } + writer.sync(); + } + + /** Merge the provided files. 
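A sketch of combining the merge methods above (sortedInputs, tmpDir and mergedFile are hypothetical): obtain a RawKeyValueIterator over pre-sorted inputs, open a Writer cloned from the first input's attributes, and stream the merged records into it. This mirrors what merge(Path[], Path) below does internally:

    SequenceFile.Sorter.RawKeyValueIterator merged =
        sorter.merge(sortedInputs, false, tmpDir);      // keep inputs, temp files under tmpDir
    SequenceFile.Writer out =
        sorter.cloneFileAttributes(sortedInputs[0], mergedFile, null);
    sorter.writeFile(merged, out);                      // appendRaw() every merged record
    out.close();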
+ * @param inFiles the array of input path names + * @param outFile the final output file + * @throws IOException + */ + public void merge(Path[] inFiles, Path outFile) throws IOException { + if (fs.exists(outFile)) { + throw new IOException("already exists: " + outFile); + } + RawKeyValueIterator r = merge(inFiles, false, outFile.getParent()); + Writer writer = cloneFileAttributes(inFiles[0], outFile, null); + + writeFile(r, writer); + + writer.close(); + } + + /** sort calls this to generate the final merged output */ + private int mergePass(Path tmpDir) throws IOException { + LOG.debug("running merge pass"); + Writer writer = cloneFileAttributes( + outFile.suffix(".0"), outFile, null); + RawKeyValueIterator r = merge(outFile.suffix(".0"), + outFile.suffix(".0.index"), tmpDir); + writeFile(r, writer); + + writer.close(); + return 0; + } + + /** Used by mergePass to merge the output of the sort + * @param inName the name of the input file containing sorted segments + * @param indexIn the offsets of the sorted segments + * @param tmpDir the relative directory to store intermediate results in + * @return RawKeyValueIterator + * @throws IOException + */ + private RawKeyValueIterator merge(Path inName, Path indexIn, Path tmpDir) + throws IOException { + //get the segments from indexIn + //we create a SegmentContainer so that we can track segments belonging to + //inName and delete inName as soon as we see that we have looked at all + //the contained segments during the merge process & hence don't need + //them anymore + SegmentContainer container = new SegmentContainer(inName, indexIn); + MergeQueue mQueue = new MergeQueue(container.getSegmentList(), tmpDir, progressable); + return mQueue.merge(); + } + + /** This class implements the core of the merge logic */ + private class MergeQueue extends PriorityQueue + implements RawKeyValueIterator { + private boolean compress; + private boolean blockCompress; + private DataOutputBuffer rawKey = new DataOutputBuffer(); + private ValueBytes rawValue; + private long totalBytesProcessed; + private float progPerByte; + private Progress mergeProgress = new Progress(); + private Path tmpDir; + private Progressable progress = null; //handle to the progress reporting object + private SegmentDescriptor minSegment; + + //a TreeMap used to store the segments sorted by size (segment offset and + //segment path name is used to break ties between segments of same sizes) + private Map sortedSegmentSizes = + new TreeMap(); + + @SuppressWarnings("unchecked") + public void put(SegmentDescriptor stream) throws IOException { + if (size() == 0) { + compress = stream.in.isCompressed(); + blockCompress = stream.in.isBlockCompressed(); + } else if (compress != stream.in.isCompressed() || + blockCompress != stream.in.isBlockCompressed()) { + throw new IOException("All merged files must be compressed or not."); + } + super.put(stream); + } + + /** + * A queue of file segments to merge + * @param segments the file segments to merge + * @param tmpDir a relative local directory to save intermediate files in + * @param progress the reference to the Progressable object + */ + public MergeQueue(List segments, + Path tmpDir, Progressable progress) { + int size = segments.size(); + for (int i = 0; i < size; i++) { + sortedSegmentSizes.put(segments.get(i), null); + } + this.tmpDir = tmpDir; + this.progress = progress; + } + protected boolean lessThan(Object a, Object b) { + // indicate we're making progress + if (progress != null) { + progress.progress(); + } + SegmentDescriptor msa 
= (SegmentDescriptor)a; + SegmentDescriptor msb = (SegmentDescriptor)b; + return comparator.compare(msa.getKey().getData(), 0, + msa.getKey().getLength(), msb.getKey().getData(), 0, + msb.getKey().getLength()) < 0; + } + public void close() throws IOException { + SegmentDescriptor ms; // close inputs + while ((ms = (SegmentDescriptor)pop()) != null) { + ms.cleanup(); + } + minSegment = null; + } + public DataOutputBuffer getKey() throws IOException { + return rawKey; + } + public ValueBytes getValue() throws IOException { + return rawValue; + } + public boolean next() throws IOException { + if (size() == 0) + return false; + if (minSegment != null) { + //minSegment is non-null for all invocations of next except the first + //one. For the first invocation, the priority queue is ready for use + //but for the subsequent invocations, first adjust the queue + adjustPriorityQueue(minSegment); + if (size() == 0) { + minSegment = null; + return false; + } + } + minSegment = (SegmentDescriptor)top(); + long startPos = minSegment.in.getPosition(); // Current position in stream + //save the raw key reference + rawKey = minSegment.getKey(); + //load the raw value. Re-use the existing rawValue buffer + if (rawValue == null) { + rawValue = minSegment.in.createValueBytes(); + } + minSegment.nextRawValue(rawValue); + long endPos = minSegment.in.getPosition(); // End position after reading value + updateProgress(endPos - startPos); + return true; + } + + public Progress getProgress() { + return mergeProgress; + } + + private void adjustPriorityQueue(SegmentDescriptor ms) throws IOException{ + long startPos = ms.in.getPosition(); // Current position in stream + boolean hasNext = ms.nextRawKey(); + long endPos = ms.in.getPosition(); // End position after reading key + updateProgress(endPos - startPos); + if (hasNext) { + adjustTop(); + } else { + pop(); + ms.cleanup(); + } + } + + private void updateProgress(long bytesProcessed) { + totalBytesProcessed += bytesProcessed; + if (progPerByte > 0) { + mergeProgress.set(totalBytesProcessed * progPerByte); + } + } + + /** This is the single level merge that is called multiple times + * depending on the factor size and the number of segments + * @return RawKeyValueIterator + * @throws IOException + */ + public RawKeyValueIterator merge() throws IOException { + //create the MergeStreams from the sorted map created in the constructor + //and dump the final output to a file + int numSegments = sortedSegmentSizes.size(); + int origFactor = factor; + int passNo = 1; + LocalDirAllocator lDirAlloc = new LocalDirAllocator("io.seqfile.local.dir"); + do { + //get the factor for this pass of merge + factor = getPassFactor(passNo, numSegments); + List segmentsToMerge = + new ArrayList(); + int segmentsConsidered = 0; + int numSegmentsToConsider = factor; + while (true) { + //extract the smallest 'factor' number of segment pointers from the + //TreeMap. 
Call cleanup on the empty segments (no key/value data) + SegmentDescriptor[] mStream = + getSegmentDescriptors(numSegmentsToConsider); + for (int i = 0; i < mStream.length; i++) { + if (mStream[i].nextRawKey()) { + segmentsToMerge.add(mStream[i]); + segmentsConsidered++; + // Count the fact that we read some bytes in calling nextRawKey() + updateProgress(mStream[i].in.getPosition()); + } + else { + mStream[i].cleanup(); + numSegments--; //we ignore this segment for the merge + } + } + //if we have the desired number of segments + //or looked at all available segments, we break + if (segmentsConsidered == factor || + sortedSegmentSizes.size() == 0) { + break; + } + + numSegmentsToConsider = factor - segmentsConsidered; + } + //feed the streams to the priority queue + initialize(segmentsToMerge.size()); clear(); + for (int i = 0; i < segmentsToMerge.size(); i++) { + put(segmentsToMerge.get(i)); + } + //if we have lesser number of segments remaining, then just return the + //iterator, else do another single level merge + if (numSegments <= factor) { + //calculate the length of the remaining segments. Required for + //calculating the merge progress + long totalBytes = 0; + for (int i = 0; i < segmentsToMerge.size(); i++) { + totalBytes += segmentsToMerge.get(i).segmentLength; + } + if (totalBytes != 0) //being paranoid + progPerByte = 1.0f / (float)totalBytes; + //reset factor to what it originally was + factor = origFactor; + return this; + } else { + //we want to spread the creation of temp files on multiple disks if + //available under the space constraints + long approxOutputSize = 0; + for (SegmentDescriptor s : segmentsToMerge) { + approxOutputSize += s.segmentLength + + ChecksumFileSystem.getApproxChkSumLength( + s.segmentLength); + } + Path tmpFilename = + new Path(tmpDir, "intermediate").suffix("." + passNo); + + Path outputFile = lDirAlloc.getLocalPathForWrite( + tmpFilename.toString(), + approxOutputSize, conf); + LOG.debug("writing intermediate results to " + outputFile); + Writer writer = cloneFileAttributes( + fs.makeQualified(segmentsToMerge.get(0).segmentPathName), + fs.makeQualified(outputFile), null); + writer.sync = null; //disable sync for temp files + writeFile(this, writer); + writer.close(); + + //we finished one single level merge; now clean up the priority + //queue + this.close(); + + SegmentDescriptor tempSegment = + new SegmentDescriptor(0, + fs.getFileStatus(outputFile).getLen(), outputFile); + //put the segment back in the TreeMap + sortedSegmentSizes.put(tempSegment, null); + numSegments = sortedSegmentSizes.size(); + passNo++; + } + //we are worried about only the first pass merge factor. So reset the + //factor to what it originally was + factor = origFactor; + } while(true); + } + + //Hadoop-591 + public int getPassFactor(int passNo, int numSegments) { + if (passNo > 1 || numSegments <= factor || factor == 1) + return factor; + int mod = (numSegments - 1) % (factor - 1); + if (mod == 0) + return factor; + return mod + 1; + } + + /** Return (& remove) the requested number of segment descriptors from the + * sorted map. 
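As a worked example of getPassFactor above: with 13 sorted segments and a factor of 10, the first pass merges only (13 - 1) % (10 - 1) + 1 = 4 segments (the smallest ones, since the TreeMap is ordered by segment length), leaving 9 originals plus 1 intermediate = 10 segments, so the second and final pass is a full 10-way merge. Merging 10 segments up front would instead rewrite more data to an intermediate file and leave the last pass with a fan-in of only 4.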
+ */ + public SegmentDescriptor[] getSegmentDescriptors(int numDescriptors) { + if (numDescriptors > sortedSegmentSizes.size()) + numDescriptors = sortedSegmentSizes.size(); + SegmentDescriptor[] SegmentDescriptors = + new SegmentDescriptor[numDescriptors]; + Iterator iter = sortedSegmentSizes.keySet().iterator(); + int i = 0; + while (i < numDescriptors) { + SegmentDescriptors[i++] = (SegmentDescriptor)iter.next(); + iter.remove(); + } + return SegmentDescriptors; + } + } // SequenceFile.Sorter.MergeQueue + + /** This class defines a merge segment. This class can be subclassed to + * provide a customized cleanup method implementation. In this + * implementation, cleanup closes the file handle and deletes the file + */ + public class SegmentDescriptor implements Comparable { + + long segmentOffset; //the start of the segment in the file + long segmentLength; //the length of the segment + Path segmentPathName; //the path name of the file containing the segment + boolean ignoreSync = true; //set to true for temp files + private Reader in = null; + private DataOutputBuffer rawKey = null; //this will hold the current key + private boolean preserveInput = false; //delete input segment files? + + /** Constructs a segment + * @param segmentOffset the offset of the segment in the file + * @param segmentLength the length of the segment + * @param segmentPathName the path name of the file containing the segment + */ + public SegmentDescriptor (long segmentOffset, long segmentLength, + Path segmentPathName) { + this.segmentOffset = segmentOffset; + this.segmentLength = segmentLength; + this.segmentPathName = segmentPathName; + } + + /** Do the sync checks */ + public void doSync() {ignoreSync = false;} + + /** Whether to delete the files when no longer needed */ + public void preserveInput(boolean preserve) { + preserveInput = preserve; + } + + public boolean shouldPreserveInput() { + return preserveInput; + } + + public int compareTo(Object o) { + SegmentDescriptor that = (SegmentDescriptor)o; + if (this.segmentLength != that.segmentLength) { + return (this.segmentLength < that.segmentLength ? -1 : 1); + } + if (this.segmentOffset != that.segmentOffset) { + return (this.segmentOffset < that.segmentOffset ? -1 : 1); + } + return (this.segmentPathName.toString()). 
+ compareTo(that.segmentPathName.toString()); + } + + public boolean equals(Object o) { + if (!(o instanceof SegmentDescriptor)) { + return false; + } + SegmentDescriptor that = (SegmentDescriptor)o; + if (this.segmentLength == that.segmentLength && + this.segmentOffset == that.segmentOffset && + this.segmentPathName.toString().equals( + that.segmentPathName.toString())) { + return true; + } + return false; + } + + public int hashCode() { + return 37 * 17 + (int) (segmentOffset^(segmentOffset>>>32)); + } + + /** Fills up the rawKey object with the key returned by the Reader + * @return true if there is a key returned; false, otherwise + * @throws IOException + */ + public boolean nextRawKey() throws IOException { + if (in == null) { + int bufferSize = conf.getInt("io.file.buffer.size", 4096); + if (fs.getUri().getScheme().startsWith("ramfs")) { + bufferSize = conf.getInt("io.bytes.per.checksum", 512); + } + Reader reader = new Reader(fs, segmentPathName, + bufferSize, segmentOffset, + segmentLength, conf, false); + + //sometimes we ignore syncs especially for temp merge files + if (ignoreSync) reader.ignoreSync(); + + if (reader.getKeyClass() != keyClass) + throw new IOException("wrong key class: " + reader.getKeyClass() + + " is not " + keyClass); + if (reader.getValueClass() != valClass) + throw new IOException("wrong value class: "+reader.getValueClass()+ + " is not " + valClass); + this.in = reader; + rawKey = new DataOutputBuffer(); + } + rawKey.reset(); + int keyLength = + in.nextRawKey(rawKey); + return (keyLength >= 0); + } + + /** Fills up the passed rawValue with the value corresponding to the key + * read earlier + * @param rawValue + * @return the length of the value + * @throws IOException + */ + public int nextRawValue(ValueBytes rawValue) throws IOException { + int valLength = in.nextRawValue(rawValue); + return valLength; + } + + /** Returns the stored rawKey */ + public DataOutputBuffer getKey() { + return rawKey; + } + + /** closes the underlying reader */ + private void close() throws IOException { + this.in.close(); + this.in = null; + } + + /** The default cleanup. Subclasses can override this with a custom + * cleanup + */ + public void cleanup() throws IOException { + close(); + if (!preserveInput) { + fs.delete(segmentPathName, true); + } + } + } // SequenceFile.Sorter.SegmentDescriptor + + /** This class provisions multiple segments contained within a single + * file + */ + private class LinkedSegmentsDescriptor extends SegmentDescriptor { + + SegmentContainer parentContainer = null; + + /** Constructs a segment + * @param segmentOffset the offset of the segment in the file + * @param segmentLength the length of the segment + * @param segmentPathName the path name of the file containing the segment + * @param parent the parent SegmentContainer that holds the segment + */ + public LinkedSegmentsDescriptor (long segmentOffset, long segmentLength, + Path segmentPathName, SegmentContainer parent) { + super(segmentOffset, segmentLength, segmentPathName); + this.parentContainer = parent; + } + /** The default cleanup. Subclasses can override this with a custom + * cleanup + */ + public void cleanup() throws IOException { + super.close(); + if (super.shouldPreserveInput()) return; + parentContainer.cleanup(); + } + + public boolean equals(Object o) { + if (!(o instanceof LinkedSegmentsDescriptor)) { + return false; + } + return super.equals(o); + } + } //SequenceFile.Sorter.LinkedSegmentsDescriptor + + /** The class that defines a container for segments to be merged. 
Primarily + * required to delete temp files as soon as all the contained segments + * have been looked at */ + private class SegmentContainer { + private int numSegmentsCleanedUp = 0; //track the no. of segment cleanups + private int numSegmentsContained; //# of segments contained + private Path inName; //input file from where segments are created + + //the list of segments read from the file + private ArrayList segments = + new ArrayList (); + /** This constructor is there primarily to serve the sort routine that + * generates a single output file with an associated index file */ + public SegmentContainer(Path inName, Path indexIn) throws IOException { + //get the segments from indexIn + FSDataInputStream fsIndexIn = fs.open(indexIn); + long end = fs.getFileStatus(indexIn).getLen(); + while (fsIndexIn.getPos() < end) { + long segmentOffset = WritableUtils.readVLong(fsIndexIn); + long segmentLength = WritableUtils.readVLong(fsIndexIn); + Path segmentName = inName; + segments.add(new LinkedSegmentsDescriptor(segmentOffset, + segmentLength, segmentName, this)); + } + fsIndexIn.close(); + fs.delete(indexIn, true); + numSegmentsContained = segments.size(); + this.inName = inName; + } + + public List getSegmentList() { + return segments; + } + public void cleanup() throws IOException { + numSegmentsCleanedUp++; + if (numSegmentsCleanedUp == numSegmentsContained) { + fs.delete(inName, true); + } + } + } //SequenceFile.Sorter.SegmentContainer + + } // SequenceFile.Sorter + +} // SequenceFile diff --git a/src/java/org/apache/hadoop/io/SetFile.java b/src/java/org/apache/hadoop/io/SetFile.java new file mode 100644 index 00000000000..a0cb84922aa --- /dev/null +++ b/src/java/org/apache/hadoop/io/SetFile.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +import org.apache.hadoop.fs.*; +import org.apache.hadoop.conf.*; + +/** A file-based set of keys. */ +public class SetFile extends MapFile { + + protected SetFile() {} // no public ctor + + /** + * Write a new set file. + */ + public static class Writer extends MapFile.Writer { + + /** Create the named set for keys of the named class. + * @deprecated pass a Configuration too + */ + public Writer(FileSystem fs, String dirName, + Class keyClass) throws IOException { + super(new Configuration(), fs, dirName, keyClass, NullWritable.class); + } + + /** Create a set naming the element class and compression type. */ + public Writer(Configuration conf, FileSystem fs, String dirName, + Class keyClass, + SequenceFile.CompressionType compress) + throws IOException { + this(conf, fs, dirName, WritableComparator.get(keyClass), compress); + } + + /** Create a set naming the element comparator and compression type. 
*/ + public Writer(Configuration conf, FileSystem fs, String dirName, + WritableComparator comparator, + SequenceFile.CompressionType compress) throws IOException { + super(conf, fs, dirName, comparator, NullWritable.class, compress); + } + + /** Append a key to a set. The key must be strictly greater than the + * previous key added to the set. */ + public void append(WritableComparable key) throws IOException{ + append(key, NullWritable.get()); + } + } + + /** Provide access to an existing set file. */ + public static class Reader extends MapFile.Reader { + + /** Construct a set reader for the named set.*/ + public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException { + super(fs, dirName, conf); + } + + /** Construct a set reader for the named set using the named comparator.*/ + public Reader(FileSystem fs, String dirName, WritableComparator comparator, Configuration conf) + throws IOException { + super(fs, dirName, comparator, conf); + } + + // javadoc inherited + public boolean seek(WritableComparable key) + throws IOException { + return super.seek(key); + } + + /** Read the next key in a set into key. Returns + * true if such a key exists and false when at the end of the set. */ + public boolean next(WritableComparable key) + throws IOException { + return next(key, NullWritable.get()); + } + + /** Read the matching key from a set into key. + * Returns key, or null if no match exists. */ + public WritableComparable get(WritableComparable key) + throws IOException { + if (seek(key)) { + next(key); + return key; + } else + return null; + } + } + +} diff --git a/src/java/org/apache/hadoop/io/SortedMapWritable.java b/src/java/org/apache/hadoop/io/SortedMapWritable.java new file mode 100644 index 00000000000..53a28dddd3a --- /dev/null +++ b/src/java/org/apache/hadoop/io/SortedMapWritable.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A Writable SortedMap. + */ +public class SortedMapWritable extends AbstractMapWritable + implements SortedMap { + + private SortedMap instance; + + /** default constructor. */ + public SortedMapWritable() { + super(); + this.instance = new TreeMap(); + } + + /** + * Copy constructor. 
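A usage sketch of the SetFile API above (the directory name is hypothetical): keys must be appended in strictly increasing order, and membership is tested by get() returning non-null:

    SetFile.Writer writer = new SetFile.Writer(conf, fs, "/tmp/example.set",
        WritableComparator.get(Text.class), SequenceFile.CompressionType.NONE);
    writer.append(new Text("alpha"));
    writer.append(new Text("beta"));                    // must sort after "alpha"
    writer.close();

    SetFile.Reader reader = new SetFile.Reader(fs, "/tmp/example.set", conf);
    boolean present = reader.get(new Text("beta")) != null;  // true
    reader.close();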
+ * + * @param other the map to copy from + */ + public SortedMapWritable(SortedMapWritable other) { + this(); + copy(other); + } + + /** {@inheritDoc} */ + public Comparator comparator() { + // Returning null means we use the natural ordering of the keys + return null; + } + + /** {@inheritDoc} */ + public WritableComparable firstKey() { + return instance.firstKey(); + } + + /** {@inheritDoc} */ + public SortedMap + headMap(WritableComparable toKey) { + + return instance.headMap(toKey); + } + + /** {@inheritDoc} */ + public WritableComparable lastKey() { + return instance.lastKey(); + } + + /** {@inheritDoc} */ + public SortedMap + subMap(WritableComparable fromKey, WritableComparable toKey) { + + return instance.subMap(fromKey, toKey); + } + + /** {@inheritDoc} */ + public SortedMap + tailMap(WritableComparable fromKey) { + + return instance.tailMap(fromKey); + } + + /** {@inheritDoc} */ + public void clear() { + instance.clear(); + } + + /** {@inheritDoc} */ + public boolean containsKey(Object key) { + return instance.containsKey(key); + } + + /** {@inheritDoc} */ + public boolean containsValue(Object value) { + return instance.containsValue(value); + } + + /** {@inheritDoc} */ + public Set> entrySet() { + return instance.entrySet(); + } + + /** {@inheritDoc} */ + public Writable get(Object key) { + return instance.get(key); + } + + /** {@inheritDoc} */ + public boolean isEmpty() { + return instance.isEmpty(); + } + + /** {@inheritDoc} */ + public Set keySet() { + return instance.keySet(); + } + + /** {@inheritDoc} */ + public Writable put(WritableComparable key, Writable value) { + addToMap(key.getClass()); + addToMap(value.getClass()); + return instance.put(key, value); + } + + /** {@inheritDoc} */ + public void putAll(Map t) { + for (Map.Entry e: + t.entrySet()) { + + instance.put(e.getKey(), e.getValue()); + } + } + + /** {@inheritDoc} */ + public Writable remove(Object key) { + return instance.remove(key); + } + + /** {@inheritDoc} */ + public int size() { + return instance.size(); + } + + /** {@inheritDoc} */ + public Collection values() { + return instance.values(); + } + + /** {@inheritDoc} */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + // Read the number of entries in the map + + int entries = in.readInt(); + + // Then read each key/value pair + + for (int i = 0; i < entries; i++) { + WritableComparable key = + (WritableComparable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + key.readFields(in); + + Writable value = (Writable) ReflectionUtils.newInstance(getClass( + in.readByte()), getConf()); + + value.readFields(in); + instance.put(key, value); + } + } + + /** {@inheritDoc} */ + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + // Write out the number of entries in the map + + out.writeInt(instance.size()); + + // Then write out each key/value pair + + for (Map.Entry e: instance.entrySet()) { + out.writeByte(getId(e.getKey().getClass())); + e.getKey().write(out); + out.writeByte(getId(e.getValue().getClass())); + e.getValue().write(out); + } + } +} diff --git a/src/java/org/apache/hadoop/io/Stringifier.java b/src/java/org/apache/hadoop/io/Stringifier.java new file mode 100644 index 00000000000..e8dba8e05ec --- /dev/null +++ b/src/java/org/apache/hadoop/io/Stringifier.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
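A round-trip sketch of the SortedMapWritable just defined: it behaves like a TreeMap of Writables and records the class of every key and value, so the map can be rebuilt from its serialized form:

    SortedMapWritable map = new SortedMapWritable();
    map.put(new IntWritable(2), new Text("two"));
    map.put(new IntWritable(1), new Text("one"));

    DataOutputBuffer out = new DataOutputBuffer();
    map.write(out);                                     // class ids, entry count, then entries

    SortedMapWritable copy = new SortedMapWritable();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    copy.readFields(in);
    System.out.println(copy.firstKey());                // prints 1 -- TreeMap ordering preserved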
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; + +/** + * Stringifier interface offers two methods to convert an object + * to a string representation and restore the object given its + * string representation. + * @param the class of the objects to stringify + */ +public interface Stringifier extends java.io.Closeable { + + /** + * Converts the object to a string representation + * @param obj the object to convert + * @return the string representation of the object + * @throws IOException if the object cannot be converted + */ + public String toString(T obj) throws IOException; + + /** + * Restores the object from its string representation. + * @param str the string representation of the object + * @return restored object + * @throws IOException if the object cannot be restored + */ + public T fromString(String str) throws IOException; + + + /** + * Closes this object. + * @throws IOException if an I/O error occurs + * */ + public void close() throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/Text.java b/src/java/org/apache/hadoop/io/Text.java new file mode 100644 index 00000000000..19faa8768d6 --- /dev/null +++ b/src/java/org/apache/hadoop/io/Text.java @@ -0,0 +1,594 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.MalformedInputException; +import java.text.CharacterIterator; +import java.text.StringCharacterIterator; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** This class stores text using standard UTF8 encoding. It provides methods + * to serialize, deserialize, and compare texts at byte level. 
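A sketch of the Stringifier contract just defined, using the DefaultStringifier implementation from the same io package (which Base64-encodes the object's serialized bytes):

    DefaultStringifier<IntWritable> stringifier =
        new DefaultStringifier<IntWritable>(conf, IntWritable.class);
    String encoded = stringifier.toString(new IntWritable(42));   // object -> string
    IntWritable restored = stringifier.fromString(encoded);       // string -> object
    stringifier.close();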
The length is + * an integer and is serialized using zero-compressed format.
<p>
In + * addition, it provides methods for string traversal without converting the + * byte array to a string.
<p>
Also includes utilities for + * serializing/deserialing a string, coding/decoding a string, checking if a + * byte array contains valid UTF8 code, calculating the length of an encoded + * string. + */ +public class Text extends BinaryComparable + implements WritableComparable { + private static final Log LOG= LogFactory.getLog(Text.class); + + private static ThreadLocal ENCODER_FACTORY = + new ThreadLocal() { + protected CharsetEncoder initialValue() { + return Charset.forName("UTF-8").newEncoder(). + onMalformedInput(CodingErrorAction.REPORT). + onUnmappableCharacter(CodingErrorAction.REPORT); + } + }; + + private static ThreadLocal DECODER_FACTORY = + new ThreadLocal() { + protected CharsetDecoder initialValue() { + return Charset.forName("UTF-8").newDecoder(). + onMalformedInput(CodingErrorAction.REPORT). + onUnmappableCharacter(CodingErrorAction.REPORT); + } + }; + + private static final byte [] EMPTY_BYTES = new byte[0]; + + private byte[] bytes; + private int length; + + public Text() { + bytes = EMPTY_BYTES; + } + + /** Construct from a string. + */ + public Text(String string) { + set(string); + } + + /** Construct from another text. */ + public Text(Text utf8) { + set(utf8); + } + + /** Construct from a byte array. + */ + public Text(byte[] utf8) { + set(utf8); + } + + /** + * Returns the raw bytes; however, only data up to {@link #getLength()} is + * valid. + */ + public byte[] getBytes() { + return bytes; + } + + /** Returns the number of bytes in the byte array */ + public int getLength() { + return length; + } + + /** + * Returns the Unicode Scalar Value (32-bit integer value) + * for the character at position. Note that this + * method avoids using the converter or doing String instatiation + * @return the Unicode scalar value at position or -1 + * if the position is invalid or points to a + * trailing byte + */ + public int charAt(int position) { + if (position > this.length) return -1; // too long + if (position < 0) return -1; // duh. + + ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position); + return bytesToCodePoint(bb.slice()); + } + + public int find(String what) { + return find(what, 0); + } + + /** + * Finds any occurence of what in the backing + * buffer, starting as position start. The starting + * position is measured in bytes and the return value is in + * terms of byte position in the buffer. The backing buffer is + * not converted to a string for this operation. + * @return byte position of the first occurence of the search + * string in the UTF-8 buffer or -1 if not found + */ + public int find(String what, int start) { + try { + ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length); + ByteBuffer tgt = encode(what); + byte b = tgt.get(); + src.position(start); + + while (src.hasRemaining()) { + if (b == src.get()) { // matching first byte + src.mark(); // save position in loop + tgt.mark(); // save position in target + boolean found = true; + int pos = src.position()-1; + while (tgt.hasRemaining()) { + if (!src.hasRemaining()) { // src expired first + tgt.reset(); + src.reset(); + found = false; + break; + } + if (!(tgt.get() == src.get())) { + tgt.reset(); + src.reset(); + found = false; + break; // no match + } + } + if (found) return pos; + } + } + return -1; // not found + } catch (CharacterCodingException e) { + // can't get here + e.printStackTrace(); + return -1; + } + } + /** Set to contain the contents of a string. 
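A short sketch of the byte-level accessors above; note that positions and return values are in terms of bytes, not characters:

    Text t = new Text("hadoop");
    int pos = t.find("do");            // 2 -- byte offset of the first occurrence
    int cp  = t.charAt(0);             // 104 -- Unicode code point of 'h'
    byte[] raw = t.getBytes();         // valid only up to t.getLength() bytes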
+ */ + public void set(String string) { + try { + ByteBuffer bb = encode(string, true); + bytes = bb.array(); + length = bb.limit(); + }catch(CharacterCodingException e) { + throw new RuntimeException("Should not have happened " + e.toString()); + } + } + + /** Set to a utf8 byte array + */ + public void set(byte[] utf8) { + set(utf8, 0, utf8.length); + } + + /** copy a text. */ + public void set(Text other) { + set(other.getBytes(), 0, other.getLength()); + } + + /** + * Set the Text to range of bytes + * @param utf8 the data to copy from + * @param start the first position of the new string + * @param len the number of bytes of the new string + */ + public void set(byte[] utf8, int start, int len) { + setCapacity(len, false); + System.arraycopy(utf8, start, bytes, 0, len); + this.length = len; + } + + /** + * Append a range of bytes to the end of the given text + * @param utf8 the data to copy from + * @param start the first position to append from utf8 + * @param len the number of bytes to append + */ + public void append(byte[] utf8, int start, int len) { + setCapacity(length + len, true); + System.arraycopy(utf8, start, bytes, length, len); + length += len; + } + + /** + * Clear the string to empty. + */ + public void clear() { + length = 0; + } + + /* + * Sets the capacity of this Text object to at least + * len bytes. If the current buffer is longer, + * then the capacity and existing content of the buffer are + * unchanged. If len is larger + * than the current capacity, the Text object's capacity is + * increased to match. + * @param len the number of bytes we need + * @param keepData should the old data be kept + */ + private void setCapacity(int len, boolean keepData) { + if (bytes == null || bytes.length < len) { + byte[] newBytes = new byte[len]; + if (bytes != null && keepData) { + System.arraycopy(bytes, 0, newBytes, 0, length); + } + bytes = newBytes; + } + } + + /** + * Convert text back to string + * @see java.lang.Object#toString() + */ + public String toString() { + try { + return decode(bytes, 0, length); + } catch (CharacterCodingException e) { + throw new RuntimeException("Should not have happened " + e.toString()); + } + } + + /** deserialize + */ + public void readFields(DataInput in) throws IOException { + int newLength = WritableUtils.readVInt(in); + setCapacity(newLength, false); + in.readFully(bytes, 0, newLength); + length = newLength; + } + + /** Skips over one Text in the input. */ + public static void skip(DataInput in) throws IOException { + int length = WritableUtils.readVInt(in); + WritableUtils.skipFully(in, length); + } + + /** serialize + * write this object to out + * length uses zero-compressed encoding + * @see Writable#write(DataOutput) + */ + public void write(DataOutput out) throws IOException { + WritableUtils.writeVInt(out, length); + out.write(bytes, 0, length); + } + + /** Returns true iff o is a Text with the same contents. */ + public boolean equals(Object o) { + if (o instanceof Text) + return super.equals(o); + return false; + } + + public int hashCode() { + return super.hashCode(); + } + + /** A WritableComparator optimized for Text keys. 
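A sketch of why the raw comparator defined just below matters: two serialized Text keys can be ordered directly on their bytes, without deserializing either one:

    DataOutputBuffer b1 = new DataOutputBuffer();
    DataOutputBuffer b2 = new DataOutputBuffer();
    new Text("apple").write(b1);                        // vint length + UTF-8 bytes
    new Text("banana").write(b2);
    WritableComparator cmp = WritableComparator.get(Text.class);
    int c = cmp.compare(b1.getData(), 0, b1.getLength(),
                        b2.getData(), 0, b2.getLength());   // < 0: "apple" sorts first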
*/ + public static class Comparator extends WritableComparator { + public Comparator() { + super(Text.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + int n1 = WritableUtils.decodeVIntSize(b1[s1]); + int n2 = WritableUtils.decodeVIntSize(b2[s2]); + return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2); + } + } + + static { + // register this comparator + WritableComparator.define(Text.class, new Comparator()); + } + + /// STATIC UTILITIES FROM HERE DOWN + /** + * Converts the provided byte array to a String using the + * UTF-8 encoding. If the input is malformed, + * replace by a default value. + */ + public static String decode(byte[] utf8) throws CharacterCodingException { + return decode(ByteBuffer.wrap(utf8), true); + } + + public static String decode(byte[] utf8, int start, int length) + throws CharacterCodingException { + return decode(ByteBuffer.wrap(utf8, start, length), true); + } + + /** + * Converts the provided byte array to a String using the + * UTF-8 encoding. If replace is true, then + * malformed input is replaced with the + * substitution character, which is U+FFFD. Otherwise the + * method throws a MalformedInputException. + */ + public static String decode(byte[] utf8, int start, int length, boolean replace) + throws CharacterCodingException { + return decode(ByteBuffer.wrap(utf8, start, length), replace); + } + + private static String decode(ByteBuffer utf8, boolean replace) + throws CharacterCodingException { + CharsetDecoder decoder = DECODER_FACTORY.get(); + if (replace) { + decoder.onMalformedInput( + java.nio.charset.CodingErrorAction.REPLACE); + decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } + String str = decoder.decode(utf8).toString(); + // set decoder back to its default value: REPORT + if (replace) { + decoder.onMalformedInput(CodingErrorAction.REPORT); + decoder.onUnmappableCharacter(CodingErrorAction.REPORT); + } + return str; + } + + /** + * Converts the provided String to bytes using the + * UTF-8 encoding. If the input is malformed, + * invalid chars are replaced by a default value. + * @return ByteBuffer: bytes stores at ByteBuffer.array() + * and length is ByteBuffer.limit() + */ + + public static ByteBuffer encode(String string) + throws CharacterCodingException { + return encode(string, true); + } + + /** + * Converts the provided String to bytes using the + * UTF-8 encoding. If replace is true, then + * malformed input is replaced with the + * substitution character, which is U+FFFD. Otherwise the + * method throws a MalformedInputException. 
+ * @return ByteBuffer: bytes stores at ByteBuffer.array() + * and length is ByteBuffer.limit() + */ + public static ByteBuffer encode(String string, boolean replace) + throws CharacterCodingException { + CharsetEncoder encoder = ENCODER_FACTORY.get(); + if (replace) { + encoder.onMalformedInput(CodingErrorAction.REPLACE); + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } + ByteBuffer bytes = + encoder.encode(CharBuffer.wrap(string.toCharArray())); + if (replace) { + encoder.onMalformedInput(CodingErrorAction.REPORT); + encoder.onUnmappableCharacter(CodingErrorAction.REPORT); + } + return bytes; + } + + /** Read a UTF8 encoded string from in + */ + public static String readString(DataInput in) throws IOException { + int length = WritableUtils.readVInt(in); + byte [] bytes = new byte[length]; + in.readFully(bytes, 0, length); + return decode(bytes); + } + + /** Write a UTF8 encoded string to out + */ + public static int writeString(DataOutput out, String s) throws IOException { + ByteBuffer bytes = encode(s); + int length = bytes.limit(); + WritableUtils.writeVInt(out, length); + out.write(bytes.array(), 0, length); + return length; + } + + ////// states for validateUTF8 + + private static final int LEAD_BYTE = 0; + + private static final int TRAIL_BYTE_1 = 1; + + private static final int TRAIL_BYTE = 2; + + /** + * Check if a byte array contains valid utf-8 + * @param utf8 byte array + * @throws MalformedInputException if the byte array contains invalid utf-8 + */ + public static void validateUTF8(byte[] utf8) throws MalformedInputException { + validateUTF8(utf8, 0, utf8.length); + } + + /** + * Check to see if a byte array is valid utf-8 + * @param utf8 the array of bytes + * @param start the offset of the first byte in the array + * @param len the length of the byte sequence + * @throws MalformedInputException if the byte array contains invalid bytes + */ + public static void validateUTF8(byte[] utf8, int start, int len) + throws MalformedInputException { + int count = start; + int leadByte = 0; + int length = 0; + int state = LEAD_BYTE; + while (count < start+len) { + int aByte = ((int) utf8[count] & 0xFF); + + switch (state) { + case LEAD_BYTE: + leadByte = aByte; + length = bytesFromUTF8[aByte]; + + switch (length) { + case 0: // check for ASCII + if (leadByte > 0x7F) + throw new MalformedInputException(count); + break; + case 1: + if (leadByte < 0xC2 || leadByte > 0xDF) + throw new MalformedInputException(count); + state = TRAIL_BYTE_1; + break; + case 2: + if (leadByte < 0xE0 || leadByte > 0xEF) + throw new MalformedInputException(count); + state = TRAIL_BYTE_1; + break; + case 3: + if (leadByte < 0xF0 || leadByte > 0xF4) + throw new MalformedInputException(count); + state = TRAIL_BYTE_1; + break; + default: + // too long! Longest valid UTF-8 is 4 bytes (lead + three) + // or if < 0 we got a trail byte in the lead byte position + throw new MalformedInputException(count); + } // switch (length) + break; + + case TRAIL_BYTE_1: + if (leadByte == 0xF0 && aByte < 0x90) + throw new MalformedInputException(count); + if (leadByte == 0xF4 && aByte > 0x8F) + throw new MalformedInputException(count); + if (leadByte == 0xE0 && aByte < 0xA0) + throw new MalformedInputException(count); + if (leadByte == 0xED && aByte > 0x9F) + throw new MalformedInputException(count); + // falls through to regular trail-byte test!! 
+ case TRAIL_BYTE: + if (aByte < 0x80 || aByte > 0xBF) + throw new MalformedInputException(count); + if (--length == 0) { + state = LEAD_BYTE; + } else { + state = TRAIL_BYTE; + } + break; + } // switch (state) + count++; + } + } + + /** + * Magic numbers for UTF-8. These are the number of bytes + * that follow a given lead byte. Trailing bytes + * have the value -1. The values 4 and 5 are presented in + * this table, even though valid UTF-8 cannot include the + * five and six byte sequences. + */ + static final int[] bytesFromUTF8 = + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + // trail bytes + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 }; + + /** + * Returns the next code point at the current position in + * the buffer. The buffer's position will be incremented. + * Any mark set on this buffer will be changed by this method! + */ + public static int bytesToCodePoint(ByteBuffer bytes) { + bytes.mark(); + byte b = bytes.get(); + bytes.reset(); + int extraBytesToRead = bytesFromUTF8[(b & 0xFF)]; + if (extraBytesToRead < 0) return -1; // trailing byte! + int ch = 0; + + switch (extraBytesToRead) { + case 5: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += (bytes.get() & 0xFF); ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += (bytes.get() & 0xFF); ch <<= 6; + case 2: ch += (bytes.get() & 0xFF); ch <<= 6; + case 1: ch += (bytes.get() & 0xFF); ch <<= 6; + case 0: ch += (bytes.get() & 0xFF); + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + return ch; + } + + + static final int offsetsFromUTF8[] = + { 0x00000000, 0x00003080, + 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 }; + + /** + * For the given string, returns the number of UTF-8 bytes + * required to encode the string. + * @param string text to encode + * @return number of UTF-8 bytes required to encode + */ + public static int utf8Length(String string) { + CharacterIterator iter = new StringCharacterIterator(string); + char ch = iter.first(); + int size = 0; + while (ch != CharacterIterator.DONE) { + if ((ch >= 0xD800) && (ch < 0xDC00)) { + // surrogate pair? 
+ char trail = iter.next(); + if ((trail > 0xDBFF) && (trail < 0xE000)) { + // valid pair + size += 4; + } else { + // invalid pair + size += 3; + iter.previous(); // rewind one + } + } else if (ch < 0x80) { + size++; + } else if (ch < 0x800) { + size += 2; + } else { + // ch < 0x10000, that is, the largest char value + size += 3; + } + ch = iter.next(); + } + return size; + } +} diff --git a/src/java/org/apache/hadoop/io/TwoDArrayWritable.java b/src/java/org/apache/hadoop/io/TwoDArrayWritable.java new file mode 100644 index 00000000000..23463a2a124 --- /dev/null +++ b/src/java/org/apache/hadoop/io/TwoDArrayWritable.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.lang.reflect.Array; + +/** A Writable for 2D arrays containing a matrix of instances of a class. */ +public class TwoDArrayWritable implements Writable { + private Class valueClass; + private Writable[][] values; + + public TwoDArrayWritable(Class valueClass) { + this.valueClass = valueClass; + } + + public TwoDArrayWritable(Class valueClass, Writable[][] values) { + this(valueClass); + this.values = values; + } + + public Object toArray() { + int dimensions[] = {values.length, 0}; + Object result = Array.newInstance(valueClass, dimensions); + for (int i = 0; i < values.length; i++) { + Object resultRow = Array.newInstance(valueClass, values[i].length); + Array.set(result, i, resultRow); + for (int j = 0; j < values[i].length; j++) { + Array.set(resultRow, j, values[i][j]); + } + } + return result; + } + + public void set(Writable[][] values) { this.values = values; } + + public Writable[][] get() { return values; } + + public void readFields(DataInput in) throws IOException { + // construct matrix + values = new Writable[in.readInt()][]; + for (int i = 0; i < values.length; i++) { + values[i] = new Writable[in.readInt()]; + } + + // construct values + for (int i = 0; i < values.length; i++) { + for (int j = 0; j < values[i].length; j++) { + Writable value; // construct value + try { + value = (Writable)valueClass.newInstance(); + } catch (InstantiationException e) { + throw new RuntimeException(e.toString()); + } catch (IllegalAccessException e) { + throw new RuntimeException(e.toString()); + } + value.readFields(in); // read a value + values[i][j] = value; // store it in values + } + } + } + + public void write(DataOutput out) throws IOException { + out.writeInt(values.length); // write values + for (int i = 0; i < values.length; i++) { + out.writeInt(values[i].length); + } + for (int i = 0; i < values.length; i++) { + for (int j = 0; j < values[i].length; j++) { + values[i][j].write(out); + } + } + } +} + diff --git a/src/java/org/apache/hadoop/io/UTF8.java 
b/src/java/org/apache/hadoop/io/UTF8.java new file mode 100644 index 00000000000..d9f45f7e6b4 --- /dev/null +++ b/src/java/org/apache/hadoop/io/UTF8.java @@ -0,0 +1,286 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; + + +import org.apache.commons.logging.*; + +/** A WritableComparable for strings that uses the UTF8 encoding. + * + *

Also includes utilities for efficiently reading and writing UTF-8. + * + * @deprecated replaced by Text + */ +public class UTF8 implements WritableComparable { + private static final Log LOG= LogFactory.getLog(UTF8.class); + private static final DataOutputBuffer OBUF = new DataOutputBuffer(); + private static final DataInputBuffer IBUF = new DataInputBuffer(); + + private static final byte[] EMPTY_BYTES = new byte[0]; + + private byte[] bytes = EMPTY_BYTES; + private int length; + + public UTF8() { + //set(""); + } + + /** Construct from a given string. */ + public UTF8(String string) { + set(string); + } + + /** Construct from a given string. */ + public UTF8(UTF8 utf8) { + set(utf8); + } + + /** The raw bytes. */ + public byte[] getBytes() { + return bytes; + } + + /** The number of bytes in the encoded string. */ + public int getLength() { + return length; + } + + /** Set to contain the contents of a string. */ + public void set(String string) { + if (string.length() > 0xffff/3) { // maybe too long + LOG.warn("truncating long string: " + string.length() + + " chars, starting with " + string.substring(0, 20)); + string = string.substring(0, 0xffff/3); + } + + length = utf8Length(string); // compute length + if (length > 0xffff) // double-check length + throw new RuntimeException("string too long!"); + + if (bytes == null || length > bytes.length) // grow buffer + bytes = new byte[length]; + + try { // avoid sync'd allocations + synchronized (OBUF) { + OBUF.reset(); + writeChars(OBUF, string, 0, string.length()); + System.arraycopy(OBUF.getData(), 0, bytes, 0, length); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** Set to contain the contents of a string. */ + public void set(UTF8 other) { + length = other.length; + if (bytes == null || length > bytes.length) // grow buffer + bytes = new byte[length]; + System.arraycopy(other.bytes, 0, bytes, 0, length); + } + + public void readFields(DataInput in) throws IOException { + length = in.readUnsignedShort(); + if (bytes == null || bytes.length < length) + bytes = new byte[length]; + in.readFully(bytes, 0, length); + } + + /** Skips over one UTF8 in the input. */ + public static void skip(DataInput in) throws IOException { + int length = in.readUnsignedShort(); + WritableUtils.skipFully(in, length); + } + + public void write(DataOutput out) throws IOException { + out.writeShort(length); + out.write(bytes, 0, length); + } + + /** Compare two UTF8s. */ + public int compareTo(Object o) { + UTF8 that = (UTF8)o; + return WritableComparator.compareBytes(bytes, 0, length, + that.bytes, 0, that.length); + } + + /** Convert to a String. */ + public String toString() { + StringBuffer buffer = new StringBuffer(length); + try { + synchronized (IBUF) { + IBUF.reset(bytes, length); + readChars(IBUF, buffer, length); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return buffer.toString(); + } + + /** Returns true iff o is a UTF8 with the same contents. */ + public boolean equals(Object o) { + if (!(o instanceof UTF8)) + return false; + UTF8 that = (UTF8)o; + if (this.length != that.length) + return false; + else + return WritableComparator.compareBytes(bytes, 0, length, + that.bytes, 0, that.length) == 0; + } + + public int hashCode() { + return WritableComparator.hashBytes(bytes, length); + } + + /** A WritableComparator optimized for UTF8 keys. 
*/ + public static class Comparator extends WritableComparator { + public Comparator() { + super(UTF8.class); + } + + public int compare(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + int n1 = readUnsignedShort(b1, s1); + int n2 = readUnsignedShort(b2, s2); + return compareBytes(b1, s1+2, n1, b2, s2+2, n2); + } + } + + static { // register this comparator + WritableComparator.define(UTF8.class, new Comparator()); + } + + /// STATIC UTILITIES FROM HERE DOWN + + /// These are probably not used much anymore, and might be removed... + + /** Convert a string to a UTF-8 encoded byte array. + * @see String#getBytes(String) + */ + public static byte[] getBytes(String string) { + byte[] result = new byte[utf8Length(string)]; + try { // avoid sync'd allocations + synchronized (OBUF) { + OBUF.reset(); + writeChars(OBUF, string, 0, string.length()); + System.arraycopy(OBUF.getData(), 0, result, 0, OBUF.getLength()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return result; + } + + /** Read a UTF-8 encoded string. + * + * @see DataInput#readUTF() + */ + public static String readString(DataInput in) throws IOException { + int bytes = in.readUnsignedShort(); + StringBuffer buffer = new StringBuffer(bytes); + readChars(in, buffer, bytes); + return buffer.toString(); + } + + private static void readChars(DataInput in, StringBuffer buffer, int nBytes) + throws IOException { + synchronized (OBUF) { + OBUF.reset(); + OBUF.write(in, nBytes); + byte[] bytes = OBUF.getData(); + int i = 0; + while (i < nBytes) { + byte b = bytes[i++]; + if ((b & 0x80) == 0) { + buffer.append((char)(b & 0x7F)); + } else if ((b & 0xE0) != 0xE0) { + buffer.append((char)(((b & 0x1F) << 6) + | (bytes[i++] & 0x3F))); + } else { + buffer.append((char)(((b & 0x0F) << 12) + | ((bytes[i++] & 0x3F) << 6) + | (bytes[i++] & 0x3F))); + } + } + } + } + + /** Write a UTF-8 encoded string. + * + * @see DataOutput#writeUTF(String) + */ + public static int writeString(DataOutput out, String s) throws IOException { + if (s.length() > 0xffff/3) { // maybe too long + LOG.warn("truncating long string: " + s.length() + + " chars, starting with " + s.substring(0, 20)); + s = s.substring(0, 0xffff/3); + } + + int len = utf8Length(s); + if (len > 0xffff) // double-check length + throw new IOException("string too long!"); + + out.writeShort(len); + writeChars(out, s, 0, s.length()); + return len; + } + + /** Returns the number of bytes required to write this. 
*/ + private static int utf8Length(String string) { + int stringLength = string.length(); + int utf8Length = 0; + for (int i = 0; i < stringLength; i++) { + int c = string.charAt(i); + if ((c >= 0x0001) && (c <= 0x007F)) { + utf8Length++; + } else if (c > 0x07FF) { + utf8Length += 3; + } else { + utf8Length += 2; + } + } + return utf8Length; + } + + private static void writeChars(DataOutput out, + String s, int start, int length) + throws IOException { + final int end = start + length; + for (int i = start; i < end; i++) { + int code = s.charAt(i); + if (code >= 0x01 && code <= 0x7F) { + out.writeByte((byte)code); + } else if (code <= 0x07FF) { + out.writeByte((byte)(0xC0 | ((code >> 6) & 0x1F))); + out.writeByte((byte)(0x80 | code & 0x3F)); + } else { + out.writeByte((byte)(0xE0 | ((code >> 12) & 0X0F))); + out.writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); + out.writeByte((byte)(0x80 | (code & 0x3F))); + } + } + } + +} diff --git a/src/java/org/apache/hadoop/io/VIntWritable.java b/src/java/org/apache/hadoop/io/VIntWritable.java new file mode 100644 index 00000000000..a8af11bcfff --- /dev/null +++ b/src/java/org/apache/hadoop/io/VIntWritable.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for integer values stored in variable-length format. + * Such values take between one and five bytes. Smaller values take fewer bytes. + * + * @see org.apache.hadoop.io.WritableUtils#readVInt(DataInput) + */ +public class VIntWritable implements WritableComparable { + private int value; + + public VIntWritable() {} + + public VIntWritable(int value) { set(value); } + + /** Set the value of this VIntWritable. */ + public void set(int value) { this.value = value; } + + /** Return the value of this VIntWritable. */ + public int get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = WritableUtils.readVInt(in); + } + + public void write(DataOutput out) throws IOException { + WritableUtils.writeVInt(out, value); + } + + /** Returns true iff o is a VIntWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof VIntWritable)) + return false; + VIntWritable other = (VIntWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return value; + } + + /** Compares two VIntWritables. */ + public int compareTo(Object o) { + int thisValue = this.value; + int thatValue = ((VIntWritable)o).value; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 
0 : 1)); + } + + public String toString() { + return Integer.toString(value); + } + +} + diff --git a/src/java/org/apache/hadoop/io/VLongWritable.java b/src/java/org/apache/hadoop/io/VLongWritable.java new file mode 100644 index 00000000000..14d8602275f --- /dev/null +++ b/src/java/org/apache/hadoop/io/VLongWritable.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +/** A WritableComparable for longs in a variable-length format. Such values take + * between one and five bytes. Smaller values take fewer bytes. + * + * @see org.apache.hadoop.io.WritableUtils#readVLong(DataInput) + */ +public class VLongWritable implements WritableComparable { + private long value; + + public VLongWritable() {} + + public VLongWritable(long value) { set(value); } + + /** Set the value of this LongWritable. */ + public void set(long value) { this.value = value; } + + /** Return the value of this LongWritable. */ + public long get() { return value; } + + public void readFields(DataInput in) throws IOException { + value = WritableUtils.readVLong(in); + } + + public void write(DataOutput out) throws IOException { + WritableUtils.writeVLong(out, value); + } + + /** Returns true iff o is a VLongWritable with the same value. */ + public boolean equals(Object o) { + if (!(o instanceof VLongWritable)) + return false; + VLongWritable other = (VLongWritable)o; + return this.value == other.value; + } + + public int hashCode() { + return (int)value; + } + + /** Compares two VLongWritables. */ + public int compareTo(Object o) { + long thisValue = this.value; + long thatValue = ((VLongWritable)o).value; + return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1)); + } + + public String toString() { + return Long.toString(value); + } + +} + diff --git a/src/java/org/apache/hadoop/io/VersionMismatchException.java b/src/java/org/apache/hadoop/io/VersionMismatchException.java new file mode 100644 index 00000000000..5f57908fd5a --- /dev/null +++ b/src/java/org/apache/hadoop/io/VersionMismatchException.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.IOException; + +/** Thrown by {@link VersionedWritable#readFields(DataInput)} when the + * version of an object being read does not match the current implementation + * version as returned by {@link VersionedWritable#getVersion()}. */ +public class VersionMismatchException extends IOException { + + private byte expectedVersion; + private byte foundVersion; + + public VersionMismatchException(byte expectedVersionIn, byte foundVersionIn){ + expectedVersion = expectedVersionIn; + foundVersion = foundVersionIn; + } + + /** Returns a string representation of this object. */ + public String toString(){ + return "A record version mismatch occured. Expecting v" + + expectedVersion + ", found v" + foundVersion; + } +} diff --git a/src/java/org/apache/hadoop/io/VersionedWritable.java b/src/java/org/apache/hadoop/io/VersionedWritable.java new file mode 100644 index 00000000000..3ca4fe919ab --- /dev/null +++ b/src/java/org/apache/hadoop/io/VersionedWritable.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataOutput; +import java.io.DataInput; +import java.io.IOException; + +/** A base class for Writables that provides version checking. + * + *

This is useful when a class may evolve, so that instances written by the + * old version of the class may still be processed by the new version. To + * handle this situation, {@link #readFields(DataInput)} + * implementations should catch {@link VersionMismatchException}. + */ +public abstract class VersionedWritable implements Writable { + + /** Return the version number of the current implementation. */ + public abstract byte getVersion(); + + // javadoc from Writable + public void write(DataOutput out) throws IOException { + out.writeByte(getVersion()); // store version + } + + // javadoc from Writable + public void readFields(DataInput in) throws IOException { + byte version = in.readByte(); // read version + if (version != getVersion()) + throw new VersionMismatchException(getVersion(), version); + } + + +} diff --git a/src/java/org/apache/hadoop/io/Writable.java b/src/java/org/apache/hadoop/io/Writable.java new file mode 100644 index 00000000000..b61e5b5c34a --- /dev/null +++ b/src/java/org/apache/hadoop/io/Writable.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.DataOutput; +import java.io.DataInput; +import java.io.IOException; + +/** + * A serializable object which implements a simple, efficient, serialization + * protocol, based on {@link DataInput} and {@link DataOutput}. + * + *

Any key or value type in the Hadoop Map-Reduce + * framework implements this interface.

+ * + *

Implementations typically implement a static read(DataInput) + * method which constructs a new instance, calls {@link #readFields(DataInput)} + * and returns the instance.

+ * + *

Example:

+ *

+ *     public class MyWritable implements Writable {
+ *       // Some data     
+ *       private int counter;
+ *       private long timestamp;
+ *       
+ *       public void write(DataOutput out) throws IOException {
+ *         out.writeInt(counter);
+ *         out.writeLong(timestamp);
+ *       }
+ *       
+ *       public void readFields(DataInput in) throws IOException {
+ *         counter = in.readInt();
+ *         timestamp = in.readLong();
+ *       }
+ *       
+ *       public static MyWritable read(DataInput in) throws IOException {
+ *         MyWritable w = new MyWritable();
+ *         w.readFields(in);
+ *         return w;
+ *       }
+ *     }
+ * 
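A usage sketch to complement the example above (PairWritable, RoundTrip and the buffer names are illustrative only, not classes added by this patch): an object implementing this interface can be written to any DataOutput and restored from any DataInput.

    import java.io.*;
    import org.apache.hadoop.io.Writable;

    // Hypothetical record type, written only for this sketch.
    class PairWritable implements Writable {
      int counter;
      long timestamp;
      public void write(DataOutput out) throws IOException {
        out.writeInt(counter);
        out.writeLong(timestamp);
      }
      public void readFields(DataInput in) throws IOException {
        counter = in.readInt();
        timestamp = in.readLong();
      }
    }

    public class RoundTrip {
      public static void main(String[] args) throws IOException {
        PairWritable w = new PairWritable();
        w.counter = 42;
        w.timestamp = System.currentTimeMillis();

        // Serialize to a byte array through a plain DataOutputStream.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        w.write(new DataOutputStream(bytes));

        // Deserialize into a second (or a re-used) instance.
        PairWritable copy = new PairWritable();
        copy.readFields(new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.counter);   // 42
      }
    }

The same readFields() call can be issued repeatedly against one instance, which is the storage re-use pattern the readFields javadoc below recommends.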

+ */ +public interface Writable { + /** + * Serialize the fields of this object to out. + * + * @param out DataOutput to serialize this object into. + * @throws IOException + */ + void write(DataOutput out) throws IOException; + + /** + * Deserialize the fields of this object from in. + * + *

For efficiency, implementations should attempt to re-use storage in the + * existing object where possible.

+ * + * @param in DataInput to deserialize this object from. + * @throws IOException + */ + void readFields(DataInput in) throws IOException; +} diff --git a/src/java/org/apache/hadoop/io/WritableComparable.java b/src/java/org/apache/hadoop/io/WritableComparable.java new file mode 100644 index 00000000000..b8aaf731cc5 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableComparable.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** + * A {@link Writable} which is also {@link Comparable}. + * + *

WritableComparables can be compared to each other, typically + * via Comparators. Any type which is to be used as a + * key in the Hadoop Map-Reduce framework should implement this + * interface.

+ * + *

Example:

+ *

+ *     public class MyWritableComparable implements WritableComparable {
+ *       // Some data
+ *       private int counter;
+ *       private long timestamp;
+ *       
+ *       public void write(DataOutput out) throws IOException {
+ *         out.writeInt(counter);
+ *         out.writeLong(timestamp);
+ *       }
+ *       
+ *       public void readFields(DataInput in) throws IOException {
+ *         counter = in.readInt();
+ *         timestamp = in.readLong();
+ *       }
+ *       
+ *       public int compareTo(MyWritableComparable w) {
+ *         int thisValue = this.counter;
+ *         int thatValue = w.counter;
+ *         return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ *       }
+ *     }
+ * 
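A brief, hedged usage sketch (not part of the patch): because WritableComparable extends Comparable, two keys of the same concrete type, for example the IntWritable referenced in the example above, can be ordered directly with compareTo().

    import org.apache.hadoop.io.IntWritable;

    public class NaturalOrderingSketch {
      public static void main(String[] args) {
        // IntWritable (added elsewhere in this patch) implements WritableComparable.
        IntWritable a = new IntWritable(1);
        IntWritable b = new IntWritable(2);
        System.out.println(a.compareTo(b));   // negative: a orders before b
        System.out.println(b.compareTo(b));   // 0: equal keys
      }
    }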

+ */ +public interface WritableComparable extends Writable, Comparable { +} diff --git a/src/java/org/apache/hadoop/io/WritableComparator.java b/src/java/org/apache/hadoop/io/WritableComparator.java new file mode 100644 index 00000000000..b0b08b4126c --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableComparator.java @@ -0,0 +1,216 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; +import java.util.*; + +import org.apache.hadoop.util.ReflectionUtils; + +/** A Comparator for {@link WritableComparable}s. + * + *

This base implementation uses the natural ordering. To define alternate + * orderings, override {@link #compare(WritableComparable,WritableComparable)}. + * + *
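A hedged sketch of the raw-byte optimization discussed just below, in the same style as the Text.Comparator and UTF8.Comparator classes earlier in this patch; FixedKey and FixedKeyComparator are illustrative names only.

    import java.io.*;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;

    // Hypothetical key type, used only for this sketch; serialized as one 4-byte int.
    class FixedKey implements WritableComparable {
      int value;
      public void write(DataOutput out) throws IOException { out.writeInt(value); }
      public void readFields(DataInput in) throws IOException { value = in.readInt(); }
      public int compareTo(Object o) {
        FixedKey that = (FixedKey) o;
        return (value < that.value ? -1 : (value == that.value ? 0 : 1));
      }
    }

    // Compares the serialized forms directly instead of deserializing two keys.
    public class FixedKeyComparator extends WritableComparator {
      public FixedKeyComparator() { super(FixedKey.class); }

      public int compare(byte[] b1, int s1, int l1,
                         byte[] b2, int s2, int l2) {
        int v1 = readInt(b1, s1);   // static helper provided by WritableComparator
        int v2 = readInt(b2, s2);
        return (v1 < v2 ? -1 : (v1 == v2 ? 0 : 1));
      }

      static {                      // register, as the comparators in this patch do
        WritableComparator.define(FixedKey.class, new FixedKeyComparator());
      }
    }

Once registered with define(), WritableComparator.get(FixedKey.class) returns this optimized instance instead of falling back to deserialize-and-compareTo.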

One may optimize compare-intensive operations by overriding + * {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + * provided to assist in optimized implementations of this method. + */ +public class WritableComparator implements RawComparator { + + private static HashMap comparators = + new HashMap(); // registry + + /** Get a comparator for a {@link WritableComparable} implementation. */ + public static synchronized WritableComparator get(Class c) { + WritableComparator comparator = comparators.get(c); + if (comparator == null) + comparator = new WritableComparator(c, true); + return comparator; + } + + /** Register an optimized comparator for a {@link WritableComparable} + * implementation. */ + public static synchronized void define(Class c, + WritableComparator comparator) { + comparators.put(c, comparator); + } + + + private final Class keyClass; + private final WritableComparable key1; + private final WritableComparable key2; + private final DataInputBuffer buffer; + + /** Construct for a {@link WritableComparable} implementation. */ + protected WritableComparator(Class keyClass) { + this(keyClass, false); + } + + protected WritableComparator(Class keyClass, + boolean createInstances) { + this.keyClass = keyClass; + if (createInstances) { + key1 = newKey(); + key2 = newKey(); + buffer = new DataInputBuffer(); + } else { + key1 = key2 = null; + buffer = null; + } + } + + /** Returns the WritableComparable implementation class. */ + public Class getKeyClass() { return keyClass; } + + /** Construct a new {@link WritableComparable} instance. */ + public WritableComparable newKey() { + return ReflectionUtils.newInstance(keyClass, null); + } + + /** Optimization hook. Override this to make SequenceFile.Sorter's scream. + * + *

The default implementation reads the data into two {@link + * WritableComparable}s (using {@link + * Writable#readFields(DataInput)}, then calls {@link + * #compare(WritableComparable,WritableComparable)}. + */ + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + try { + buffer.reset(b1, s1, l1); // parse key1 + key1.readFields(buffer); + + buffer.reset(b2, s2, l2); // parse key2 + key2.readFields(buffer); + + } catch (IOException e) { + throw new RuntimeException(e); + } + + return compare(key1, key2); // compare them + } + + /** Compare two WritableComparables. + * + *

The default implementation uses the natural ordering, calling {@link + * Comparable#compareTo(Object)}. */ + @SuppressWarnings("unchecked") + public int compare(WritableComparable a, WritableComparable b) { + return a.compareTo(b); + } + + public int compare(Object a, Object b) { + return compare((WritableComparable)a, (WritableComparable)b); + } + + /** Lexicographic order of binary data. */ + public static int compareBytes(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + int end1 = s1 + l1; + int end2 = s2 + l2; + for (int i = s1, j = s2; i < end1 && j < end2; i++, j++) { + int a = (b1[i] & 0xff); + int b = (b2[j] & 0xff); + if (a != b) { + return a - b; + } + } + return l1 - l2; + } + + /** Compute hash for binary data. */ + public static int hashBytes(byte[] bytes, int offset, int length) { + int hash = 1; + for (int i = offset; i < offset + length; i++) + hash = (31 * hash) + (int)bytes[i]; + return hash; + } + + /** Compute hash for binary data. */ + public static int hashBytes(byte[] bytes, int length) { + return hashBytes(bytes, 0, length); + } + + /** Parse an unsigned short from a byte array. */ + public static int readUnsignedShort(byte[] bytes, int start) { + return (((bytes[start] & 0xff) << 8) + + ((bytes[start+1] & 0xff))); + } + + /** Parse an integer from a byte array. */ + public static int readInt(byte[] bytes, int start) { + return (((bytes[start ] & 0xff) << 24) + + ((bytes[start+1] & 0xff) << 16) + + ((bytes[start+2] & 0xff) << 8) + + ((bytes[start+3] & 0xff))); + + } + + /** Parse a float from a byte array. */ + public static float readFloat(byte[] bytes, int start) { + return Float.intBitsToFloat(readInt(bytes, start)); + } + + /** Parse a long from a byte array. */ + public static long readLong(byte[] bytes, int start) { + return ((long)(readInt(bytes, start)) << 32) + + (readInt(bytes, start+4) & 0xFFFFFFFFL); + } + + /** Parse a double from a byte array. */ + public static double readDouble(byte[] bytes, int start) { + return Double.longBitsToDouble(readLong(bytes, start)); + } + + /** + * Reads a zero-compressed encoded long from a byte array and returns it. + * @param bytes byte array with decode long + * @param start starting index + * @throws java.io.IOException + * @return deserialized long + */ + public static long readVLong(byte[] bytes, int start) throws IOException { + int len = bytes[start]; + if (len >= -112) { + return len; + } + boolean isNegative = (len < -120); + len = isNegative ? -(len + 120) : -(len + 112); + if (start+1+len>bytes.length) + throw new IOException( + "Not enough number of bytes for a zero-compressed integer"); + long i = 0; + for (int idx = 0; idx < len; idx++) { + i = i << 8; + i = i | (bytes[start+1+idx] & 0xFF); + } + return (isNegative ? (i ^ -1L) : i); + } + + /** + * Reads a zero-compressed encoded integer from a byte array and returns it. + * @param bytes byte array with the encoded integer + * @param start start index + * @throws java.io.IOException + * @return deserialized integer + */ + public static int readVInt(byte[] bytes, int start) throws IOException { + return (int) readVLong(bytes, start); + } +} diff --git a/src/java/org/apache/hadoop/io/WritableFactories.java b/src/java/org/apache/hadoop/io/WritableFactories.java new file mode 100644 index 00000000000..92569bd7bae --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableFactories.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import org.apache.hadoop.conf.*; +import org.apache.hadoop.util.ReflectionUtils; +import java.util.HashMap; + +/** Factories for non-public writables. Defining a factory permits {@link + * ObjectWritable} to be able to construct instances of non-public classes. */ +public class WritableFactories { + private static final HashMap CLASS_TO_FACTORY = + new HashMap(); + + private WritableFactories() {} // singleton + + /** Define a factory for a class. */ + public static synchronized void setFactory(Class c, WritableFactory factory) { + CLASS_TO_FACTORY.put(c, factory); + } + + /** Define a factory for a class. */ + public static synchronized WritableFactory getFactory(Class c) { + return CLASS_TO_FACTORY.get(c); + } + + /** Create a new instance of a class with a defined factory. */ + public static Writable newInstance(Class c, Configuration conf) { + WritableFactory factory = WritableFactories.getFactory(c); + if (factory != null) { + Writable result = factory.newInstance(); + if (result instanceof Configurable) { + ((Configurable) result).setConf(conf); + } + return result; + } else { + return ReflectionUtils.newInstance(c, conf); + } + } + + /** Create a new instance of a class with a defined factory. */ + public static Writable newInstance(Class c) { + return newInstance(c, null); + } + +} + diff --git a/src/java/org/apache/hadoop/io/WritableFactory.java b/src/java/org/apache/hadoop/io/WritableFactory.java new file mode 100644 index 00000000000..736485eef57 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableFactory.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +/** A factory for a class of Writable. + * @see WritableFactories + */ +public interface WritableFactory { + /** Return a new instance. 
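A hedged sketch of registering a factory with the registry above; PooledRecord is an illustrative class, not one added by this patch.

    import java.io.*;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableFactories;
    import org.apache.hadoop.io.WritableFactory;

    // Hypothetical writable without a no-arg constructor.
    public class PooledRecord implements Writable {
      private long id;

      public PooledRecord(long id) { this.id = id; }

      public void write(DataOutput out) throws IOException { out.writeLong(id); }
      public void readFields(DataInput in) throws IOException { id = in.readLong(); }

      static {
        // With a factory registered, WritableFactories.newInstance(PooledRecord.class)
        // can still produce an instance for framework code such as ObjectWritable.
        WritableFactories.setFactory(PooledRecord.class, new WritableFactory() {
          public Writable newInstance() { return new PooledRecord(0L); }
        });
      }
    }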
*/ + Writable newInstance(); +} + diff --git a/src/java/org/apache/hadoop/io/WritableName.java b/src/java/org/apache/hadoop/io/WritableName.java new file mode 100644 index 00000000000..6b6c1480b55 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableName.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.util.HashMap; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; + +/** Utility to permit renaming of Writable implementation classes without + * invalidiating files that contain their class name. + */ +public class WritableName { + private static HashMap> NAME_TO_CLASS = + new HashMap>(); + private static HashMap, String> CLASS_TO_NAME = + new HashMap, String>(); + + static { // define important types + WritableName.setName(NullWritable.class, "null"); + WritableName.setName(LongWritable.class, "long"); + WritableName.setName(UTF8.class, "UTF8"); + WritableName.setName(MD5Hash.class, "MD5Hash"); + } + + private WritableName() {} // no public ctor + + /** Set the name that a class should be known as to something other than the + * class name. */ + public static synchronized void setName(Class writableClass, String name) { + CLASS_TO_NAME.put(writableClass, name); + NAME_TO_CLASS.put(name, writableClass); + } + + /** Add an alternate name for a class. */ + public static synchronized void addName(Class writableClass, String name) { + NAME_TO_CLASS.put(name, writableClass); + } + + /** Return the name for a class. Default is {@link Class#getName()}. */ + public static synchronized String getName(Class writableClass) { + String name = CLASS_TO_NAME.get(writableClass); + if (name != null) + return name; + return writableClass.getName(); + } + + /** Return the class for a name. Default is {@link Class#forName(String)}.*/ + public static synchronized Class getClass(String name, Configuration conf + ) throws IOException { + Class writableClass = NAME_TO_CLASS.get(name); + if (writableClass != null) + return writableClass.asSubclass(Writable.class); + try { + return conf.getClassByName(name); + } catch (ClassNotFoundException e) { + IOException newE = new IOException("WritableName can't load class: " + name); + newE.initCause(e); + throw newE; + } + } + +} diff --git a/src/java/org/apache/hadoop/io/WritableUtils.java b/src/java/org/apache/hadoop/io/WritableUtils.java new file mode 100644 index 00000000000..e49ea9240c2 --- /dev/null +++ b/src/java/org/apache/hadoop/io/WritableUtils.java @@ -0,0 +1,418 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io; + +import java.io.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReflectionUtils; + +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +public final class WritableUtils { + + public static byte[] readCompressedByteArray(DataInput in) throws IOException { + int length = in.readInt(); + if (length == -1) return null; + byte[] buffer = new byte[length]; + in.readFully(buffer); // could/should use readFully(buffer,0,length)? + GZIPInputStream gzi = new GZIPInputStream(new ByteArrayInputStream(buffer, 0, buffer.length)); + byte[] outbuf = new byte[length]; + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int len; + while((len=gzi.read(outbuf, 0, outbuf.length)) != -1){ + bos.write(outbuf, 0, len); + } + byte[] decompressed = bos.toByteArray(); + bos.close(); + gzi.close(); + return decompressed; + } + + public static void skipCompressedByteArray(DataInput in) throws IOException { + int length = in.readInt(); + if (length != -1) { + skipFully(in, length); + } + } + + public static int writeCompressedByteArray(DataOutput out, byte[] bytes) throws IOException { + if (bytes != null) { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + GZIPOutputStream gzout = new GZIPOutputStream(bos); + gzout.write(bytes, 0, bytes.length); + gzout.close(); + byte[] buffer = bos.toByteArray(); + int len = buffer.length; + out.writeInt(len); + out.write(buffer, 0, len); + /* debug only! Once we have confidence, can lose this. */ + return ((bytes.length != 0) ? (100*buffer.length)/bytes.length : 0); + } else { + out.writeInt(-1); + return -1; + } + } + + + /* Ugly utility, maybe someone else can do this better */ + public static String readCompressedString(DataInput in) throws IOException { + byte[] bytes = readCompressedByteArray(in); + if (bytes == null) return null; + return new String(bytes, "UTF-8"); + } + + + public static int writeCompressedString(DataOutput out, String s) throws IOException { + return writeCompressedByteArray(out, (s != null) ? s.getBytes("UTF-8") : null); + } + + /* + * + * Write a String as a Network Int n, followed by n Bytes + * Alternative to 16 bit read/writeUTF. + * Encoding standard is... ? + * + */ + public static void writeString(DataOutput out, String s) throws IOException { + if (s != null) { + byte[] buffer = s.getBytes("UTF-8"); + int len = buffer.length; + out.writeInt(len); + out.write(buffer, 0, len); + } else { + out.writeInt(-1); + } + } + + /* + * Read a String as a Network Int n, followed by n Bytes + * Alternative to 16 bit read/writeUTF. + * Encoding standard is... ? + * + */ + public static String readString(DataInput in) throws IOException{ + int length = in.readInt(); + if (length == -1) return null; + byte[] buffer = new byte[length]; + in.readFully(buffer); // could/should use readFully(buffer,0,length)? 
+ return new String(buffer,"UTF-8"); + } + + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array Strings. + * Could be generalised using introspection. + * + */ + public static void writeStringArray(DataOutput out, String[] s) throws IOException{ + out.writeInt(s.length); + for(int i = 0; i < s.length; i++) { + writeString(out, s[i]); + } + } + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array of + * compressed Strings. Handles also null arrays and null values. + * Could be generalised using introspection. + * + */ + public static void writeCompressedStringArray(DataOutput out, String[] s) throws IOException{ + if (s == null) { + out.writeInt(-1); + return; + } + out.writeInt(s.length); + for(int i = 0; i < s.length; i++) { + writeCompressedString(out, s[i]); + } + } + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array Strings. + * Could be generalised using introspection. Actually this bit couldn't... + * + */ + public static String[] readStringArray(DataInput in) throws IOException { + int len = in.readInt(); + if (len == -1) return null; + String[] s = new String[len]; + for(int i = 0; i < len; i++) { + s[i] = readString(in); + } + return s; + } + + + /* + * Write a String array as a Nework Int N, followed by Int N Byte Array Strings. + * Could be generalised using introspection. Handles null arrays and null values. + * + */ + public static String[] readCompressedStringArray(DataInput in) throws IOException { + int len = in.readInt(); + if (len == -1) return null; + String[] s = new String[len]; + for(int i = 0; i < len; i++) { + s[i] = readCompressedString(in); + } + return s; + } + + + /* + * + * Test Utility Method Display Byte Array. + * + */ + public static void displayByteArray(byte[] record){ + int i; + for(i=0;i < record.length -1; i++){ + if (i % 16 == 0) { System.out.println(); } + System.out.print(Integer.toHexString(record[i] >> 4 & 0x0F)); + System.out.print(Integer.toHexString(record[i] & 0x0F)); + System.out.print(","); + } + System.out.print(Integer.toHexString(record[i] >> 4 & 0x0F)); + System.out.print(Integer.toHexString(record[i] & 0x0F)); + System.out.println(); + } + + /** + * Make a copy of a writable object using serialization to a buffer. + * @param orig The object to copy + * @return The copied object + */ + public static T clone(T orig, Configuration conf) { + try { + @SuppressWarnings("unchecked") // Unchecked cast from Class to Class + T newInst = ReflectionUtils.newInstance((Class) orig.getClass(), conf); + ReflectionUtils.copy(conf, orig, newInst); + return newInst; + } catch (IOException e) { + throw new RuntimeException("Error writing/reading clone buffer", e); + } + } + + /** + * Make a copy of the writable object using serialiation to a buffer + * @param dst the object to copy from + * @param src the object to copy into, which is destroyed + * @throws IOException + * @deprecated use ReflectionUtils.cloneInto instead. + */ + @Deprecated + public static void cloneInto(Writable dst, Writable src) throws IOException { + ReflectionUtils.cloneWritableInto(dst, src); + } + + /** + * Serializes an integer to a binary stream with zero-compressed encoding. + * For -120 <= i <= 127, only one byte is used with the actual value. + * For other values of i, the first byte value indicates whether the + * integer is positive or negative, and the number of bytes that follow. 
+ * If the first byte value v is between -121 and -124, the following integer + * is positive, with number of bytes that follow are -(v+120). + * If the first byte value v is between -125 and -128, the following integer + * is negative, with number of bytes that follow are -(v+124). Bytes are + * stored in the high-non-zero-byte-first order. + * + * @param stream Binary output stream + * @param i Integer to be serialized + * @throws java.io.IOException + */ + public static void writeVInt(DataOutput stream, int i) throws IOException { + writeVLong(stream, i); + } + + /** + * Serializes a long to a binary stream with zero-compressed encoding. + * For -112 <= i <= 127, only one byte is used with the actual value. + * For other values of i, the first byte value indicates whether the + * long is positive or negative, and the number of bytes that follow. + * If the first byte value v is between -113 and -120, the following long + * is positive, with number of bytes that follow are -(v+112). + * If the first byte value v is between -121 and -128, the following long + * is negative, with number of bytes that follow are -(v+120). Bytes are + * stored in the high-non-zero-byte-first order. + * + * @param stream Binary output stream + * @param i Long to be serialized + * @throws java.io.IOException + */ + public static void writeVLong(DataOutput stream, long i) throws IOException { + if (i >= -112 && i <= 127) { + stream.writeByte((byte)i); + return; + } + + int len = -112; + if (i < 0) { + i ^= -1L; // take one's complement' + len = -120; + } + + long tmp = i; + while (tmp != 0) { + tmp = tmp >> 8; + len--; + } + + stream.writeByte((byte)len); + + len = (len < -120) ? -(len + 120) : -(len + 112); + + for (int idx = len; idx != 0; idx--) { + int shiftbits = (idx - 1) * 8; + long mask = 0xFFL << shiftbits; + stream.writeByte((byte)((i & mask) >> shiftbits)); + } + } + + + /** + * Reads a zero-compressed encoded long from input stream and returns it. + * @param stream Binary input stream + * @throws java.io.IOException + * @return deserialized long from stream. + */ + public static long readVLong(DataInput stream) throws IOException { + byte firstByte = stream.readByte(); + int len = decodeVIntSize(firstByte); + if (len == 1) { + return firstByte; + } + long i = 0; + for (int idx = 0; idx < len-1; idx++) { + byte b = stream.readByte(); + i = i << 8; + i = i | (b & 0xFF); + } + return (isNegativeVInt(firstByte) ? (i ^ -1L) : i); + } + + /** + * Reads a zero-compressed encoded integer from input stream and returns it. + * @param stream Binary input stream + * @throws java.io.IOException + * @return deserialized integer from stream. 
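A short usage sketch for the zero-compressed encoding described above (stream and class names are illustrative): writeVInt/readVInt round-trip values, and getVIntSize reports the encoded length without writing anything.

    import java.io.*;
    import org.apache.hadoop.io.WritableUtils;

    public class VIntSketch {
      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);

        WritableUtils.writeVInt(out, 100);       // fits in a single byte
        WritableUtils.writeVInt(out, 100000);    // one length byte plus three data bytes
        out.flush();

        System.out.println(WritableUtils.getVIntSize(100));      // 1
        System.out.println(WritableUtils.getVIntSize(100000));   // 4

        DataInputStream in = new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(WritableUtils.readVInt(in));          // 100
        System.out.println(WritableUtils.readVInt(in));          // 100000
      }
    }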
+  /**
+   * Reads a zero-compressed encoded integer from input stream and returns it.
+   * @param stream Binary input stream
+   * @throws java.io.IOException
+   * @return deserialized integer from stream.
+   */
+  public static int readVInt(DataInput stream) throws IOException {
+    return (int) readVLong(stream);
+  }
+
+  /**
+   * Given the first byte of a vint/vlong, determine the sign
+   * @param value the first byte
+   * @return is the value negative
+   */
+  public static boolean isNegativeVInt(byte value) {
+    return value < -120 || (value >= -112 && value < 0);
+  }
+
+  /**
+   * Parse the first byte of a vint/vlong to determine the number of bytes
+   * @param value the first byte of the vint/vlong
+   * @return the total number of bytes (1 to 9)
+   */
+  public static int decodeVIntSize(byte value) {
+    if (value >= -112) {
+      return 1;
+    } else if (value < -120) {
+      return -119 - value;
+    }
+    return -111 - value;
+  }
+
+  /**
+   * Get the encoded length if an integer is stored in a variable-length format
+   * @return the encoded length
+   */
+  public static int getVIntSize(long i) {
+    if (i >= -112 && i <= 127) {
+      return 1;
+    }
+
+    if (i < 0) {
+      i ^= -1L; // take one's complement
+    }
+    // find the number of bytes with non-leading zeros
+    int dataBits = Long.SIZE - Long.numberOfLeadingZeros(i);
+    // find the number of data bytes + length byte
+    return (dataBits + 7) / 8 + 1;
+  }
+
+  /**
+   * Read an Enum value from DataInput. Enums are read and written
+   * using String values.
+   * @param <T> Enum type
+   * @param in DataInput to read from
+   * @param enumType Class type of Enum
+   * @return Enum represented by String read from DataInput
+   * @throws IOException
+   */
+  public static <T extends Enum<T>> T readEnum(DataInput in, Class<T> enumType)
+    throws IOException{
+    return T.valueOf(enumType, Text.readString(in));
+  }
+
+  /**
+   * Writes the String value of an enum to DataOutput.
+   * @param out DataOutput stream
+   * @param enumVal enum value
+   * @throws IOException
+   */
+  public static void writeEnum(DataOutput out, Enum<?> enumVal)
+    throws IOException{
+    Text.writeString(out, enumVal.name());
+  }
+
+  /**
+   * Skip len number of bytes in the given input stream.
+   * @param in input stream
+   * @param len number of bytes to skip
+   * @throws IOException when fewer than len bytes could be skipped
+   */
+  public static void skipFully(DataInput in, int len) throws IOException {
+    int total = 0;
+    int cur = 0;
+
+    while ((total < len) && ((cur = in.skipBytes(len - total)) > 0)) {
+      total += cur;
+    }
+
+    if (total < len) {
+      throw new IOException("Not able to skip " + len + " bytes, possibly " +
+                            "due to end of input.");
+    }
+  }
+}
+
+  public Class<? extends Compressor> getCompressorType() {
+    return BZip2DummyCompressor.class;
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public Compressor createCompressor() {
+    return new BZip2DummyCompressor();
+  }
+
+  /**
+   * Creates CompressionInputStream to be used to read off uncompressed data.
+   *
+   * @param in
+   *            The InputStream
+   * @return Returns CompressionInputStream for BZip2
+   * @throws java.io.IOException
+   *             Throws IOException
+   */
+  public CompressionInputStream createInputStream(InputStream in)
+      throws IOException {
+    return new BZip2CompressionInputStream(in);
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public CompressionInputStream createInputStream(InputStream in,
+      Decompressor decompressor) throws IOException {
+    return createInputStream(in);
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public Class<? extends Decompressor> getDecompressorType() {
+    return BZip2DummyDecompressor.class;
+  }
+
+  /**
+   * This functionality is currently not supported.
+ * + * @throws java.lang.UnsupportedOperationException + * Throws UnsupportedOperationException + */ + public Decompressor createDecompressor() { + return new BZip2DummyDecompressor(); + } + + /** + * .bz2 is recognized as the default extension for compressed BZip2 files + * + * @return A String telling the default bzip2 file extension + */ + public String getDefaultExtension() { + return ".bz2"; + } + + private static class BZip2CompressionOutputStream extends CompressionOutputStream { + + // class data starts here// + private CBZip2OutputStream output; + private boolean needsReset; + // class data ends here// + + public BZip2CompressionOutputStream(OutputStream out) + throws IOException { + super(out); + needsReset = true; + } + + private void writeStreamHeader() throws IOException { + if (super.out != null) { + // The compressed bzip2 stream should start with the + // identifying characters BZ. Caller of CBZip2OutputStream + // i.e. this class must write these characters. + out.write(HEADER.getBytes()); + } + } + + public void finish() throws IOException { + if (needsReset) { + // In the case that nothing is written to this stream, we still need to + // write out the header before closing, otherwise the stream won't be + // recognized by BZip2CompressionInputStream. + internalReset(); + } + this.output.finish(); + needsReset = true; + } + + private void internalReset() throws IOException { + if (needsReset) { + needsReset = false; + writeStreamHeader(); + this.output = new CBZip2OutputStream(out); + } + } + + public void resetState() throws IOException { + // Cannot write to out at this point because out might not be ready + // yet, as in SequenceFile.Writer implementation. + needsReset = true; + } + + public void write(int b) throws IOException { + if (needsReset) { + internalReset(); + } + this.output.write(b); + } + + public void write(byte[] b, int off, int len) throws IOException { + if (needsReset) { + internalReset(); + } + this.output.write(b, off, len); + } + + public void close() throws IOException { + if (needsReset) { + // In the case that nothing is written to this stream, we still need to + // write out the header before closing, otherwise the stream won't be + // recognized by BZip2CompressionInputStream. + internalReset(); + } + this.output.flush(); + this.output.close(); + needsReset = true; + } + + }// end of class BZip2CompressionOutputStream + + private static class BZip2CompressionInputStream extends CompressionInputStream { + + // class data starts here// + private CBZip2InputStream input; + boolean needsReset; + // class data ends here// + + public BZip2CompressionInputStream(InputStream in) throws IOException { + + super(in); + needsReset = true; + } + + private BufferedInputStream readStreamHeader() throws IOException { + // We are flexible enough to allow the compressed stream not to + // start with the header of BZ. So it works fine either we have + // the header or not. 
+ BufferedInputStream bufferedIn = null; + if (super.in != null) { + bufferedIn = new BufferedInputStream(super.in); + bufferedIn.mark(HEADER_LEN); + byte[] headerBytes = new byte[HEADER_LEN]; + int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN); + if (actualRead != -1) { + String header = new String(headerBytes); + if (header.compareTo(HEADER) != 0) { + bufferedIn.reset(); + } + } + } + + if (bufferedIn == null) { + throw new IOException("Failed to read bzip2 stream."); + } + + return bufferedIn; + + }// end of method + + public void close() throws IOException { + if (!needsReset) { + input.close(); + needsReset = true; + } + } + + public int read(byte[] b, int off, int len) throws IOException { + if (needsReset) { + internalReset(); + } + return this.input.read(b, off, len); + + } + + private void internalReset() throws IOException { + if (needsReset) { + needsReset = false; + BufferedInputStream bufferedIn = readStreamHeader(); + input = new CBZip2InputStream(bufferedIn); + } + } + + public void resetState() throws IOException { + // Cannot read from bufferedIn at this point because bufferedIn might not be ready + // yet, as in SequenceFile.Reader implementation. + needsReset = true; + } + + public int read() throws IOException { + if (needsReset) { + internalReset(); + } + return this.input.read(); + } + + }// end of BZip2CompressionInputStream + +} diff --git a/src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java b/src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java new file mode 100644 index 00000000000..b1fb21f0eaf --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/BlockCompressorStream.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * A {@link org.apache.hadoop.io.compress.CompressorStream} which works + * with 'block-based' based compression algorithms, as opposed to + * 'stream-based' compression algorithms. + * + * It should be noted that this wrapper does not guarantee that blocks will + * be sized for the compressor. If the + * {@link org.apache.hadoop.io.compress.Compressor} requires buffering to + * effect meaningful compression, it is responsible for it. + */ +public class BlockCompressorStream extends CompressorStream { + + // The 'maximum' size of input data to be compressed, to account + // for the overhead of the compression algorithm. + private final int MAX_INPUT_SIZE; + + /** + * Create a {@link BlockCompressorStream}. 
+ * + * @param out stream + * @param compressor compressor to be used + * @param bufferSize size of buffer + * @param compressionOverhead maximum 'overhead' of the compression + * algorithm with given bufferSize + */ + public BlockCompressorStream(OutputStream out, Compressor compressor, + int bufferSize, int compressionOverhead) { + super(out, compressor, bufferSize); + MAX_INPUT_SIZE = bufferSize - compressionOverhead; + } + + /** + * Create a {@link BlockCompressorStream} with given output-stream and + * compressor. + * Use default of 512 as bufferSize and compressionOverhead of + * (1% of bufferSize + 12 bytes) = 18 bytes (zlib algorithm). + * + * @param out stream + * @param compressor compressor to be used + */ + public BlockCompressorStream(OutputStream out, Compressor compressor) { + this(out, compressor, 512, 18); + } + + /** + * Write the data provided to the compression codec, compressing no more + * than the buffer size less the compression overhead as specified during + * construction for each block. + * + * Each block contains the uncompressed length for the block, followed by + * one or more length-prefixed blocks of compressed data. + */ + public void write(byte[] b, int off, int len) throws IOException { + // Sanity checks + if (compressor.finished()) { + throw new IOException("write beyond end of stream"); + } + if (b == null) { + throw new NullPointerException(); + } else if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length)) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + long limlen = compressor.getBytesRead(); + if (len + limlen > MAX_INPUT_SIZE && limlen > 0) { + // Adding this segment would exceed the maximum size. + // Flush data if we have it. + finish(); + compressor.reset(); + } + + if (len > MAX_INPUT_SIZE) { + // The data we're given exceeds the maximum size. Any data + // we had have been flushed, so we write out this chunk in segments + // not exceeding the maximum size until it is exhausted. + rawWriteInt(len); + do { + int bufLen = Math.min(len, MAX_INPUT_SIZE); + + compressor.setInput(b, off, bufLen); + compressor.finish(); + while (!compressor.finished()) { + compress(); + } + compressor.reset(); + off += bufLen; + len -= bufLen; + } while (len > 0); + return; + } + + // Give data to the compressor + compressor.setInput(b, off, len); + if (!compressor.needsInput()) { + // compressor buffer size might be smaller than the maximum + // size, so we permit it to flush if required. 
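    // For orientation, the framing that write() produces for each logical block
    // looks like this (a sketch of the layout, not a formal spec):
    //
    //   [ 4-byte uncompressed length of the block ]              <- rawWriteInt
    //   [ 4-byte length of compressed chunk #1 ][ chunk #1 bytes ]
    //   [ 4-byte length of compressed chunk #2 ][ chunk #2 bytes ]   (only if needed)
    //   ...
    //
    // BlockDecompressorStream (next file) reads the leading int, then keeps
    // consuming length-prefixed chunks until that many uncompressed bytes
    // have been produced.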
+ rawWriteInt((int)compressor.getBytesRead()); + do { + compress(); + } while (!compressor.needsInput()); + } + } + + public void finish() throws IOException { + if (!compressor.finished()) { + rawWriteInt((int)compressor.getBytesRead()); + compressor.finish(); + while (!compressor.finished()) { + compress(); + } + } + } + + protected void compress() throws IOException { + int len = compressor.compress(buffer, 0, buffer.length); + if (len > 0) { + // Write out the compressed chunk + rawWriteInt(len); + out.write(buffer, 0, len); + } + } + + private void rawWriteInt(int v) throws IOException { + out.write((v >>> 24) & 0xFF); + out.write((v >>> 16) & 0xFF); + out.write((v >>> 8) & 0xFF); + out.write((v >>> 0) & 0xFF); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java b/src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java new file mode 100644 index 00000000000..96636e7a4ff --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +/** + * A {@link org.apache.hadoop.io.compress.DecompressorStream} which works + * with 'block-based' based compression algorithms, as opposed to + * 'stream-based' compression algorithms. + * + */ +public class BlockDecompressorStream extends DecompressorStream { + private int originalBlockSize = 0; + private int noUncompressedBytes = 0; + + /** + * Create a {@link BlockDecompressorStream}. + * + * @param in input stream + * @param decompressor decompressor to use + * @param bufferSize size of buffer + */ + public BlockDecompressorStream(InputStream in, Decompressor decompressor, + int bufferSize) { + super(in, decompressor, bufferSize); + } + + /** + * Create a {@link BlockDecompressorStream}. 
+ * + * @param in input stream + * @param decompressor decompressor to use + */ + public BlockDecompressorStream(InputStream in, Decompressor decompressor) { + super(in, decompressor); + } + + protected BlockDecompressorStream(InputStream in) { + super(in); + } + + protected int decompress(byte[] b, int off, int len) throws IOException { + // Check if we are the beginning of a block + if (noUncompressedBytes == originalBlockSize) { + // Get original data size + try { + originalBlockSize = rawReadInt(); + } catch (IOException ioe) { + return -1; + } + noUncompressedBytes = 0; + } + + int n = 0; + while ((n = decompressor.decompress(b, off, len)) == 0) { + if (decompressor.finished() || decompressor.needsDictionary()) { + if (noUncompressedBytes >= originalBlockSize) { + eof = true; + return -1; + } + } + if (decompressor.needsInput()) { + getCompressedData(); + } + } + + // Note the no. of decompressed bytes read from 'current' block + noUncompressedBytes += n; + + return n; + } + + protected void getCompressedData() throws IOException { + checkStream(); + + // Get the size of the compressed chunk + int len = rawReadInt(); + + // Read len bytes from underlying stream + if (len > buffer.length) { + buffer = new byte[len]; + } + int n = 0, off = 0; + while (n < len) { + int count = in.read(buffer, off + n, len - n); + if (count < 0) { + throw new EOFException(); + } + n += count; + } + + // Send the read data to the decompressor + decompressor.setInput(buffer, 0, len); + } + + public void resetState() throws IOException { + super.resetState(); + } + + private int rawReadInt() throws IOException { + int b1 = in.read(); + int b2 = in.read(); + int b3 = in.read(); + int b4 = in.read(); + if ((b1 | b2 | b3 | b4) < 0) + throw new EOFException(); + return ((b1 << 24) + (b2 << 16) + (b3 << 8) + (b4 << 0)); + } +} diff --git a/src/java/org/apache/hadoop/io/compress/CodecPool.java b/src/java/org/apache/hadoop/io/compress/CodecPool.java new file mode 100644 index 00000000000..8960b41ef3d --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CodecPool.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A global compressor/decompressor pool used to save and reuse + * (possibly native) compression/decompression codecs. 
+ */
+public class CodecPool {
+  private static final Log LOG = LogFactory.getLog(CodecPool.class);
+
+  /**
+   * A global compressor pool used to save the expensive
+   * construction/destruction of (possibly native) compression codecs.
+   */
+  private static final Map<Class<Compressor>, List<Compressor>> compressorPool =
+    new HashMap<Class<Compressor>, List<Compressor>>();
+
+  /**
+   * A global decompressor pool used to save the expensive
+   * construction/destruction of (possibly native) decompression codecs.
+   */
+  private static final Map<Class<Decompressor>, List<Decompressor>> decompressorPool =
+    new HashMap<Class<Decompressor>, List<Decompressor>>();
+
+  private static <T> T borrow(Map<Class<T>, List<T>> pool,
+                              Class<? extends T> codecClass) {
+    T codec = null;
+
+    // Check if an appropriate codec is available
+    synchronized (pool) {
+      if (pool.containsKey(codecClass)) {
+        List<T> codecList = pool.get(codecClass);
+
+        if (codecList != null) {
+          synchronized (codecList) {
+            if (!codecList.isEmpty()) {
+              codec = codecList.remove(codecList.size()-1);
+            }
+          }
+        }
+      }
+    }
+
+    return codec;
+  }
+
+  private static <T> void payback(Map<Class<T>, List<T>> pool, T codec) {
+    if (codec != null) {
+      Class<T> codecClass = ReflectionUtils.getClass(codec);
+      synchronized (pool) {
+        if (!pool.containsKey(codecClass)) {
+          pool.put(codecClass, new ArrayList<T>());
+        }
+
+        List<T> codecList = pool.get(codecClass);
+        synchronized (codecList) {
+          codecList.add(codec);
+        }
+      }
+    }
+  }
+
+  /**
+   * Get a {@link Compressor} for the given {@link CompressionCodec} from the
+   * pool or a new one.
+   *
+   * @param codec the CompressionCodec for which to get the
+   *              Compressor
+   * @return Compressor for the given
+   *         CompressionCodec from the pool or a new one
+   */
+  public static Compressor getCompressor(CompressionCodec codec) {
+    Compressor compressor = borrow(compressorPool, codec.getCompressorType());
+    if (compressor == null) {
+      compressor = codec.createCompressor();
+      LOG.info("Got brand-new compressor");
+    } else {
+      LOG.debug("Got recycled compressor");
+    }
+    return compressor;
+  }
+
+  /**
+   * Get a {@link Decompressor} for the given {@link CompressionCodec} from the
+   * pool or a new one.
+   *
+   * @param codec the CompressionCodec for which to get the
+   *              Decompressor
+   * @return Decompressor for the given
+   *         CompressionCodec from the pool or a new one
+   */
+  public static Decompressor getDecompressor(CompressionCodec codec) {
+    Decompressor decompressor = borrow(decompressorPool, codec.getDecompressorType());
+    if (decompressor == null) {
+      decompressor = codec.createDecompressor();
+      LOG.info("Got brand-new decompressor");
+    } else {
+      LOG.debug("Got recycled decompressor");
+    }
+    return decompressor;
+  }
+
+  /**
+   * Return the {@link Compressor} to the pool.
+   *
+   * @param compressor the Compressor to be returned to the pool
+   */
+  public static void returnCompressor(Compressor compressor) {
+    if (compressor == null) {
+      return;
+    }
+    compressor.reset();
+    payback(compressorPool, compressor);
+  }
+
+  /**
+   * Return the {@link Decompressor} to the pool.
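A sketch of the intended borrow/return pattern (conf and fileOut stand in for whatever Configuration and output stream the caller already has; DefaultCodec is the codec defined later in this patch):

    CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);
    Compressor compressor = CodecPool.getCompressor(codec);
    try {
      CompressionOutputStream out = codec.createOutputStream(fileOut, compressor);
      // ... write data, then out.finish() and out.close() ...
    } finally {
      CodecPool.returnCompressor(compressor);  // reset() and hand back to the pool
    }
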
+ * + * @param decompressor the Decompressor to be returned to the + * pool + */ + public static void returnDecompressor(Decompressor decompressor) { + if (decompressor == null) { + return; + } + decompressor.reset(); + payback(decompressorPool, decompressor); + } +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionCodec.java b/src/java/org/apache/hadoop/io/compress/CompressionCodec.java new file mode 100644 index 00000000000..9d9ccd4e632 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionCodec.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * This class encapsulates a streaming compression/decompression pair. + */ +public interface CompressionCodec { + + /** + * Create a {@link CompressionOutputStream} that will write to the given + * {@link OutputStream}. + * + * @param out the location for the final output stream + * @return a stream the user can write uncompressed data to have it compressed + * @throws IOException + */ + CompressionOutputStream createOutputStream(OutputStream out) + throws IOException; + + /** + * Create a {@link CompressionOutputStream} that will write to the given + * {@link OutputStream} with the given {@link Compressor}. + * + * @param out the location for the final output stream + * @param compressor compressor to use + * @return a stream the user can write uncompressed data to have it compressed + * @throws IOException + */ + CompressionOutputStream createOutputStream(OutputStream out, + Compressor compressor) + throws IOException; + + /** + * Get the type of {@link Compressor} needed by this {@link CompressionCodec}. + * + * @return the type of compressor needed by this codec. + */ + Class getCompressorType(); + + /** + * Create a new {@link Compressor} for use by this {@link CompressionCodec}. + * + * @return a new compressor for use by this codec + */ + Compressor createCompressor(); + + /** + * Create a stream decompressor that will read from the given input stream. + * + * @param in the stream to read compressed bytes from + * @return a stream to read uncompressed bytes from + * @throws IOException + */ + CompressionInputStream createInputStream(InputStream in) throws IOException; + + /** + * Create a {@link CompressionInputStream} that will read from the given + * {@link InputStream} with the given {@link Decompressor}. 
+ * + * @param in the stream to read compressed bytes from + * @param decompressor decompressor to use + * @return a stream to read uncompressed bytes from + * @throws IOException + */ + CompressionInputStream createInputStream(InputStream in, + Decompressor decompressor) + throws IOException; + + + /** + * Get the type of {@link Decompressor} needed by this {@link CompressionCodec}. + * + * @return the type of decompressor needed by this codec. + */ + Class getDecompressorType(); + + /** + * Create a new {@link Decompressor} for use by this {@link CompressionCodec}. + * + * @return a new decompressor for use by this codec + */ + Decompressor createDecompressor(); + + /** + * Get the default filename extension for this kind of compression. + * @return the extension including the '.' + */ + String getDefaultExtension(); +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java b/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java new file mode 100644 index 00000000000..dae2e68e1c3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import java.util.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A factory that will find the correct codec for a given filename. + */ +public class CompressionCodecFactory { + + public static final Log LOG = + LogFactory.getLog(CompressionCodecFactory.class.getName()); + + /** + * A map from the reversed filename suffixes to the codecs. + * This is probably overkill, because the maps should be small, but it + * automatically supports finding the longest matching suffix. + */ + private SortedMap codecs = null; + + private void addCodec(CompressionCodec codec) { + String suffix = codec.getDefaultExtension(); + codecs.put(new StringBuffer(suffix).reverse().toString(), codec); + } + + /** + * Print the extension map out as a string. 
+   */
+  public String toString() {
+    StringBuffer buf = new StringBuffer();
+    Iterator<Map.Entry<String, CompressionCodec>> itr =
+      codecs.entrySet().iterator();
+    buf.append("{ ");
+    if (itr.hasNext()) {
+      Map.Entry<String, CompressionCodec> entry = itr.next();
+      buf.append(entry.getKey());
+      buf.append(": ");
+      buf.append(entry.getValue().getClass().getName());
+      while (itr.hasNext()) {
+        entry = itr.next();
+        buf.append(", ");
+        buf.append(entry.getKey());
+        buf.append(": ");
+        buf.append(entry.getValue().getClass().getName());
+      }
+    }
+    buf.append(" }");
+    return buf.toString();
+  }
+
+  /**
+   * Get the list of codecs listed in the configuration
+   * @param conf the configuration to look in
+   * @return a list of the CompressionCodec classes or null if the attribute
+   *         was not set
+   */
+  public static List<Class<? extends CompressionCodec>> getCodecClasses(Configuration conf) {
+    String codecsString = conf.get("io.compression.codecs");
+    if (codecsString != null) {
+      List<Class<? extends CompressionCodec>> result
+        = new ArrayList<Class<? extends CompressionCodec>>();
+      StringTokenizer codecSplit = new StringTokenizer(codecsString, ",");
+      while (codecSplit.hasMoreElements()) {
+        String codecSubstring = codecSplit.nextToken();
+        if (codecSubstring.length() != 0) {
+          try {
+            Class<?> cls = conf.getClassByName(codecSubstring);
+            if (!CompressionCodec.class.isAssignableFrom(cls)) {
+              throw new IllegalArgumentException("Class " + codecSubstring +
+                                                 " is not a CompressionCodec");
+            }
+            result.add(cls.asSubclass(CompressionCodec.class));
+          } catch (ClassNotFoundException ex) {
+            throw new IllegalArgumentException("Compression codec " +
+                                               codecSubstring + " not found.",
+                                               ex);
+          }
+        }
+      }
+      return result;
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Sets a list of codec classes in the configuration.
+   * @param conf the configuration to modify
+   * @param classes the list of classes to set
+   */
+  public static void setCodecClasses(Configuration conf,
+                                     List<Class> classes) {
+    StringBuffer buf = new StringBuffer();
+    Iterator<Class> itr = classes.iterator();
+    if (itr.hasNext()) {
+      Class cls = itr.next();
+      buf.append(cls.getName());
+      while(itr.hasNext()) {
+        buf.append(',');
+        buf.append(itr.next().getName());
+      }
+    }
+    conf.set("io.compression.codecs", buf.toString());
+  }
+
+  /**
+   * Find the codecs specified in the config value io.compression.codecs
+   * and register them. Defaults to gzip and deflate.
+   */
+  public CompressionCodecFactory(Configuration conf) {
+    codecs = new TreeMap<String, CompressionCodec>();
+    List<Class<? extends CompressionCodec>> codecClasses = getCodecClasses(conf);
+    if (codecClasses == null) {
+      addCodec(new GzipCodec());
+      addCodec(new DefaultCodec());
+    } else {
+      Iterator<Class<? extends CompressionCodec>> itr = codecClasses.iterator();
+      while (itr.hasNext()) {
+        CompressionCodec codec = ReflectionUtils.newInstance(itr.next(), conf);
+        addCodec(codec);
+      }
+    }
+  }
+
+  /**
+   * Find the relevant compression codec for the given file based on its
+   * filename suffix.
+   * @param file the filename to check
+   * @return the codec object
+   */
+  public CompressionCodec getCodec(Path file) {
+    CompressionCodec result = null;
+    if (codecs != null) {
+      String filename = file.getName();
+      String reversedFilename = new StringBuffer(filename).reverse().toString();
+      SortedMap<String, CompressionCodec> subMap =
+        codecs.headMap(reversedFilename);
+      if (!subMap.isEmpty()) {
+        String potentialSuffix = subMap.lastKey();
+        if (reversedFilename.startsWith(potentialSuffix)) {
+          result = codecs.get(potentialSuffix);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Removes a suffix from a filename, if it has it.
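For illustration, resolving a codec from a file name with the default registrations above (the path is made up):

    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    CompressionCodec codec = factory.getCodec(new Path("/data/part-00000.gz"));
    // codec is the gzip codec; a name with no registered suffix yields null
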
+ * @param filename the filename to strip + * @param suffix the suffix to remove + * @return the shortened filename + */ + public static String removeSuffix(String filename, String suffix) { + if (filename.endsWith(suffix)) { + return filename.substring(0, filename.length() - suffix.length()); + } + return filename; + } + + /** + * A little test program. + * @param args + */ + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + CompressionCodecFactory factory = new CompressionCodecFactory(conf); + boolean encode = false; + for(int i=0; i < args.length; ++i) { + if ("-in".equals(args[i])) { + encode = true; + } else if ("-out".equals(args[i])) { + encode = false; + } else { + CompressionCodec codec = factory.getCodec(new Path(args[i])); + if (codec == null) { + System.out.println("Codec for " + args[i] + " not found."); + } else { + if (encode) { + CompressionOutputStream out = + codec.createOutputStream(new java.io.FileOutputStream(args[i])); + byte[] buffer = new byte[100]; + String inFilename = removeSuffix(args[i], + codec.getDefaultExtension()); + java.io.InputStream in = new java.io.FileInputStream(inFilename); + int len = in.read(buffer); + while (len > 0) { + out.write(buffer, 0, len); + len = in.read(buffer); + } + in.close(); + out.close(); + } else { + CompressionInputStream in = + codec.createInputStream(new java.io.FileInputStream(args[i])); + byte[] buffer = new byte[100]; + int len = in.read(buffer); + while (len > 0) { + System.out.write(buffer, 0, len); + len = in.read(buffer); + } + in.close(); + } + } + } + } + } +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionInputStream.java b/src/java/org/apache/hadoop/io/compress/CompressionInputStream.java new file mode 100644 index 00000000000..aabdd2b5e4d --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionInputStream.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A compression input stream. + * + *

Implementations are assumed to be buffered. This permits clients to + * reposition the underlying input stream then call {@link #resetState()}, + * without having to also synchronize client buffers. + */ +public abstract class CompressionInputStream extends InputStream { + /** + * The input stream to be compressed. + */ + protected final InputStream in; + + /** + * Create a compression input stream that reads + * the decompressed bytes from the given stream. + * + * @param in The input stream to be compressed. + */ + protected CompressionInputStream(InputStream in) { + this.in = in; + } + + public void close() throws IOException { + in.close(); + } + + /** + * Read bytes from the stream. + * Made abstract to prevent leakage to underlying stream. + */ + public abstract int read(byte[] b, int off, int len) throws IOException; + + /** + * Reset the decompressor to its initial state and discard any buffered data, + * as the underlying stream may have been repositioned. + */ + public abstract void resetState() throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java b/src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java new file mode 100644 index 00000000000..3b0420f11b5 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressionOutputStream.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * A compression output stream. + */ +public abstract class CompressionOutputStream extends OutputStream { + /** + * The output stream to be compressed. + */ + protected final OutputStream out; + + /** + * Create a compression output stream that writes + * the compressed bytes to the given stream. + * @param out + */ + protected CompressionOutputStream(OutputStream out) { + this.out = out; + } + + public void close() throws IOException { + finish(); + out.close(); + } + + public void flush() throws IOException { + out.flush(); + } + + /** + * Write compressed bytes to the stream. + * Made abstract to prevent leakage to underlying stream. + */ + public abstract void write(byte[] b, int off, int len) throws IOException; + + /** + * Finishes writing compressed data to the output stream + * without closing the underlying stream. + */ + public abstract void finish() throws IOException; + + /** + * Reset the compression to the initial state. + * Does not reset the underlying stream. 
+ */ + public abstract void resetState() throws IOException; + +} diff --git a/src/java/org/apache/hadoop/io/compress/Compressor.java b/src/java/org/apache/hadoop/io/compress/Compressor.java new file mode 100644 index 00000000000..66bc4bfeeda --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/Compressor.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; + +/** + * Specification of a stream-based 'compressor' which can be + * plugged into a {@link CompressionOutputStream} to compress data. + * This is modelled after {@link java.util.zip.Deflater} + * + */ +public interface Compressor { + /** + * Sets input data for compression. + * This should be called whenever #needsInput() returns + * true indicating that more input data is required. + * + * @param b Input data + * @param off Start offset + * @param len Length + */ + public void setInput(byte[] b, int off, int len); + + /** + * Returns true if the input data buffer is empty and + * #setInput() should be called to provide more input. + * + * @return true if the input data buffer is empty and + * #setInput() should be called in order to provide more input. + */ + public boolean needsInput(); + + /** + * Sets preset dictionary for compression. A preset dictionary + * is used when the history buffer can be predetermined. + * + * @param b Dictionary data bytes + * @param off Start offset + * @param len Length + */ + public void setDictionary(byte[] b, int off, int len); + + /** + * Return number of uncompressed bytes input so far. + */ + public long getBytesRead(); + + /** + * Return number of compressed bytes output so far. + */ + public long getBytesWritten(); + + /** + * When called, indicates that compression should end + * with the current contents of the input buffer. + */ + public void finish(); + + /** + * Returns true if the end of the compressed + * data output stream has been reached. + * @return true if the end of the compressed + * data output stream has been reached. + */ + public boolean finished(); + + /** + * Fills specified buffer with compressed data. Returns actual number + * of bytes of compressed data. A return value of 0 indicates that + * needsInput() should be called in order to determine if more input + * data is required. + * + * @param b Buffer for the compressed data + * @param off Start offset of the data + * @param len Size of the buffer + * @return The actual number of bytes of compressed data. + */ + public int compress(byte[] b, int off, int len) throws IOException; + + /** + * Resets compressor so that a new set of input data can be processed. + */ + public void reset(); + + /** + * Closes the compressor and discards any unprocessed input. 
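The interface is meant to be driven in a loop; a condensed sketch of what CompressorStream (later in this patch) does with it, where compressor, buf, out and the input slice (b, off, len) are placeholders:

    compressor.setInput(b, off, len);
    while (!compressor.needsInput()) {
      int n = compressor.compress(buf, 0, buf.length);  // 0 means "give me more input"
      if (n > 0) {
        out.write(buf, 0, n);
      }
    }
    // once all input has been supplied:
    compressor.finish();
    while (!compressor.finished()) {
      int n = compressor.compress(buf, 0, buf.length);
      if (n > 0) {
        out.write(buf, 0, n);
      }
    }
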
+ */ + public void end(); +} diff --git a/src/java/org/apache/hadoop/io/compress/CompressorStream.java b/src/java/org/apache/hadoop/io/compress/CompressorStream.java new file mode 100644 index 00000000000..6917ebfd051 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/CompressorStream.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Compressor; + +public class CompressorStream extends CompressionOutputStream { + protected Compressor compressor; + protected byte[] buffer; + protected boolean closed = false; + + public CompressorStream(OutputStream out, Compressor compressor, int bufferSize) { + super(out); + + if (out == null || compressor == null) { + throw new NullPointerException(); + } else if (bufferSize <= 0) { + throw new IllegalArgumentException("Illegal bufferSize"); + } + + this.compressor = compressor; + buffer = new byte[bufferSize]; + } + + public CompressorStream(OutputStream out, Compressor compressor) { + this(out, compressor, 512); + } + + /** + * Allow derived classes to directly set the underlying stream. + * + * @param out Underlying output stream. 
+ */ + protected CompressorStream(OutputStream out) { + super(out); + } + + public void write(byte[] b, int off, int len) throws IOException { + // Sanity checks + if (compressor.finished()) { + throw new IOException("write beyond end of stream"); + } + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + compressor.setInput(b, off, len); + while (!compressor.needsInput()) { + compress(); + } + } + + protected void compress() throws IOException { + int len = compressor.compress(buffer, 0, buffer.length); + if (len > 0) { + out.write(buffer, 0, len); + } + } + + public void finish() throws IOException { + if (!compressor.finished()) { + compressor.finish(); + while (!compressor.finished()) { + compress(); + } + } + } + + public void resetState() throws IOException { + compressor.reset(); + } + + public void close() throws IOException { + if (!closed) { + finish(); + out.close(); + closed = true; + } + } + + private byte[] oneByte = new byte[1]; + public void write(int b) throws IOException { + oneByte[0] = (byte)(b & 0xff); + write(oneByte, 0, oneByte.length); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/Decompressor.java b/src/java/org/apache/hadoop/io/compress/Decompressor.java new file mode 100644 index 00000000000..5832a4a741c --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/Decompressor.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; + +/** + * Specification of a stream-based 'de-compressor' which can be + * plugged into a {@link CompressionInputStream} to compress data. + * This is modelled after {@link java.util.zip.Inflater} + * + */ +public interface Decompressor { + /** + * Sets input data for decompression. + * This should be called whenever #needsInput() returns + * true indicating that more input data is required. + * + * @param b Input data + * @param off Start offset + * @param len Length + */ + public void setInput(byte[] b, int off, int len); + + /** + * Returns true if the input data buffer is empty and + * #setInput() should be called to provide more input. + * + * @return true if the input data buffer is empty and + * #setInput() should be called in order to provide more input. + */ + public boolean needsInput(); + + /** + * Sets preset dictionary for compression. A preset dictionary + * is used when the history buffer can be predetermined. + * + * @param b Dictionary data bytes + * @param off Start offset + * @param len Length + */ + public void setDictionary(byte[] b, int off, int len); + + /** + * Returns true if a preset dictionary is needed for decompression. 
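For orientation, the Decompressor contract is typically driven like this (a placeholder sketch of the loop DecompressorStream uses further down; buf and compressed are stand-ins):

    int n;
    while ((n = decompressor.decompress(buf, 0, buf.length)) == 0) {
      if (decompressor.finished() || decompressor.needsDictionary()) {
        break;  // end of stream, or a preset dictionary must be supplied first
      }
      if (decompressor.needsInput()) {
        decompressor.setInput(compressed, 0, compressed.length);  // feed more bytes
      }
    }
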
+ * @return true if a preset dictionary is needed for decompression + */ + public boolean needsDictionary(); + + /** + * Returns true if the end of the compressed + * data output stream has been reached. + * @return true if the end of the compressed + * data output stream has been reached. + */ + public boolean finished(); + + /** + * Fills specified buffer with uncompressed data. Returns actual number + * of bytes of uncompressed data. A return value of 0 indicates that + * #needsInput() should be called in order to determine if more input + * data is required. + * + * @param b Buffer for the compressed data + * @param off Start offset of the data + * @param len Size of the buffer + * @return The actual number of bytes of compressed data. + * @throws IOException + */ + public int decompress(byte[] b, int off, int len) throws IOException; + + /** + * Resets decompressor so that a new set of input data can be processed. + */ + public void reset(); + + /** + * Closes the decompressor and discards any unprocessed input. + */ + public void end(); +} diff --git a/src/java/org/apache/hadoop/io/compress/DecompressorStream.java b/src/java/org/apache/hadoop/io/compress/DecompressorStream.java new file mode 100644 index 00000000000..a84bea443e4 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/DecompressorStream.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.io.compress.Decompressor; + +public class DecompressorStream extends CompressionInputStream { + protected Decompressor decompressor = null; + protected byte[] buffer; + protected boolean eof = false; + protected boolean closed = false; + + public DecompressorStream(InputStream in, Decompressor decompressor, int bufferSize) { + super(in); + + if (in == null || decompressor == null) { + throw new NullPointerException(); + } else if (bufferSize <= 0) { + throw new IllegalArgumentException("Illegal bufferSize"); + } + + this.decompressor = decompressor; + buffer = new byte[bufferSize]; + } + + public DecompressorStream(InputStream in, Decompressor decompressor) { + this(in, decompressor, 512); + } + + /** + * Allow derived classes to directly set the underlying stream. + * + * @param in Underlying input stream. + */ + protected DecompressorStream(InputStream in) { + super(in); + } + + private byte[] oneByte = new byte[1]; + public int read() throws IOException { + checkStream(); + return (read(oneByte, 0, oneByte.length) == -1) ? 
-1 : (oneByte[0] & 0xff); + } + + public int read(byte[] b, int off, int len) throws IOException { + checkStream(); + + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + return decompress(b, off, len); + } + + protected int decompress(byte[] b, int off, int len) throws IOException { + int n = 0; + + while ((n = decompressor.decompress(b, off, len)) == 0) { + if (decompressor.finished() || decompressor.needsDictionary()) { + eof = true; + return -1; + } + if (decompressor.needsInput()) { + getCompressedData(); + } + } + + return n; + } + + protected void getCompressedData() throws IOException { + checkStream(); + + int n = in.read(buffer, 0, buffer.length); + if (n == -1) { + throw new EOFException("Unexpected end of input stream"); + } + + decompressor.setInput(buffer, 0, n); + } + + protected void checkStream() throws IOException { + if (closed) { + throw new IOException("Stream closed"); + } + } + + public void resetState() throws IOException { + decompressor.reset(); + } + + private byte[] skipBytes = new byte[512]; + public long skip(long n) throws IOException { + // Sanity checks + if (n < 0) { + throw new IllegalArgumentException("negative skip length"); + } + checkStream(); + + // Read 'n' bytes + int skipped = 0; + while (skipped < n) { + int len = Math.min(((int)n - skipped), skipBytes.length); + len = read(skipBytes, 0, len); + if (len == -1) { + eof = true; + break; + } + skipped += len; + } + return skipped; + } + + public int available() throws IOException { + checkStream(); + return (eof) ? 0 : 1; + } + + public void close() throws IOException { + if (!closed) { + in.close(); + closed = true; + } + } + + public boolean markSupported() { + return false; + } + + public synchronized void mark(int readlimit) { + } + + public synchronized void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/DefaultCodec.java b/src/java/org/apache/hadoop/io/compress/DefaultCodec.java new file mode 100644 index 00000000000..29dc140c00c --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/DefaultCodec.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.io.compress; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.InputStream; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.zlib.*; + +public class DefaultCodec implements Configurable, CompressionCodec { + + Configuration conf; + + public void setConf(Configuration conf) { + this.conf = conf; + } + + public Configuration getConf() { + return conf; + } + + public CompressionOutputStream createOutputStream(OutputStream out) + throws IOException { + return new CompressorStream(out, createCompressor(), + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public CompressionOutputStream createOutputStream(OutputStream out, + Compressor compressor) + throws IOException { + return new CompressorStream(out, compressor, + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public Class getCompressorType() { + return ZlibFactory.getZlibCompressorType(conf); + } + + public Compressor createCompressor() { + return ZlibFactory.getZlibCompressor(conf); + } + + public CompressionInputStream createInputStream(InputStream in) + throws IOException { + return new DecompressorStream(in, createDecompressor(), + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public CompressionInputStream createInputStream(InputStream in, + Decompressor decompressor) + throws IOException { + return new DecompressorStream(in, decompressor, + conf.getInt("io.file.buffer.size", 4*1024)); + } + + public Class getDecompressorType() { + return ZlibFactory.getZlibDecompressorType(conf); + } + + public Decompressor createDecompressor() { + return ZlibFactory.getZlibDecompressor(conf); + } + + public String getDefaultExtension() { + return ".deflate"; + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/GzipCodec.java b/src/java/org/apache/hadoop/io/compress/GzipCodec.java new file mode 100644 index 00000000000..674dce280fe --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/GzipCodec.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress; + +import java.io.*; +import java.util.zip.GZIPOutputStream; +import java.util.zip.GZIPInputStream; + +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.io.compress.zlib.*; + +/** + * This class creates gzip compressors/decompressors. + */ +public class GzipCodec extends DefaultCodec { + /** + * A bridge that wraps around a DeflaterOutputStream to make it + * a CompressionOutputStream. 
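A minimal round trip through DefaultCodec (illustrative only; note that setConf must be called before use, since the stream buffer size comes from io.file.buffer.size):

    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    ByteArrayOutputStream raw = new ByteArrayOutputStream();
    CompressionOutputStream cout = codec.createOutputStream(raw);
    cout.write("hello, world".getBytes());
    cout.close();                         // finish() the compressor, then close
    CompressionInputStream cin = codec.createInputStream(
        new ByteArrayInputStream(raw.toByteArray()));
    // reading cin yields the original bytes; the stored form uses the ".deflate" extension
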
+ */ + protected static class GzipOutputStream extends CompressorStream { + + private static class ResetableGZIPOutputStream extends GZIPOutputStream { + + public ResetableGZIPOutputStream(OutputStream out) throws IOException { + super(out); + } + + public void resetState() throws IOException { + def.reset(); + } + } + + public GzipOutputStream(OutputStream out) throws IOException { + super(new ResetableGZIPOutputStream(out)); + } + + /** + * Allow children types to put a different type in here. + * @param out the Deflater stream to use + */ + protected GzipOutputStream(CompressorStream out) { + super(out); + } + + public void close() throws IOException { + out.close(); + } + + public void flush() throws IOException { + out.flush(); + } + + public void write(int b) throws IOException { + out.write(b); + } + + public void write(byte[] data, int offset, int length) + throws IOException { + out.write(data, offset, length); + } + + public void finish() throws IOException { + ((ResetableGZIPOutputStream) out).finish(); + } + + public void resetState() throws IOException { + ((ResetableGZIPOutputStream) out).resetState(); + } + } + + protected static class GzipInputStream extends DecompressorStream { + + private static class ResetableGZIPInputStream extends GZIPInputStream { + + public ResetableGZIPInputStream(InputStream in) throws IOException { + super(in); + } + + public void resetState() throws IOException { + inf.reset(); + } + } + + public GzipInputStream(InputStream in) throws IOException { + super(new ResetableGZIPInputStream(in)); + } + + /** + * Allow subclasses to directly set the inflater stream. + */ + protected GzipInputStream(DecompressorStream in) { + super(in); + } + + public int available() throws IOException { + return in.available(); + } + + public void close() throws IOException { + in.close(); + } + + public int read() throws IOException { + return in.read(); + } + + public int read(byte[] data, int offset, int len) throws IOException { + return in.read(data, offset, len); + } + + public long skip(long offset) throws IOException { + return in.skip(offset); + } + + public void resetState() throws IOException { + ((ResetableGZIPInputStream) in).resetState(); + } + } + + public CompressionOutputStream createOutputStream(OutputStream out) + throws IOException { + return (ZlibFactory.isNativeZlibLoaded(conf)) ? + new CompressorStream(out, createCompressor(), + conf.getInt("io.file.buffer.size", 4*1024)) : + new GzipOutputStream(out); + } + + public CompressionOutputStream createOutputStream(OutputStream out, + Compressor compressor) + throws IOException { + return (compressor != null) ? + new CompressorStream(out, compressor, + conf.getInt("io.file.buffer.size", + 4*1024)) : + createOutputStream(out); + + } + + public Compressor createCompressor() { + return (ZlibFactory.isNativeZlibLoaded(conf)) + ? new GzipZlibCompressor() + : null; + } + + public Class getCompressorType() { + return ZlibFactory.isNativeZlibLoaded(conf) + ? GzipZlibCompressor.class + : BuiltInZlibDeflater.class; + } + + public CompressionInputStream createInputStream(InputStream in) + throws IOException { + return (ZlibFactory.isNativeZlibLoaded(conf)) ? + new DecompressorStream(in, createDecompressor(), + conf.getInt("io.file.buffer.size", + 4*1024)) : + new GzipInputStream(in); + } + + public CompressionInputStream createInputStream(InputStream in, + Decompressor decompressor) + throws IOException { + return (decompressor != null) ? 
+ new DecompressorStream(in, decompressor, + conf.getInt("io.file.buffer.size", + 4*1024)) : + createInputStream(in); + } + + public Decompressor createDecompressor() { + return (ZlibFactory.isNativeZlibLoaded(conf)) + ? new GzipZlibDecompressor() + : null; + } + + public Class getDecompressorType() { + return ZlibFactory.isNativeZlibLoaded(conf) + ? GzipZlibDecompressor.class + : BuiltInZlibInflater.class; + } + + public String getDefaultExtension() { + return ".gz"; + } + + static final class GzipZlibCompressor extends ZlibCompressor { + public GzipZlibCompressor() { + super(ZlibCompressor.CompressionLevel.DEFAULT_COMPRESSION, + ZlibCompressor.CompressionStrategy.DEFAULT_STRATEGY, + ZlibCompressor.CompressionHeader.GZIP_FORMAT, 64*1024); + } + } + + static final class GzipZlibDecompressor extends ZlibDecompressor { + public GzipZlibDecompressor() { + super(ZlibDecompressor.CompressionHeader.AUTODETECT_GZIP_ZLIB, 64*1024); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java new file mode 100644 index 00000000000..99dc28146d3 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.hadoop.io.compress.bzip2; + +/** + * Base class for both the compress and decompress classes. Holds common arrays, + * and static data. + *

+ * This interface is public for historical purposes. You should have no need to + * use it. + *

+ */ +public interface BZip2Constants { + + int baseBlockSize = 100000; + int MAX_ALPHA_SIZE = 258; + int MAX_CODE_LEN = 23; + int RUNA = 0; + int RUNB = 1; + int N_GROUPS = 6; + int G_SIZE = 50; + int N_ITERS = 4; + int MAX_SELECTORS = (2 + (900000 / G_SIZE)); + int NUM_OVERSHOOT_BYTES = 20; + + /** + * This array really shouldn't be here. Again, for historical purposes it + * is. + * + *

+ * FIXME: This array should be in a private or package private location, + * since it could be modified by malicious code. + *

+ */ + final int[] rNums = { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, + 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335, + 708, 621, 574, 73, 654, 730, 472, 419, 436, 278, 496, 867, 210, + 399, 680, 480, 51, 878, 465, 811, 169, 869, 675, 611, 697, 867, + 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 150, 238, + 59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727, + 476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249, + 445, 515, 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641, + 801, 220, 162, 819, 984, 589, 513, 495, 799, 161, 604, 958, 533, + 221, 400, 386, 867, 600, 782, 382, 596, 414, 171, 516, 375, 682, + 485, 911, 276, 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, + 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 469, 68, 770, + 919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383, 461, + 404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604, + 560, 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970, + 447, 318, 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, + 354, 99, 820, 908, 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, + 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 411, 521, 938, + 300, 821, 78, 343, 175, 128, 250, 170, 774, 972, 275, 999, 639, + 495, 78, 352, 126, 857, 956, 358, 619, 580, 124, 737, 594, 701, + 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 944, 375, + 748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 655, + 814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, + 228, 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754, + 806, 760, 493, 403, 415, 394, 687, 700, 946, 670, 656, 610, 738, + 392, 760, 799, 887, 653, 978, 321, 576, 617, 626, 502, 894, 679, + 243, 440, 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 707, + 151, 457, 449, 797, 195, 791, 558, 945, 679, 297, 59, 87, 824, 713, + 663, 412, 693, 342, 606, 134, 108, 571, 364, 631, 212, 174, 643, + 304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 140, + 206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692, + 829, 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150, + 790, 288, 923, 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, + 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 661, 821, 976, + 991, 658, 869, 905, 758, 745, 193, 768, 550, 608, 933, 378, 286, + 215, 979, 792, 961, 61, 688, 793, 644, 986, 403, 106, 366, 905, + 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 780, 773, + 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857, + 265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, + 858, 364, 936, 638 }; +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java new file mode 100644 index 00000000000..2594717113c --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyCompressor.java @@ -0,0 +1,62 @@ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.IOException; + +import org.apache.hadoop.io.compress.Compressor; + +/** + * This is a dummy compressor for BZip2. 
+ */ +public class BZip2DummyCompressor implements Compressor { + + @Override + public int compress(byte[] b, int off, int len) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void end() { + throw new UnsupportedOperationException(); + } + + @Override + public void finish() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean finished() { + throw new UnsupportedOperationException(); + } + + @Override + public long getBytesRead() { + throw new UnsupportedOperationException(); + } + + @Override + public long getBytesWritten() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsInput() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + // do nothing + } + + @Override + public void setDictionary(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + + @Override + public void setInput(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java new file mode 100644 index 00000000000..15308fbd038 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/BZip2DummyDecompressor.java @@ -0,0 +1,52 @@ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.IOException; + +import org.apache.hadoop.io.compress.Decompressor; + +/** + * This is a dummy decompressor for BZip2. + */ +public class BZip2DummyDecompressor implements Decompressor { + + @Override + public int decompress(byte[] b, int off, int len) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void end() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean finished() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsDictionary() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean needsInput() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + // do nothing + } + + @Override + public void setDictionary(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + + @Override + public void setInput(byte[] b, int off, int len) { + throw new UnsupportedOperationException(); + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java new file mode 100644 index 00000000000..567cb5efd3f --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java @@ -0,0 +1,969 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ +package org.apache.hadoop.io.compress.bzip2; + +import java.io.InputStream; +import java.io.IOException; + +/** + * An input stream that decompresses from the BZip2 format (without the file + * header chars) to be read as any other stream. + * + *

+ * The decompression requires large amounts of memory. Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * CBZip2InputStream to release the allocated memory. See + * {@link CBZip2OutputStream CBZip2OutputStream} for information about memory + * usage. + *

+ * + *

+ * CBZip2InputStream reads bytes from the compressed source stream via
+ * the single byte {@link java.io.InputStream#read() read()} method exclusively.
+ * Thus you should consider using a buffered source stream.
+ *

+ * + *

+ * Instances of this class are not threadsafe. + *
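+ * A minimal usage sketch (illustrative only; <code>data.bz2</code> is just an
+ * example name): skip the two magic bytes "BZ", wrap the source in a
+ * buffered stream as advised above, and read as from any other stream.
+ *
+ * <pre>
+ * InputStream raw = new BufferedInputStream(new FileInputStream("data.bz2"));
+ * raw.read(); // 'B'
+ * raw.read(); // 'Z'
+ * CBZip2InputStream bzIn = new CBZip2InputStream(raw);
+ * int b;
+ * while ((b = bzIn.read()) >= 0) {
+ *   // consume decompressed bytes
+ * }
+ * bzIn.close();
+ * </pre>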

+ */ +public class CBZip2InputStream extends InputStream implements BZip2Constants { + + private static void reportCRCError() throws IOException { + + throw new IOException("BZip2 CRC error"); + + } + + private void makeMaps() { + final boolean[] inUse = this.data.inUse; + final byte[] seqToUnseq = this.data.seqToUnseq; + + int nInUseShadow = 0; + + for (int i = 0; i < 256; i++) { + if (inUse[i]) + seqToUnseq[nInUseShadow++] = (byte) i; + } + + this.nInUse = nInUseShadow; + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Index in zptr[] of original string after sorting. + */ + private int origPtr; + + /** + * always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private int blockSize100k; + + private boolean blockRandomised; + + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private InputStream in; + + private int currentChar = -1; + + private static final int EOF = 0; + private static final int START_BLOCK_STATE = 1; + private static final int RAND_PART_A_STATE = 2; + private static final int RAND_PART_B_STATE = 3; + private static final int RAND_PART_C_STATE = 4; + private static final int NO_RAND_PART_A_STATE = 5; + private static final int NO_RAND_PART_B_STATE = 6; + private static final int NO_RAND_PART_C_STATE = 7; + + private int currentState = START_BLOCK_STATE; + + private int storedBlockCRC, storedCombinedCRC; + private int computedBlockCRC, computedCombinedCRC; + + // Variables used by setup* methods exclusively + + private int su_count; + private int su_ch2; + private int su_chPrev; + private int su_i2; + private int su_j2; + private int su_rNToGo; + private int su_rTPos; + private int su_tPos; + private char su_z; + + /** + * All memory intensive stuff. This field is initialized by initBlock(). + */ + private CBZip2InputStream.Data data; + + /** + * Constructs a new CBZip2InputStream which decompresses bytes read from the + * specified stream. + * + *

+ * Although BZip2 headers are marked with the magic "BZ", this
+ * constructor expects the next byte in the stream to be the first one after
+ * the magic. Thus callers have to skip the first two bytes. Otherwise this
+ * constructor will throw an exception.
+ *

+ * + * @throws IOException + * if the stream content is malformed or an I/O error occurs. + * @throws NullPointerException + * if in == null + */ + public CBZip2InputStream(final InputStream in) throws IOException { + super(); + + this.in = in; + init(); + } + + public int read() throws IOException { + if (this.in != null) { + return read0(); + } else { + throw new IOException("stream closed"); + } + } + + public int read(final byte[] dest, final int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > dest.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > dest.length(" + dest.length + ")."); + } + if (this.in == null) { + throw new IOException("stream closed"); + } + + final int hi = offs + len; + int destOffs = offs; + for (int b; (destOffs < hi) && ((b = read0()) >= 0);) { + dest[destOffs++] = (byte) b; + } + + return (destOffs == offs) ? -1 : (destOffs - offs); + } + + private int read0() throws IOException { + final int retChar = this.currentChar; + + switch (this.currentState) { + case EOF: + return -1; + + case START_BLOCK_STATE: + throw new IllegalStateException(); + + case RAND_PART_A_STATE: + throw new IllegalStateException(); + + case RAND_PART_B_STATE: + setupRandPartB(); + break; + + case RAND_PART_C_STATE: + setupRandPartC(); + break; + + case NO_RAND_PART_A_STATE: + throw new IllegalStateException(); + + case NO_RAND_PART_B_STATE: + setupNoRandPartB(); + break; + + case NO_RAND_PART_C_STATE: + setupNoRandPartC(); + break; + + default: + throw new IllegalStateException(); + } + + return retChar; + } + + private void init() throws IOException { + int magic2 = this.in.read(); + if (magic2 != 'h') { + throw new IOException("Stream is not BZip2 formatted: expected 'h'" + + " as first byte but got '" + (char) magic2 + "'"); + } + + int blockSize = this.in.read(); + if ((blockSize < '1') || (blockSize > '9')) { + throw new IOException("Stream is not BZip2 formatted: illegal " + + "blocksize " + (char) blockSize); + } + + this.blockSize100k = blockSize - '0'; + + initBlock(); + setupBlock(); + } + + private void initBlock() throws IOException { + char magic0 = bsGetUByte(); + char magic1 = bsGetUByte(); + char magic2 = bsGetUByte(); + char magic3 = bsGetUByte(); + char magic4 = bsGetUByte(); + char magic5 = bsGetUByte(); + + if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45 + && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) { + complete(); // end of file + } else if (magic0 != 0x31 || // '1' + magic1 != 0x41 || // ')' + magic2 != 0x59 || // 'Y' + magic3 != 0x26 || // '&' + magic4 != 0x53 || // 'S' + magic5 != 0x59 // 'Y' + ) { + this.currentState = EOF; + throw new IOException("bad block header"); + } else { + this.storedBlockCRC = bsGetInt(); + this.blockRandomised = bsR(1) == 1; + + /** + * Allocate data here instead in constructor, so we do not allocate + * it if the input file is empty. + */ + if (this.data == null) { + this.data = new Data(this.blockSize100k); + } + + // currBlockNo++; + getAndMoveToFrontDecode(); + + this.crc.initialiseCRC(); + this.currentState = START_BLOCK_STATE; + } + } + + private void endBlock() throws IOException { + this.computedBlockCRC = this.crc.getFinalCRC(); + + // A bad CRC is considered a fatal error. 
+ if (this.storedBlockCRC != this.computedBlockCRC) { + // make next blocks readable without error + // (repair feature, not yet documented, not tested) + this.computedCombinedCRC = (this.storedCombinedCRC << 1) + | (this.storedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.storedBlockCRC; + + reportCRCError(); + } + + this.computedCombinedCRC = (this.computedCombinedCRC << 1) + | (this.computedCombinedCRC >>> 31); + this.computedCombinedCRC ^= this.computedBlockCRC; + } + + private void complete() throws IOException { + this.storedCombinedCRC = bsGetInt(); + this.currentState = EOF; + this.data = null; + + if (this.storedCombinedCRC != this.computedCombinedCRC) { + reportCRCError(); + } + } + + public void close() throws IOException { + InputStream inShadow = this.in; + if (inShadow != null) { + try { + if (inShadow != System.in) { + inShadow.close(); + } + } finally { + this.data = null; + this.in = null; + } + } + } + + private int bsR(final int n) throws IOException { + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + if (bsLiveShadow < n) { + final InputStream inShadow = this.in; + do { + int thech = inShadow.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + } while (bsLiveShadow < n); + + this.bsBuff = bsBuffShadow; + } + + this.bsLive = bsLiveShadow - n; + return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1); + } + + private boolean bsGetBit() throws IOException { + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + if (bsLiveShadow < 1) { + int thech = this.in.read(); + + if (thech < 0) { + throw new IOException("unexpected end of stream"); + } + + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + this.bsBuff = bsBuffShadow; + } + + this.bsLive = bsLiveShadow - 1; + return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0; + } + + private char bsGetUByte() throws IOException { + return (char) bsR(8); + } + + private int bsGetInt() throws IOException { + return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8); + } + + /** + * Called by createHuffmanDecodingTables() exclusively. 
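+ * Builds the canonical Huffman decoding tables for one coding group:
+ * <code>perm</code> lists the symbols ordered by code length,
+ * <code>limit[i]</code> holds the largest code value of length <code>i</code>,
+ * and <code>base</code> is offset so that
+ * <code>perm[code - base[length]]</code> selects the decoded symbol.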
+ */ + private static void hbCreateDecodeTables(final int[] limit, + final int[] base, final int[] perm, final char[] length, + final int minLen, final int maxLen, final int alphaSize) { + for (int i = minLen, pp = 0; i <= maxLen; i++) { + for (int j = 0; j < alphaSize; j++) { + if (length[j] == i) { + perm[pp++] = j; + } + } + } + + for (int i = MAX_CODE_LEN; --i > 0;) { + base[i] = 0; + limit[i] = 0; + } + + for (int i = 0; i < alphaSize; i++) { + base[length[i] + 1]++; + } + + for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { + b += base[i]; + base[i] = b; + } + + for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { + final int nb = base[i + 1]; + vec += nb - b; + b = nb; + limit[i] = vec - 1; + vec <<= 1; + } + + for (int i = minLen + 1; i <= maxLen; i++) { + base[i] = ((limit[i - 1] + 1) << 1) - base[i]; + } + } + + private void recvDecodingTables() throws IOException { + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] pos = dataShadow.recvDecodingTables_pos; + final byte[] selector = dataShadow.selector; + final byte[] selectorMtf = dataShadow.selectorMtf; + + int inUse16 = 0; + + /* Receive the mapping table */ + for (int i = 0; i < 16; i++) { + if (bsGetBit()) { + inUse16 |= 1 << i; + } + } + + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + + for (int i = 0; i < 16; i++) { + if ((inUse16 & (1 << i)) != 0) { + final int i16 = i << 4; + for (int j = 0; j < 16; j++) { + if (bsGetBit()) { + inUse[i16 + j] = true; + } + } + } + } + + makeMaps(); + final int alphaSize = this.nInUse + 2; + + /* Now the selectors */ + final int nGroups = bsR(3); + final int nSelectors = bsR(15); + + for (int i = 0; i < nSelectors; i++) { + int j = 0; + while (bsGetBit()) { + j++; + } + selectorMtf[i] = (byte) j; + } + + /* Undo the MTF values for the selectors. */ + for (int v = nGroups; --v >= 0;) { + pos[v] = (byte) v; + } + + for (int i = 0; i < nSelectors; i++) { + int v = selectorMtf[i] & 0xff; + final byte tmp = pos[v]; + while (v > 0) { + // nearly all times v is zero, 4 in most other cases + pos[v] = pos[v - 1]; + v--; + } + pos[0] = tmp; + selector[i] = tmp; + } + + final char[][] len = dataShadow.temp_charArray2d; + + /* Now the coding tables */ + for (int t = 0; t < nGroups; t++) { + int curr = bsR(5); + final char[] len_t = len[t]; + for (int i = 0; i < alphaSize; i++) { + while (bsGetBit()) { + curr += bsGetBit() ? -1 : 1; + } + len_t[i] = (char) curr; + } + } + + // finally create the Huffman tables + createHuffmanDecodingTables(alphaSize, nGroups); + } + + /** + * Called by recvDecodingTables() exclusively. 
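+ * For each of the <code>nGroups</code> coding tables this finds the shortest
+ * and longest code length in use and then delegates to
+ * <code>hbCreateDecodeTables</code>, recording the shortest length in
+ * <code>minLens</code>.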
+ */ + private void createHuffmanDecodingTables(final int alphaSize, + final int nGroups) { + final Data dataShadow = this.data; + final char[][] len = dataShadow.temp_charArray2d; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final char[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final char lent = len_t[i]; + if (lent > maxLen) { + maxLen = lent; + } + if (lent < minLen) { + minLen = lent; + } + } + hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, + maxLen, alphaSize); + minLens[t] = minLen; + } + } + + private void getAndMoveToFrontDecode() throws IOException { + this.origPtr = bsR(24); + recvDecodingTables(); + + final InputStream inShadow = this.in; + final Data dataShadow = this.data; + final byte[] ll8 = dataShadow.ll8; + final int[] unzftab = dataShadow.unzftab; + final byte[] selector = dataShadow.selector; + final byte[] seqToUnseq = dataShadow.seqToUnseq; + final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; + final int[] minLens = dataShadow.minLens; + final int[][] limit = dataShadow.limit; + final int[][] base = dataShadow.base; + final int[][] perm = dataShadow.perm; + final int limitLast = this.blockSize100k * 100000; + + /* + * Setting up the unzftab entries here is not strictly necessary, but it + * does save having to do it later in a separate pass, and so saves a + * block's worth of cache misses. + */ + for (int i = 256; --i >= 0;) { + yy[i] = (char) i; + unzftab[i] = 0; + } + + int groupNo = 0; + int groupPos = G_SIZE - 1; + final int eob = this.nInUse + 1; + int nextSym = getAndMoveToFrontDecode0(0); + int bsBuffShadow = this.bsBuff; + int bsLiveShadow = this.bsLive; + int lastShadow = -1; + int zt = selector[groupNo] & 0xff; + int[] base_zt = base[zt]; + int[] limit_zt = limit[zt]; + int[] perm_zt = perm[zt]; + int minLens_zt = minLens[zt]; + + while (nextSym != eob) { + if ((nextSym == RUNA) || (nextSym == RUNB)) { + int s = -1; + + for (int n = 1; true; n <<= 1) { + if (nextSym == RUNA) { + s += n; + } else if (nextSym == RUNB) { + s += n << 1; + } else { + break; + } + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLens_zt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLiveShadow < zn) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) + & ((1 << zn) - 1); + bsLiveShadow -= zn; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException( + "unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) + | ((bsBuffShadow >> bsLiveShadow) & 1); + } + nextSym = perm_zt[zvec - base_zt[zn]]; + } + + final byte ch = seqToUnseq[yy[0]]; + unzftab[ch & 0xff] += s + 1; + + while (s-- >= 0) { + ll8[++lastShadow] = ch; + } + + if (lastShadow >= limitLast) { + throw new IOException("block overrun"); + } + } else { + if (++lastShadow >= limitLast) { + throw new 
IOException("block overrun"); + } + + final char tmp = yy[nextSym - 1]; + unzftab[seqToUnseq[tmp] & 0xff]++; + ll8[lastShadow] = seqToUnseq[tmp]; + + /* + * This loop is hammered during decompression, hence avoid + * native method call overhead of System.arraycopy for very + * small ranges to copy. + */ + if (nextSym <= 16) { + for (int j = nextSym - 1; j > 0;) { + yy[j] = yy[--j]; + } + } else { + System.arraycopy(yy, 0, yy, 1, nextSym - 1); + } + + yy[0] = tmp; + + if (groupPos == 0) { + groupPos = G_SIZE - 1; + zt = selector[++groupNo] & 0xff; + base_zt = base[zt]; + limit_zt = limit[zt]; + perm_zt = perm[zt]; + minLens_zt = minLens[zt]; + } else { + groupPos--; + } + + int zn = minLens_zt; + + // Inlined: + // int zvec = bsR(zn); + while (bsLiveShadow < zn) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + int zvec = (bsBuffShadow >> (bsLiveShadow - zn)) + & ((1 << zn) - 1); + bsLiveShadow -= zn; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + nextSym = perm_zt[zvec - base_zt[zn]]; + } + } + + this.last = lastShadow; + this.bsLive = bsLiveShadow; + this.bsBuff = bsBuffShadow; + } + + private int getAndMoveToFrontDecode0(final int groupNo) throws IOException { + final InputStream inShadow = this.in; + final Data dataShadow = this.data; + final int zt = dataShadow.selector[groupNo] & 0xff; + final int[] limit_zt = dataShadow.limit[zt]; + int zn = dataShadow.minLens[zt]; + int zvec = bsR(zn); + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (zvec > limit_zt[zn]) { + zn++; + while (bsLiveShadow < 1) { + final int thech = inShadow.read(); + + if (thech >= 0) { + bsBuffShadow = (bsBuffShadow << 8) | thech; + bsLiveShadow += 8; + continue; + } else { + throw new IOException("unexpected end of stream"); + } + } + bsLiveShadow--; + zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1); + } + + this.bsLive = bsLiveShadow; + this.bsBuff = bsBuffShadow; + + return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]]; + } + + private void setupBlock() throws IOException { + if (this.data == null) { + return; + } + + final int[] cftab = this.data.cftab; + final int[] tt = this.data.initTT(this.last + 1); + final byte[] ll8 = this.data.ll8; + cftab[0] = 0; + System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); + + for (int i = 1, c = cftab[0]; i <= 256; i++) { + c += cftab[i]; + cftab[i] = c; + } + + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + tt[cftab[ll8[i] & 0xff]++] = i; + } + + if ((this.origPtr < 0) || (this.origPtr >= tt.length)) { + throw new IOException("stream corrupted"); + } + + this.su_tPos = tt[this.origPtr]; + this.su_count = 0; + this.su_i2 = 0; + this.su_ch2 = 256; /* not a char and not EOF */ + + if (this.blockRandomised) { + this.su_rNToGo = 0; + this.su_rTPos = 0; + setupRandPartA(); + } else { + setupNoRandPartA(); + } + } + + private void setupRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; + this.su_tPos = this.data.tt[this.su_tPos]; + if 
(this.su_rNToGo == 0) { + this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; + } + } else { + this.su_rNToGo--; + } + this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0; + this.su_i2++; + this.currentChar = su_ch2Shadow; + this.currentState = RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2Shadow); + } else { + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupNoRandPartA() throws IOException { + if (this.su_i2 <= this.last) { + this.su_chPrev = this.su_ch2; + int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; + this.su_ch2 = su_ch2Shadow; + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_i2++; + this.currentChar = su_ch2Shadow; + this.currentState = NO_RAND_PART_B_STATE; + this.crc.updateCRC(su_ch2Shadow); + } else { + this.currentState = NO_RAND_PART_A_STATE; + endBlock(); + initBlock(); + setupBlock(); + } + } + + private void setupRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.currentState = RAND_PART_A_STATE; + this.su_count = 1; + setupRandPartA(); + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + if (this.su_rNToGo == 0) { + this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1; + if (++this.su_rTPos == 512) { + this.su_rTPos = 0; + } + } else { + this.su_rNToGo--; + } + this.su_j2 = 0; + this.currentState = RAND_PART_C_STATE; + if (this.su_rNToGo == 1) { + this.su_z ^= 1; + } + setupRandPartC(); + } else { + this.currentState = RAND_PART_A_STATE; + setupRandPartA(); + } + } + + private void setupRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + this.currentChar = this.su_ch2; + this.crc.updateCRC(this.su_ch2); + this.su_j2++; + } else { + this.currentState = RAND_PART_A_STATE; + this.su_i2++; + this.su_count = 0; + setupRandPartA(); + } + } + + private void setupNoRandPartB() throws IOException { + if (this.su_ch2 != this.su_chPrev) { + this.su_count = 1; + setupNoRandPartA(); + } else if (++this.su_count >= 4) { + this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); + this.su_tPos = this.data.tt[this.su_tPos]; + this.su_j2 = 0; + setupNoRandPartC(); + } else { + setupNoRandPartA(); + } + } + + private void setupNoRandPartC() throws IOException { + if (this.su_j2 < this.su_z) { + int su_ch2Shadow = this.su_ch2; + this.currentChar = su_ch2Shadow; + this.crc.updateCRC(su_ch2Shadow); + this.su_j2++; + this.currentState = NO_RAND_PART_C_STATE; + } else { + this.su_i2++; + this.su_count = 0; + setupNoRandPartA(); + } + } + + private static final class Data extends Object { + + // (with blockSize 900k) + final boolean[] inUse = new boolean[256]; // 256 byte + + final byte[] seqToUnseq = new byte[256]; // 256 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + /** + * Freq table collected to save a pass over the data during + * decompression. 
+ */ + final int[] unzftab = new int[256]; // 1024 byte + + final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte + final int[] minLens = new int[N_GROUPS]; // 24 byte + + final int[] cftab = new int[257]; // 1028 byte + final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte + final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 + // byte + final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte + // --------------- + // 60798 byte + + int[] tt; // 3600000 byte + byte[] ll8; // 900000 byte + + // --------------- + // 4560782 byte + // =============== + + Data(int blockSize100k) { + super(); + + this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize]; + } + + /** + * Initializes the {@link #tt} array. + * + * This method is called when the required length of the array is known. + * I don't initialize it at construction time to avoid unneccessary + * memory allocation when compressing small files. + */ + final int[] initTT(int length) { + int[] ttShadow = this.tt; + + // tt.length should always be >= length, but theoretically + // it can happen, if the compressor mixed small and large + // blocks. Normally only the last block will be smaller + // than others. + if ((ttShadow == null) || (ttShadow.length < length)) { + this.tt = ttShadow = new int[length]; + } + + return ttShadow; + } + + } +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java new file mode 100644 index 00000000000..392cf2c521d --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java @@ -0,0 +1,2081 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.hadoop.io.compress.bzip2; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * An output stream that compresses into the BZip2 format (without the file + * header chars) into another stream. + * + *

+ * The compression requires large amounts of memory. Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * CBZip2OutputStream to release the allocated memory. + *

+ * + *

+ * You can shrink the amount of allocated memory and maybe raise the compression + * speed by choosing a lower blocksize, which in turn may cause a lower + * compression ratio. You can avoid unnecessary memory allocation by avoiding + * using a blocksize which is bigger than the size of the input. + *

+ * + *

+ * You can compute the memory usage for compressing by the following formula: + *

+ * + *
+ * <code>400k + (9 * blocksize)</code>.
+ * 
+ * + *

+ * To get the memory required for decompression by {@link CBZip2InputStream + * CBZip2InputStream} use + *

+ * + *
+ * <code>65k + (5 * blocksize)</code>.
+ * 
+ *
+ * <table border="1">
+ * <caption>Memory usage by blocksize</caption>
+ * <tr><th>Blocksize</th><th>Compression memory usage</th><th>Decompression memory usage</th></tr>
+ * <tr><td>100k</td><td>1300k</td><td>565k</td></tr>
+ * <tr><td>200k</td><td>2200k</td><td>1065k</td></tr>
+ * <tr><td>300k</td><td>3100k</td><td>1565k</td></tr>
+ * <tr><td>400k</td><td>4000k</td><td>2065k</td></tr>
+ * <tr><td>500k</td><td>4900k</td><td>2565k</td></tr>
+ * <tr><td>600k</td><td>5800k</td><td>3065k</td></tr>
+ * <tr><td>700k</td><td>6700k</td><td>3565k</td></tr>
+ * <tr><td>800k</td><td>7600k</td><td>4065k</td></tr>
+ * <tr><td>900k</td><td>8500k</td><td>4565k</td></tr>
+ * </table>
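+ * For example, the default 900k blocksize needs roughly 400k + 9 * 900k = 8500k
+ * of memory for compression and 65k + 5 * 900k = 4565k for decompression, as in
+ * the last row above.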
+ * + *

+ * For decompression CBZip2InputStream allocates less memory if the + * bzipped input is smaller than one block. + *

+ * + *

+ * Instances of this class are not threadsafe. + *

+ * + *

+ * TODO: Update to BZip2 1.0.1 + *
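+ *
+ * A minimal usage sketch (illustrative only; <code>data.bz2</code> and
+ * <code>data</code> are example names): the caller writes the two magic bytes
+ * "BZ" and then streams the raw bytes through this class.
+ *
+ * <pre>
+ * OutputStream raw = new FileOutputStream("data.bz2");
+ * raw.write('B');
+ * raw.write('Z');
+ * CBZip2OutputStream bzOut = new CBZip2OutputStream(raw, 9); // 9 == 900k blocks
+ * bzOut.write(data); // data is the caller's byte[]
+ * bzOut.close();
+ * </pre>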

+ * + */ +public class CBZip2OutputStream extends OutputStream implements BZip2Constants { + + /** + * The minimum supported blocksize == 1. + */ + public static final int MIN_BLOCKSIZE = 1; + + /** + * The maximum supported blocksize == 9. + */ + public static final int MAX_BLOCKSIZE = 9; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int SETMASK = (1 << 21); + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int CLEARMASK = (~SETMASK); + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int GREATER_ICOST = 15; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int LESSER_ICOST = 0; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int SMALL_THRESH = 20; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int DEPTH_THRESH = 10; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + */ + protected static final int WORK_FACTOR = 30; + + /** + * This constant is accessible by subclasses for historical purposes. If you + * don't know what it means then you don't need it. + *

+ * If you are ever unlucky/improbable enough to get a stack overflow whilst + * sorting, increase the following constant and try again. In practice I + * have never seen the stack go above 27 elems, so the following limit seems + * very generous. + *

+ */ + protected static final int QSORT_STACK_SIZE = 1000; + + /** + * Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, 2391484 }; + + /** + * This method is accessible by subclasses for historical purposes. If you + * don't know what it does then you don't need it. + */ + protected static void hbMakeCodeLengths(char[] len, int[] freq, + int alphaSize, int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = new int[MAX_ALPHA_SIZE * 2]; + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = (((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff)))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (char) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + private static void hbMakeCodeLengths(final byte[] len, final int[] freq, + final Data dat, final int alphaSize, final int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. 
+ */ + final int[] heap = dat.heap; + final int[] weight = dat.weight; + final int[] parent = dat.parent; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = ((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (byte) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Index in fmap[] of original string after sorting. + */ + private int origPtr; + + /** + * Always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private final int blockSize100k; + + private boolean blockRandomised; + + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private int nMTF; + + /* + * Used when sorting. If too many long comparisons happen, we stop sorting, + * randomise the block slightly, and try again. + */ + private int workDone; + private int workLimit; + private boolean firstAttempt; + + private int currentChar = -1; + private int runLength = 0; + + private int blockCRC; + private int combinedCRC; + private int allowableBlockSize; + + /** + * All memory intensive stuff. + */ + private CBZip2OutputStream.Data data; + + private OutputStream out; + + /** + * Chooses a blocksize based on the given length of the data to compress. + * + * @return The blocksize, between {@link #MIN_BLOCKSIZE} and + * {@link #MAX_BLOCKSIZE} both inclusive. 
For a negative + * inputLength this method returns MAX_BLOCKSIZE + * always. + * + * @param inputLength + * The length of the data which will be compressed by + * CBZip2OutputStream. + */ + public static int chooseBlockSize(long inputLength) { + return (inputLength > 0) ? (int) Math + .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; + } + + /** + * Constructs a new CBZip2OutputStream with a blocksize of 900k. + * + *

+ * Attention: The caller is responsible for writing the two BZip2 magic
+ * bytes "BZ" to the specified stream prior to calling this
+ * constructor.
+ *

+ * + * @param out * + * the destination stream. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws NullPointerException + * if out == null. + */ + public CBZip2OutputStream(final OutputStream out) throws IOException { + this(out, MAX_BLOCKSIZE); + } + + /** + * Constructs a new CBZip2OutputStream with specified blocksize. + * + *

+ * Attention: The caller is responsible for writing the two BZip2 magic
+ * bytes "BZ" to the specified stream prior to calling this
+ * constructor.
+ *

+ * + * + * @param out + * the destination stream. + * @param blockSize + * the blockSize as 100k units. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws IllegalArgumentException + * if (blockSize < 1) || (blockSize > 9). + * @throws NullPointerException + * if out == null. + * + * @see #MIN_BLOCKSIZE + * @see #MAX_BLOCKSIZE + */ + public CBZip2OutputStream(final OutputStream out, final int blockSize) + throws IOException { + super(); + + if (blockSize < 1) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") < 1"); + } + if (blockSize > 9) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") > 9"); + } + + this.blockSize100k = blockSize; + this.out = out; + init(); + } + + public void write(final int b) throws IOException { + if (this.out != null) { + write0(b); + } else { + throw new IOException("closed"); + } + } + + private void writeRun() throws IOException { + final int lastShadow = this.last; + + if (lastShadow < this.allowableBlockSize) { + final int currentCharShadow = this.currentChar; + final Data dataShadow = this.data; + dataShadow.inUse[currentCharShadow] = true; + final byte ch = (byte) currentCharShadow; + + int runLengthShadow = this.runLength; + this.crc.updateCRC(currentCharShadow, runLengthShadow); + + switch (runLengthShadow) { + case 1: + dataShadow.block[lastShadow + 2] = ch; + this.last = lastShadow + 1; + break; + + case 2: + dataShadow.block[lastShadow + 2] = ch; + dataShadow.block[lastShadow + 3] = ch; + this.last = lastShadow + 2; + break; + + case 3: { + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + this.last = lastShadow + 3; + } + break; + + default: { + runLengthShadow -= 4; + dataShadow.inUse[runLengthShadow] = true; + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + block[lastShadow + 5] = ch; + block[lastShadow + 6] = (byte) runLengthShadow; + this.last = lastShadow + 5; + } + break; + + } + } else { + endBlock(); + initBlock(); + writeRun(); + } + } + + /** + * Overriden to close the stream. + */ + protected void finalize() throws Throwable { + finish(); + super.finalize(); + } + + + public void finish() throws IOException { + if (out != null) { + try { + if (this.runLength > 0) { + writeRun(); + } + this.currentChar = -1; + endBlock(); + endCompression(); + } finally { + this.out = null; + this.data = null; + } + } + } + + public void close() throws IOException { + if (out != null) { + OutputStream outShadow = this.out; + finish(); + outShadow.close(); + } + } + + public void flush() throws IOException { + OutputStream outShadow = this.out; + if (outShadow != null) { + outShadow.flush(); + } + } + + private void init() throws IOException { + // write magic: done by caller who created this stream + // this.out.write('B'); + // this.out.write('Z'); + + this.data = new Data(this.blockSize100k); + + /* + * Write `magic' bytes h indicating file-format == huffmanised, followed + * by a digit indicating blockSize100k. 
+ */ + bsPutUByte('h'); + bsPutUByte('0' + this.blockSize100k); + + this.combinedCRC = 0; + initBlock(); + } + + private void initBlock() { + // blockNo++; + this.crc.initialiseCRC(); + this.last = -1; + // ch = 0; + + boolean[] inUse = this.data.inUse; + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + + /* 20 is just a paranoia constant */ + this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20; + } + + private void endBlock() throws IOException { + this.blockCRC = this.crc.getFinalCRC(); + this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); + this.combinedCRC ^= this.blockCRC; + + // empty block at end of file + if (this.last == -1) { + return; + } + + /* sort the block and establish posn of original string */ + blockSort(); + + /* + * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 + * :-). A 32 bit value does not really give a strong enough guarantee + * that the value will not appear by chance in the compressed + * datastream. Worst-case probability of this event, for a 900k block, + * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 + * bits. For a compressed file of size 100Gb -- about 100000 blocks -- + * only a 48-bit marker will do. NB: normal compression/ decompression + * donot rely on these statistical properties. They are only important + * when trying to recover blocks from damaged files. + */ + bsPutUByte(0x31); + bsPutUByte(0x41); + bsPutUByte(0x59); + bsPutUByte(0x26); + bsPutUByte(0x53); + bsPutUByte(0x59); + + /* Now the block's CRC, so it is in a known place. */ + bsPutInt(this.blockCRC); + + /* Now a single bit indicating randomisation. */ + if (this.blockRandomised) { + bsW(1, 1); + } else { + bsW(1, 0); + } + + /* Finally, block's contents proper. */ + moveToFrontCodeAndSend(); + } + + private void endCompression() throws IOException { + /* + * Now another magic 48-bit number, 0x177245385090, to indicate the end + * of the last block. (sqrt(pi), if you want to know. I did want to use + * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me + * to feel statistically comfortable. Call me paranoid.) + */ + bsPutUByte(0x17); + bsPutUByte(0x72); + bsPutUByte(0x45); + bsPutUByte(0x38); + bsPutUByte(0x50); + bsPutUByte(0x90); + + bsPutInt(this.combinedCRC); + bsFinishedWithStream(); + } + + /** + * Returns the blocksize parameter specified at construction time. 
+ */ + public final int getBlockSize() { + return this.blockSize100k; + } + + public void write(final byte[] buf, int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > buf.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > buf.length(" + buf.length + ")."); + } + if (this.out == null) { + throw new IOException("stream closed"); + } + + for (int hi = offs + len; offs < hi;) { + write0(buf[offs++]); + } + } + + private void write0(int b) throws IOException { + if (this.currentChar != -1) { + b &= 0xff; + if (this.currentChar == b) { + if (++this.runLength > 254) { + writeRun(); + this.currentChar = -1; + this.runLength = 0; + } + // else nothing to do + } else { + writeRun(); + this.runLength = 1; + this.currentChar = b; + } + } else { + this.currentChar = b & 0xff; + this.runLength++; + } + } + + private static void hbAssignCodes(final int[] code, final byte[] length, + final int minLen, final int maxLen, final int alphaSize) { + int vec = 0; + for (int n = minLen; n <= maxLen; n++) { + for (int i = 0; i < alphaSize; i++) { + if ((length[i] & 0xff) == n) { + code[i] = vec; + vec++; + } + } + vec <<= 1; + } + } + + private void bsFinishedWithStream() throws IOException { + while (this.bsLive > 0) { + int ch = this.bsBuff >> 24; + this.out.write(ch); // write 8-bit + this.bsBuff <<= 8; + this.bsLive -= 8; + } + } + + private void bsW(final int n, final int v) throws IOException { + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + + this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); + this.bsLive = bsLiveShadow + n; + } + + private void bsPutUByte(final int c) throws IOException { + bsW(8, c); + } + + private void bsPutInt(final int u) throws IOException { + bsW(8, (u >> 24) & 0xff); + bsW(8, (u >> 16) & 0xff); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); + } + + private void sendMTFValues() throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final int alphaSize = this.nInUse + 2; + + for (int t = N_GROUPS; --t >= 0;) { + byte[] len_t = len[t]; + for (int v = alphaSize; --v >= 0;) { + len_t[v] = GREATER_ICOST; + } + } + + /* Decide how many coding tables to use */ + // assert (this.nMTF > 0) : this.nMTF; + final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 + : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; + + /* Generate an initial set of coding tables */ + sendMTFValues0(nGroups, alphaSize); + + /* + * Iterate up to N_ITERS times to improve the tables. + */ + final int nSelectors = sendMTFValues1(nGroups, alphaSize); + + /* Compute MTF values for the selectors. */ + sendMTFValues2(nGroups, nSelectors); + + /* Assign actual codes for the tables. */ + sendMTFValues3(nGroups, alphaSize); + + /* Transmit the mapping table. */ + sendMTFValues4(); + + /* Now the selectors. */ + sendMTFValues5(nGroups, nSelectors); + + /* Now the coding tables. 
*/ + sendMTFValues6(nGroups, alphaSize); + + /* And finally, the block data proper */ + sendMTFValues7(nSelectors); + } + + private void sendMTFValues0(final int nGroups, final int alphaSize) { + final byte[][] len = this.data.sendMTFValues_len; + final int[] mtfFreq = this.data.mtfFreq; + + int remF = this.nMTF; + int gs = 0; + + for (int nPart = nGroups; nPart > 0; nPart--) { + final int tFreq = remF / nPart; + int ge = gs - 1; + int aFreq = 0; + + for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { + aFreq += mtfFreq[++ge]; + } + + if ((ge > gs) && (nPart != nGroups) && (nPart != 1) + && (((nGroups - nPart) & 1) != 0)) { + aFreq -= mtfFreq[ge--]; + } + + final byte[] len_np = len[nPart - 1]; + for (int v = alphaSize; --v >= 0;) { + if ((v >= gs) && (v <= ge)) { + len_np[v] = LESSER_ICOST; + } else { + len_np[v] = GREATER_ICOST; + } + } + + gs = ge + 1; + remF -= aFreq; + } + } + + private int sendMTFValues1(final int nGroups, final int alphaSize) { + final Data dataShadow = this.data; + final int[][] rfreq = dataShadow.sendMTFValues_rfreq; + final int[] fave = dataShadow.sendMTFValues_fave; + final short[] cost = dataShadow.sendMTFValues_cost; + final char[] sfmap = dataShadow.sfmap; + final byte[] selector = dataShadow.selector; + final byte[][] len = dataShadow.sendMTFValues_len; + final byte[] len_0 = len[0]; + final byte[] len_1 = len[1]; + final byte[] len_2 = len[2]; + final byte[] len_3 = len[3]; + final byte[] len_4 = len[4]; + final byte[] len_5 = len[5]; + final int nMTFShadow = this.nMTF; + + int nSelectors = 0; + + for (int iter = 0; iter < N_ITERS; iter++) { + for (int t = nGroups; --t >= 0;) { + fave[t] = 0; + int[] rfreqt = rfreq[t]; + for (int i = alphaSize; --i >= 0;) { + rfreqt[i] = 0; + } + } + + nSelectors = 0; + + for (int gs = 0; gs < this.nMTF;) { + /* Set group start & end marks. */ + + /* + * Calculate the cost of this group as coded by each of the + * coding tables. + */ + + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + + if (nGroups == N_GROUPS) { + // unrolled version of the else-block + + short cost0 = 0; + short cost1 = 0; + short cost2 = 0; + short cost3 = 0; + short cost4 = 0; + short cost5 = 0; + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + cost0 += len_0[icv] & 0xff; + cost1 += len_1[icv] & 0xff; + cost2 += len_2[icv] & 0xff; + cost3 += len_3[icv] & 0xff; + cost4 += len_4[icv] & 0xff; + cost5 += len_5[icv] & 0xff; + } + + cost[0] = cost0; + cost[1] = cost1; + cost[2] = cost2; + cost[3] = cost3; + cost[4] = cost4; + cost[5] = cost5; + + } else { + for (int t = nGroups; --t >= 0;) { + cost[t] = 0; + } + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + for (int t = nGroups; --t >= 0;) { + cost[t] += len[t][icv] & 0xff; + } + } + } + + /* + * Find the coding table which is best for this group, and + * record its identity in the selector table. + */ + int bt = -1; + for (int t = nGroups, bc = 999999999; --t >= 0;) { + final int cost_t = cost[t]; + if (cost_t < bc) { + bc = cost_t; + bt = t; + } + } + + fave[bt]++; + selector[nSelectors] = (byte) bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ + final int[] rfreq_bt = rfreq[bt]; + for (int i = gs; i <= ge; i++) { + rfreq_bt[sfmap[i]]++; + } + + gs = ge + 1; + } + + /* + * Recompute the tables based on the accumulated frequencies. 
+ */ + for (int t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); + } + } + + return nSelectors; + } + + private void sendMTFValues2(final int nGroups, final int nSelectors) { + // assert (nGroups < 8) : nGroups; + + final Data dataShadow = this.data; + byte[] pos = dataShadow.sendMTFValues2_pos; + + for (int i = nGroups; --i >= 0;) { + pos[i] = (byte) i; + } + + for (int i = 0; i < nSelectors; i++) { + final byte ll_i = dataShadow.selector[i]; + byte tmp = pos[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + } + + pos[0] = tmp; + dataShadow.selectorMtf[i] = (byte) j; + } + } + + private void sendMTFValues3(final int nGroups, final int alphaSize) { + int[][] code = this.data.sendMTFValues_code; + byte[][] len = this.data.sendMTFValues_len; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final byte[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final int l = len_t[i] & 0xff; + if (l > maxLen) { + maxLen = l; + } + if (l < minLen) { + minLen = l; + } + } + + // assert (maxLen <= 20) : maxLen; + // assert (minLen >= 1) : minLen; + + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); + } + } + + private void sendMTFValues4() throws IOException { + final boolean[] inUse = this.data.inUse; + final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; + + for (int i = 16; --i >= 0;) { + inUse16[i] = false; + final int i16 = i * 16; + for (int j = 16; --j >= 0;) { + if (inUse[i16 + j]) { + inUse16[i] = true; + } + } + } + + for (int i = 0; i < 16; i++) { + bsW(1, inUse16[i] ? 1 : 0); + } + + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < 16; i++) { + if (inUse16[i]) { + final int i16 = i * 16; + for (int j = 0; j < 16; j++) { + // inlined: bsW(1, inUse[i16 + j] ? 
1 : 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + if (inUse[i16 + j]) { + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + } + bsLiveShadow++; + } + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues5(final int nGroups, final int nSelectors) + throws IOException { + bsW(3, nGroups); + bsW(15, nSelectors); + + final OutputStream outShadow = this.out; + final byte[] selectorMtf = this.data.selectorMtf; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < nSelectors; i++) { + for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { + // inlined: bsW(1, 1); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues6(final int nGroups, final int alphaSize) + throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final OutputStream outShadow = this.out; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int t = 0; t < nGroups; t++) { + byte[] len_t = len[t]; + int curr = len_t[0] & 0xff; + + // inlined: bsW(5, curr); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= curr << (32 - bsLiveShadow - 5); + bsLiveShadow += 5; + + for (int i = 0; i < alphaSize; i++) { + int lti = len_t[i] & 0xff; + while (curr < lti) { + // inlined: bsW(2, 2); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr++; /* 10 */ + } + + while (curr > lti) { + // inlined: bsW(2, 3); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr--; /* 11 */ + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues7(final int nSelectors) throws IOException { + final Data dataShadow = this.data; + final byte[][] len = dataShadow.sendMTFValues_len; + final int[][] code = dataShadow.sendMTFValues_code; + final OutputStream outShadow = this.out; + final byte[] selector = dataShadow.selector; + final char[] sfmap = dataShadow.sfmap; + final int nMTFShadow = this.nMTF; + + int selCtr = 0; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int gs = 0; gs < nMTFShadow;) { + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + final int selector_selCtr = selector[selCtr] & 0xff; + final int[] code_selCtr = code[selector_selCtr]; + final byte[] len_selCtr = len[selector_selCtr]; + + while (gs <= ge) { + final int sfmap_i = sfmap[gs]; + + // + // inlined: bsW(len_selCtr[sfmap_i] 
& 0xff, + // code_selCtr[sfmap_i]); + // + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + final int n = len_selCtr[sfmap_i] & 0xFF; + bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); + bsLiveShadow += n; + + gs++; + } + + gs = ge + 1; + selCtr++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void moveToFrontCodeAndSend() throws IOException { + bsW(24, this.origPtr); + generateMTFValues(); + sendMTFValues(); + } + + /** + * This is the most hammered method of this class. + * + *

+ * This is the version using unrolled loops. Normally I never use such ones + * in Java code. The unrolling has shown a noticeable performance improvement + * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the + * JIT compiler of the VM. + *

+ */ + private boolean mainSimpleSort(final Data dataShadow, final int lo, + final int hi, final int d) { + final int bigN = hi - lo + 1; + if (bigN < 2) { + return this.firstAttempt && (this.workDone > this.workLimit); + } + + int hp = 0; + while (INCS[hp] < bigN) { + hp++; + } + + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final byte[] block = dataShadow.block; + final int lastShadow = this.last; + final int lastPlus1 = lastShadow + 1; + final boolean firstAttemptShadow = this.firstAttempt; + final int workLimitShadow = this.workLimit; + int workDoneShadow = this.workDone; + + // Following block contains unrolled code which could be shortened by + // coding it in additional loops. + + HP: while (--hp >= 0) { + final int h = INCS[hp]; + final int mj = lo + h - 1; + + for (int i = lo + h; i <= hi;) { + // copy + for (int k = 3; (i <= hi) && (--k >= 0); i++) { + final int v = fmap[i]; + final int vd = v + d; + int j = i; + + // for (int a; + // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd, + // block, quadrant, lastShadow); + // j -= h) { + // fmap[j] = a; + // } + // + // unrolled version: + + // start inline mainGTU + boolean onceRunned = false; + int a = 0; + + HAMMER: while (true) { + if (onceRunned) { + fmap[j] = a; + if ((j -= h) <= mj) { + break HAMMER; + } + } else { + onceRunned = true; + } + + a = fmap[j - h]; + int i1 = a + d; + int i2 = vd; + + // following could be done in a loop, but + // unrolled it for performance: + if (block[i1 + 1] == block[i2 + 1]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (block[i1 + 5] == block[i2 + 5]) { + if (block[(i1 += 6)] == block[(i2 += 6)]) { + int x = lastShadow; + X: while (x > 0) { + x -= 4; + + if (block[i1 + 1] == block[i2 + 1]) { + if (quadrant[i1] == quadrant[i2]) { + if (block[i1 + 2] == block[i2 + 2]) { + if (quadrant[i1 + 1] == quadrant[i2 + 1]) { + if (block[i1 + 3] == block[i2 + 3]) { + if (quadrant[i1 + 2] == quadrant[i2 + 2]) { + if (block[i1 + 4] == block[i2 + 4]) { + if (quadrant[i1 + 3] == quadrant[i2 + 3]) { + if ((i1 += 4) >= lastPlus1) { + i1 -= lastPlus1; + } + if ((i2 += 4) >= lastPlus1) { + i2 -= lastPlus1; + } + workDoneShadow++; + continue X; + } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((quadrant[i1] > quadrant[i2])) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } + break HAMMER; + } // while x > 0 + else { + if ((block[i1] & 0xff) > (block[i2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } + } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 3] & 
0xff) > (block[i2 + 3] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) { + continue HAMMER; + } else { + break HAMMER; + } + + } // HAMMER + // end inline mainGTU + + fmap[j] = v; + } + + if (firstAttemptShadow && (i <= hi) + && (workDoneShadow > workLimitShadow)) { + break HP; + } + } + } + + this.workDone = workDoneShadow; + return firstAttemptShadow && (workDoneShadow > workLimitShadow); + } + + private static void vswap(int[] fmap, int p1, int p2, int n) { + n += p1; + while (p1 < n) { + int t = fmap[p1]; + fmap[p1++] = fmap[p2]; + fmap[p2++] = t; + } + } + + private static byte med3(byte a, byte b, byte c) { + return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c + : a); + } + + private void blockSort() { + this.workLimit = WORK_FACTOR * this.last; + this.workDone = 0; + this.blockRandomised = false; + this.firstAttempt = true; + mainSort(); + + if (this.firstAttempt && (this.workDone > this.workLimit)) { + randomiseBlock(); + this.workLimit = this.workDone = 0; + this.firstAttempt = false; + mainSort(); + } + + int[] fmap = this.data.fmap; + this.origPtr = -1; + for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { + if (fmap[i] == 0) { + this.origPtr = i; + break; + } + } + + // assert (this.origPtr != -1) : this.origPtr; + } + + /** + * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2 + */ + private void mainQSort3(final Data dataShadow, final int loSt, + final int hiSt, final int dSt) { + final int[] stack_ll = dataShadow.stack_ll; + final int[] stack_hh = dataShadow.stack_hh; + final int[] stack_dd = dataShadow.stack_dd; + final int[] fmap = dataShadow.fmap; + final byte[] block = dataShadow.block; + + stack_ll[0] = loSt; + stack_hh[0] = hiSt; + stack_dd[0] = dSt; + + for (int sp = 1; --sp >= 0;) { + final int lo = stack_ll[sp]; + final int hi = stack_hh[sp]; + final int d = stack_dd[sp]; + + if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) { + if (mainSimpleSort(dataShadow, lo, hi, d)) { + return; + } + } else { + final int d1 = d + 1; + final int med = med3(block[fmap[lo] + d1], + block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff; + + int unLo = lo; + int unHi = hi; + int ltLo = lo; + int gtHi = hi; + + while (true) { + while (unLo <= unHi) { + final int n = ((int) block[fmap[unLo] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[ltLo]; + fmap[ltLo++] = temp; + } else if (n < 0) { + unLo++; + } else { + break; + } + } + + while (unLo <= unHi) { + final int n = ((int) block[fmap[unHi] + d1] & 0xff) + - med; + if (n == 0) { + final int temp = fmap[unHi]; + fmap[unHi--] = fmap[gtHi]; + fmap[gtHi--] = temp; + } else if (n > 0) { + unHi--; + } else { + break; + } + } + + if (unLo <= unHi) { + final int temp = fmap[unLo]; + fmap[unLo++] = fmap[unHi]; + fmap[unHi--] = temp; + } else { + break; + } + } + + if (gtHi < ltLo) { + stack_ll[sp] = lo; + stack_hh[sp] = hi; + stack_dd[sp] = d1; + sp++; + } else { + int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) + : (unLo - ltLo); + vswap(fmap, lo, unLo - n, n); + int m = ((hi - gtHi) < (gtHi - unHi)) ? 
(hi - gtHi) + : (gtHi - unHi); + vswap(fmap, unLo, hi - m + 1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + stack_ll[sp] = lo; + stack_hh[sp] = n; + stack_dd[sp] = d; + sp++; + + stack_ll[sp] = n + 1; + stack_hh[sp] = m - 1; + stack_dd[sp] = d1; + sp++; + + stack_ll[sp] = m; + stack_hh[sp] = hi; + stack_dd[sp] = d; + sp++; + } + } + } + } + + private void mainSort() { + final Data dataShadow = this.data; + final int[] runningOrder = dataShadow.mainSort_runningOrder; + final int[] copy = dataShadow.mainSort_copy; + final boolean[] bigDone = dataShadow.mainSort_bigDone; + final int[] ftab = dataShadow.ftab; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] quadrant = dataShadow.quadrant; + final int lastShadow = this.last; + final int workLimitShadow = this.workLimit; + final boolean firstAttemptShadow = this.firstAttempt; + + // Set up the 2-byte frequency table + for (int i = 65537; --i >= 0;) { + ftab[i] = 0; + } + + /* + * In the various block-sized structures, live data runs from 0 to + * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area + * for block. + */ + for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) { + block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1]; + } + for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) { + quadrant[i] = 0; + } + block[0] = block[lastShadow + 1]; + + // Complete the initial radix sort: + + int c1 = block[0] & 0xff; + for (int i = 0; i <= lastShadow; i++) { + final int c2 = block[i + 1] & 0xff; + ftab[(c1 << 8) + c2]++; + c1 = c2; + } + + for (int i = 1; i <= 65536; i++) + ftab[i] += ftab[i - 1]; + + c1 = block[1] & 0xff; + for (int i = 0; i < lastShadow; i++) { + final int c2 = block[i + 2] & 0xff; + fmap[--ftab[(c1 << 8) + c2]] = i; + c1 = c2; + } + + fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow; + + /* + * Now ftab contains the first loc of every small bucket. Calculate the + * running order, from smallest to largest big bucket. + */ + for (int i = 256; --i >= 0;) { + bigDone[i] = false; + runningOrder[i] = i; + } + + for (int h = 364; h != 1;) { + h /= 3; + for (int i = h; i <= 255; i++) { + final int vv = runningOrder[i]; + final int a = ftab[(vv + 1) << 8] - ftab[vv << 8]; + final int b = h - 1; + int j = i; + for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j + - h]) { + runningOrder[j] = ro; + j -= h; + if (j <= b) { + break; + } + } + runningOrder[j] = vv; + } + } + + /* + * The main sorting loop. + */ + for (int i = 0; i <= 255; i++) { + /* + * Process big buckets, starting with the least full. + */ + final int ss = runningOrder[i]; + + // Step 1: + /* + * Complete the big bucket [ss] by quicksorting any unsorted small + * buckets [ss, j]. Hopefully previous pointer-scanning phases have + * already completed many of the small buckets [ss, j], so we don't + * have to sort them at all. + */ + for (int j = 0; j <= 255; j++) { + final int sb = (ss << 8) + j; + final int ftab_sb = ftab[sb]; + if ((ftab_sb & SETMASK) != SETMASK) { + final int lo = ftab_sb & CLEARMASK; + final int hi = (ftab[sb + 1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3(dataShadow, lo, hi, 2); + if (firstAttemptShadow + && (this.workDone > workLimitShadow)) { + return; + } + } + ftab[sb] = ftab_sb | SETMASK; + } + } + + // Step 2: + // Now scan this big bucket so as to synthesise the + // sorted order for small buckets [t, ss] for all t != ss. 
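+ // Because the big bucket [ss] is already fully sorted, stepping each of its
+ // rotations back by one position emits the members of every small bucket
+ // [t, ss] in sorted order, so they can simply be appended via copy[].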
+ + for (int j = 0; j <= 255; j++) { + copy[j] = ftab[(j << 8) + ss] & CLEARMASK; + } + + for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) { + final int fmap_j = fmap[j]; + c1 = block[fmap_j] & 0xff; + if (!bigDone[c1]) { + fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1); + copy[c1]++; + } + } + + for (int j = 256; --j >= 0;) + ftab[(j << 8) + ss] |= SETMASK; + + // Step 3: + /* + * The ss big bucket is now done. Record this fact, and update the + * quadrant descriptors. Remember to update quadrants in the + * overshoot area too, if necessary. The "if (i < 255)" test merely + * skips this updating for the last bucket processed, since updating + * for the last bucket is pointless. + */ + bigDone[ss] = true; + + if (i < 255) { + final int bbStart = ftab[ss << 8] & CLEARMASK; + final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart; + int shifts = 0; + + while ((bbSize >> shifts) > 65534) { + shifts++; + } + + for (int j = 0; j < bbSize; j++) { + final int a2update = fmap[bbStart + j]; + final char qVal = (char) (j >> shifts); + quadrant[a2update] = qVal; + if (a2update < NUM_OVERSHOOT_BYTES) { + quadrant[a2update + lastShadow + 1] = qVal; + } + } + } + + } + } + + private void randomiseBlock() { + final boolean[] inUse = this.data.inUse; + final byte[] block = this.data.block; + final int lastShadow = this.last; + + for (int i = 256; --i >= 0;) + inUse[i] = false; + + int rNToGo = 0; + int rTPos = 0; + for (int i = 0, j = 1; i <= lastShadow; i = j, j++) { + if (rNToGo == 0) { + rNToGo = (char) BZip2Constants.rNums[rTPos]; + if (++rTPos == 512) { + rTPos = 0; + } + } + + rNToGo--; + block[j] ^= ((rNToGo == 1) ? 1 : 0); + + // handle 16 bit signed numbers + inUse[block[j] & 0xff] = true; + } + + this.blockRandomised = true; + } + + private void generateMTFValues() { + final int lastShadow = this.last; + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] sfmap = dataShadow.sfmap; + final int[] mtfFreq = dataShadow.mtfFreq; + final byte[] unseqToSeq = dataShadow.unseqToSeq; + final byte[] yy = dataShadow.generateMTFValues_yy; + + // make maps + int nInUseShadow = 0; + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (byte) nInUseShadow; + nInUseShadow++; + } + } + this.nInUse = nInUseShadow; + + final int eob = nInUseShadow + 1; + + for (int i = eob; i >= 0; i--) { + mtfFreq[i] = 0; + } + + for (int i = nInUseShadow; --i >= 0;) { + yy[i] = (byte) i; + } + + int wr = 0; + int zPend = 0; + + for (int i = 0; i <= lastShadow; i++) { + final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; + byte tmp = yy[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + } + yy[0] = tmp; + + if (j == 0) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + zPend = 0; + } + sfmap[wr] = (char) (j + 1); + wr++; + mtfFreq[j + 1]++; + } + } + + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + } + + sfmap[wr] = (char) eob; + 
mtfFreq[eob]++; + this.nMTF = wr + 1; + } + + private static final class Data extends Object { + + // with blockSize 900k + final boolean[] inUse = new boolean[256]; // 256 byte + final byte[] unseqToSeq = new byte[256]; // 256 byte + final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + final byte[] generateMTFValues_yy = new byte[256]; // 256 byte + final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 + // byte + final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte + final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte + final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte + final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte + + final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte + final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte + + final int[] mainSort_runningOrder = new int[256]; // 1024 byte + final int[] mainSort_copy = new int[256]; // 1024 byte + final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte + + final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + + final int[] ftab = new int[65537]; // 262148 byte + // ------------ + // 333408 byte + + final byte[] block; // 900021 byte + final int[] fmap; // 3600000 byte + final char[] sfmap; // 3600000 byte + // ------------ + // 8433529 byte + // ============ + + /** + * Array instance identical to sfmap, both are used only temporarily and + * indepently, so we do not need to allocate additional memory. + */ + final char[] quadrant; + + Data(int blockSize100k) { + super(); + + final int n = blockSize100k * BZip2Constants.baseBlockSize; + this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; + this.fmap = new int[n]; + this.sfmap = new char[2 * n]; + this.quadrant = this.sfmap; + } + + } + +} diff --git a/src/java/org/apache/hadoop/io/compress/bzip2/CRC.java b/src/java/org/apache/hadoop/io/compress/bzip2/CRC.java new file mode 100644 index 00000000000..a9eaf205804 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/bzip2/CRC.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * to whom the Ant project is very grateful for his + * great code. 
+ */ + +package org.apache.hadoop.io.compress.bzip2; + +/** + * A simple class the hold and calculate the CRC for sanity checking of the + * data. + * + */ +final class CRC { + static final int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e, + 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, + 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, + 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, + 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, + 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, + 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, + 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, + 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, + 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, + 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, + 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, + 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, + 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, + 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, + 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, + 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, + 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, + 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, + 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, + 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, + 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, + 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, + 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, + 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, + 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, + 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, + 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, + 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, + 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, + 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, + 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, + 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, + 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, + 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, + 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, + 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, + 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, + 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, + 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, + 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, + 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, + 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, + 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, + 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, + 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, + 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, + 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, + 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667, + 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, + 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, + 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; + + CRC() { + initialiseCRC(); + } + + void initialiseCRC() { + globalCrc = 0xffffffff; + } + + int getFinalCRC() { + return ~globalCrc; + } + + int getGlobalCRC() { + 
return globalCrc; + } + + void setGlobalCRC(int newCrc) { + globalCrc = newCrc; + } + + void updateCRC(int inCh) { + int temp = (globalCrc >> 24) ^ inCh; + if (temp < 0) { + temp = 256 + temp; + } + globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp]; + } + + void updateCRC(int inCh, int repeat) { + int globalCrcShadow = this.globalCrc; + while (repeat-- > 0) { + int temp = (globalCrcShadow >> 24) ^ inCh; + globalCrcShadow = (globalCrcShadow << 8) + ^ crc32Table[(temp >= 0) ? temp : (temp + 256)]; + } + this.globalCrc = globalCrcShadow; + } + + int globalCrc; +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java new file mode 100644 index 00000000000..f27e831a58b --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.util.zip.Deflater; + +import org.apache.hadoop.io.compress.Compressor; + +/** + * A wrapper around java.util.zip.Deflater to make it conform + * to org.apache.hadoop.io.compress.Compressor interface. + * + */ +public class BuiltInZlibDeflater extends Deflater implements Compressor { + + public BuiltInZlibDeflater(int level, boolean nowrap) { + super(level, nowrap); + } + + public BuiltInZlibDeflater(int level) { + super(level); + } + + public BuiltInZlibDeflater() { + super(); + } + + public synchronized int compress(byte[] b, int off, int len) + throws IOException { + return super.deflate(b, off, len); + } +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java new file mode 100644 index 00000000000..0223587ad01 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibInflater.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
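The BuiltInZlibDeflater wrapper above only adds the Compressor-style compress(byte[], int, int) call on top of java.util.zip.Deflater. A minimal round-trip sketch follows; it is not part of this patch, the example class name and sample data are ours, and it assumes the patched Hadoop classes are on the classpath. The result is verified with the plain JDK Inflater.

import java.util.zip.Inflater;
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;

public class BuiltInZlibDeflaterExample {
  public static void main(String[] args) throws Exception {
    byte[] input = "hello, built-in zlib".getBytes("UTF-8");

    // Feed the data through the inherited Deflater API, then drain it
    // via the Compressor-style compress() method added by the wrapper.
    BuiltInZlibDeflater deflater = new BuiltInZlibDeflater();
    deflater.setInput(input, 0, input.length);
    deflater.finish();
    byte[] compressed = new byte[input.length + 64];
    int clen = 0;
    while (!deflater.finished()) {
      clen += deflater.compress(compressed, clen, compressed.length - clen);
    }
    deflater.end();

    // Verify the round trip with the plain JDK Inflater.
    Inflater inflater = new Inflater();
    inflater.setInput(compressed, 0, clen);
    byte[] restored = new byte[input.length];
    int dlen = inflater.inflate(restored);
    inflater.end();
    System.out.println(new String(restored, 0, dlen, "UTF-8")); // hello, built-in zlib
  }
}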
+ */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +import org.apache.hadoop.io.compress.Decompressor; + +/** + * A wrapper around java.util.zip.Inflater to make it conform + * to org.apache.hadoop.io.compress.Decompressor interface. + * + */ +public class BuiltInZlibInflater extends Inflater implements Decompressor { + + public BuiltInZlibInflater(boolean nowrap) { + super(nowrap); + } + + public BuiltInZlibInflater() { + super(); + } + + public synchronized int decompress(byte[] b, int off, int len) + throws IOException { + try { + return super.inflate(b, off, len); + } catch (DataFormatException dfe) { + throw new IOException(dfe.getMessage()); + } + } +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java b/src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java new file mode 100644 index 00000000000..754af216ad2 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/ZlibCompressor.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.nio.Buffer; +import java.nio.ByteBuffer; + +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * A {@link Compressor} based on the popular + * zlib compression algorithm. + * http://www.zlib.net/ + * + */ +public class ZlibCompressor implements Compressor { + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64*1024; + + // HACK - Use this as a global lock in the JNI layer + private static Class clazz = ZlibCompressor.class; + + private long stream; + private CompressionLevel level; + private CompressionStrategy strategy; + private CompressionHeader windowBits; + private int directBufferSize; + private byte[] userBuf = null; + private int userBufOff = 0, userBufLen = 0; + private Buffer uncompressedDirectBuf = null; + private int uncompressedDirectBufOff = 0, uncompressedDirectBufLen = 0; + private Buffer compressedDirectBuf = null; + private boolean finish, finished; + + /** + * The compression level for zlib library. + */ + public static enum CompressionLevel { + /** + * Compression level for no compression. + */ + NO_COMPRESSION (0), + + /** + * Compression level for fastest compression. + */ + BEST_SPEED (1), + + /** + * Compression level for best compression. + */ + BEST_COMPRESSION (9), + + /** + * Default compression level. + */ + DEFAULT_COMPRESSION (-1); + + + private final int compressionLevel; + + CompressionLevel(int level) { + compressionLevel = level; + } + + int compressionLevel() { + return compressionLevel; + } + }; + + /** + * The compression level for zlib library. 
+ */ + public static enum CompressionStrategy { + /** + * Compression strategy best used for data consisting mostly of small + * values with a somewhat random distribution. Forces more Huffman coding + * and less string matching. + */ + FILTERED (1), + + /** + * Compression strategy for Huffman coding only. + */ + HUFFMAN_ONLY (2), + + /** + * Compression strategy to limit match distances to one + * (run-length encoding). + */ + RLE (3), + + /** + * Compression strategy to prevent the use of dynamic Huffman codes, + * allowing for a simpler decoder for special applications. + */ + FIXED (4), + + /** + * Default compression strategy. + */ + DEFAULT_STRATEGY (0); + + + private final int compressionStrategy; + + CompressionStrategy(int strategy) { + compressionStrategy = strategy; + } + + int compressionStrategy() { + return compressionStrategy; + } + }; + + /** + * The type of header for compressed data. + */ + public static enum CompressionHeader { + /** + * No headers/trailers/checksums. + */ + NO_HEADER (-15), + + /** + * Default headers/trailers/checksums. + */ + DEFAULT_HEADER (15), + + /** + * Simple gzip headers/trailers. + */ + GZIP_FORMAT (31); + + private final int windowBits; + + CompressionHeader(int windowBits) { + this.windowBits = windowBits; + } + + public int windowBits() { + return windowBits; + } + } + + private static boolean nativeZlibLoaded = false; + + static { + if (NativeCodeLoader.isNativeCodeLoaded()) { + try { + // Initialize the native library + initIDs(); + nativeZlibLoaded = true; + } catch (Throwable t) { + // Ignore failure to load/initialize native-zlib + } + } + } + + static boolean isNativeZlibLoaded() { + return nativeZlibLoaded; + } + + /** + * Creates a new compressor using the specified compression level. + * Compressed data will be generated in ZLIB format. + * + * @param level Compression level #CompressionLevel + * @param strategy Compression strategy #CompressionStrategy + * @param header Compression header #CompressionHeader + * @param directBufferSize Size of the direct buffer to be used. + */ + public ZlibCompressor(CompressionLevel level, CompressionStrategy strategy, + CompressionHeader header, int directBufferSize) { + this.level = level; + this.strategy = strategy; + this.windowBits = header; + this.directBufferSize = directBufferSize; + + uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + compressedDirectBuf.position(directBufferSize); + + stream = init(this.level.compressionLevel(), + this.strategy.compressionStrategy(), + this.windowBits.windowBits()); + } + + /** + * Creates a new compressor with the default compression level. + * Compressed data will be generated in ZLIB format. 
+ */ + public ZlibCompressor() { + this(CompressionLevel.DEFAULT_COMPRESSION, + CompressionStrategy.DEFAULT_STRATEGY, + CompressionHeader.DEFAULT_HEADER, + DEFAULT_DIRECT_BUFFER_SIZE); + } + + public synchronized void setInput(byte[] b, int off, int len) { + if (b== null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + this.userBuf = b; + this.userBufOff = off; + this.userBufLen = len; + setInputFromSavedData(); + + // Reinitialize zlib's output direct buffer + compressedDirectBuf.limit(directBufferSize); + compressedDirectBuf.position(directBufferSize); + } + + synchronized void setInputFromSavedData() { + uncompressedDirectBufOff = 0; + uncompressedDirectBufLen = userBufLen; + if (uncompressedDirectBufLen > directBufferSize) { + uncompressedDirectBufLen = directBufferSize; + } + + // Reinitialize zlib's input direct buffer + uncompressedDirectBuf.rewind(); + ((ByteBuffer)uncompressedDirectBuf).put(userBuf, userBufOff, + uncompressedDirectBufLen); + + // Note how much data is being fed to zlib + userBufOff += uncompressedDirectBufLen; + userBufLen -= uncompressedDirectBufLen; + } + + public synchronized void setDictionary(byte[] b, int off, int len) { + if (stream == 0 || b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + setDictionary(stream, b, off, len); + } + + public synchronized boolean needsInput() { + // Consume remaining compressed data? + if (compressedDirectBuf.remaining() > 0) { + return false; + } + + // Check if zlib has consumed all input + if (uncompressedDirectBufLen <= 0) { + // Check if we have consumed all user-input + if (userBufLen <= 0) { + return true; + } else { + setInputFromSavedData(); + } + } + + return false; + } + + public synchronized void finish() { + finish = true; + } + + public synchronized boolean finished() { + // Check if 'zlib' says its 'finished' and + // all compressed data has been consumed + return (finished && compressedDirectBuf.remaining() == 0); + } + + public synchronized int compress(byte[] b, int off, int len) + throws IOException { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + int n = 0; + + // Check if there is compressed data + n = compressedDirectBuf.remaining(); + if (n > 0) { + n = Math.min(n, len); + ((ByteBuffer)compressedDirectBuf).get(b, off, n); + return n; + } + + // Re-initialize the zlib's output direct buffer + compressedDirectBuf.rewind(); + compressedDirectBuf.limit(directBufferSize); + + // Compress data + n = deflateBytesDirect(); + compressedDirectBuf.limit(n); + + // Get atmost 'len' bytes + n = Math.min(n, len); + ((ByteBuffer)compressedDirectBuf).get(b, off, n); + + return n; + } + + /** + * Returns the total number of compressed bytes output so far. + * + * @return the total (non-negative) number of compressed bytes output so far + */ + public synchronized long getBytesWritten() { + checkStream(); + return getBytesWritten(stream); + } + + /** + * Returns the total number of uncompressed bytes input so far.

+ * + * @return the total (non-negative) number of uncompressed bytes input so far + */ + public synchronized long getBytesRead() { + checkStream(); + return getBytesRead(stream); + } + + public synchronized void reset() { + checkStream(); + reset(stream); + finish = false; + finished = false; + uncompressedDirectBuf.rewind(); + uncompressedDirectBufOff = uncompressedDirectBufLen = 0; + compressedDirectBuf.limit(directBufferSize); + compressedDirectBuf.position(directBufferSize); + userBufOff = userBufLen = 0; + } + + public synchronized void end() { + if (stream != 0) { + end(stream); + stream = 0; + } + } + + private void checkStream() { + if (stream == 0) + throw new NullPointerException(); + } + + private native static void initIDs(); + private native static long init(int level, int strategy, int windowBits); + private native static void setDictionary(long strm, byte[] b, int off, + int len); + private native int deflateBytesDirect(); + private native static long getBytesRead(long strm); + private native static long getBytesWritten(long strm); + private native static void reset(long strm); + private native static void end(long strm); +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java b/src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java new file mode 100644 index 00000000000..56738252284 --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/ZlibDecompressor.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import java.io.IOException; +import java.nio.Buffer; +import java.nio.ByteBuffer; + +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * A {@link Decompressor} based on the popular + * zlib compression algorithm. + * http://www.zlib.net/ + * + */ +public class ZlibDecompressor implements Decompressor { + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64*1024; + + // HACK - Use this as a global lock in the JNI layer + private static Class clazz = ZlibDecompressor.class; + + private long stream; + private CompressionHeader header; + private int directBufferSize; + private Buffer compressedDirectBuf = null; + private int compressedDirectBufOff, compressedDirectBufLen; + private Buffer uncompressedDirectBuf = null; + private byte[] userBuf = null; + private int userBufOff = 0, userBufLen = 0; + private boolean finished; + private boolean needDict; + + /** + * The headers to detect from compressed data. + */ + public static enum CompressionHeader { + /** + * No headers/trailers/checksums. + */ + NO_HEADER (-15), + + /** + * Default headers/trailers/checksums. + */ + DEFAULT_HEADER (15), + + /** + * Simple gzip headers/trailers. 
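The ZlibCompressor class above implements the streaming Compressor contract: the caller hands data in with setInput(), calls finish(), and then drains compress() until finished() returns true, while the class shuttles bytes through its direct buffers into the native deflater. A rough usage sketch, ours rather than part of the patch; it assumes the native zlib library has been loaded (which isNativeZlibLoaded() would normally be used to check), and the buffer sizes and sample text are arbitrary.

import java.io.ByteArrayOutputStream;
import org.apache.hadoop.io.compress.zlib.ZlibCompressor;

public class ZlibCompressorExample {
  public static void main(String[] args) throws Exception {
    byte[] input = "some text to squeeze".getBytes("UTF-8");

    // GZIP_FORMAT asks the native deflater to emit gzip headers/trailers;
    // the remaining parameters mirror the defaults used by ZlibCompressor().
    ZlibCompressor compressor = new ZlibCompressor(
        ZlibCompressor.CompressionLevel.BEST_SPEED,
        ZlibCompressor.CompressionStrategy.DEFAULT_STRATEGY,
        ZlibCompressor.CompressionHeader.GZIP_FORMAT,
        64 * 1024);

    compressor.setInput(input, 0, input.length);
    compressor.finish();

    // Drain compressed output until the compressor reports completion.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    byte[] buf = new byte[4096];
    while (!compressor.finished()) {
      int n = compressor.compress(buf, 0, buf.length);
      out.write(buf, 0, n);
    }
    compressor.end();

    System.out.println(input.length + " -> " + out.size() + " bytes");
  }
}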
+ */ + GZIP_FORMAT (31), + + /** + * Autodetect gzip/zlib headers/trailers. + */ + AUTODETECT_GZIP_ZLIB (47); + + private final int windowBits; + + CompressionHeader(int windowBits) { + this.windowBits = windowBits; + } + + public int windowBits() { + return windowBits; + } + } + + private static boolean nativeZlibLoaded = false; + + static { + if (NativeCodeLoader.isNativeCodeLoaded()) { + try { + // Initialize the native library + initIDs(); + nativeZlibLoaded = true; + } catch (Throwable t) { + // Ignore failure to load/initialize native-zlib + } + } + } + + static boolean isNativeZlibLoaded() { + return nativeZlibLoaded; + } + + /** + * Creates a new decompressor. + */ + public ZlibDecompressor(CompressionHeader header, int directBufferSize) { + this.header = header; + this.directBufferSize = directBufferSize; + compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); + uncompressedDirectBuf.position(directBufferSize); + + stream = init(this.header.windowBits()); + } + + public ZlibDecompressor() { + this(CompressionHeader.DEFAULT_HEADER, DEFAULT_DIRECT_BUFFER_SIZE); + } + + public synchronized void setInput(byte[] b, int off, int len) { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + this.userBuf = b; + this.userBufOff = off; + this.userBufLen = len; + + setInputFromSavedData(); + + // Reinitialize zlib's output direct buffer + uncompressedDirectBuf.limit(directBufferSize); + uncompressedDirectBuf.position(directBufferSize); + } + + synchronized void setInputFromSavedData() { + compressedDirectBufOff = 0; + compressedDirectBufLen = userBufLen; + if (compressedDirectBufLen > directBufferSize) { + compressedDirectBufLen = directBufferSize; + } + + // Reinitialize zlib's input direct buffer + compressedDirectBuf.rewind(); + ((ByteBuffer)compressedDirectBuf).put(userBuf, userBufOff, + compressedDirectBufLen); + + // Note how much data is being fed to zlib + userBufOff += compressedDirectBufLen; + userBufLen -= compressedDirectBufLen; + } + + public synchronized void setDictionary(byte[] b, int off, int len) { + if (stream == 0 || b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + setDictionary(stream, b, off, len); + needDict = false; + } + + public synchronized boolean needsInput() { + // Consume remanining compressed data? 
+ if (uncompressedDirectBuf.remaining() > 0) { + return false; + } + + // Check if zlib has consumed all input + if (compressedDirectBufLen <= 0) { + // Check if we have consumed all user-input + if (userBufLen <= 0) { + return true; + } else { + setInputFromSavedData(); + } + } + + return false; + } + + public synchronized boolean needsDictionary() { + return needDict; + } + + public synchronized boolean finished() { + // Check if 'zlib' says its 'finished' and + // all compressed data has been consumed + return (finished && uncompressedDirectBuf.remaining() == 0); + } + + public synchronized int decompress(byte[] b, int off, int len) + throws IOException { + if (b == null) { + throw new NullPointerException(); + } + if (off < 0 || len < 0 || off > b.length - len) { + throw new ArrayIndexOutOfBoundsException(); + } + + int n = 0; + + // Check if there is uncompressed data + n = uncompressedDirectBuf.remaining(); + if (n > 0) { + n = Math.min(n, len); + ((ByteBuffer)uncompressedDirectBuf).get(b, off, n); + return n; + } + + // Re-initialize the zlib's output direct buffer + uncompressedDirectBuf.rewind(); + uncompressedDirectBuf.limit(directBufferSize); + + // Decompress data + n = inflateBytesDirect(); + uncompressedDirectBuf.limit(n); + + // Get atmost 'len' bytes + n = Math.min(n, len); + ((ByteBuffer)uncompressedDirectBuf).get(b, off, n); + + return n; + } + + /** + * Returns the total number of compressed bytes output so far. + * + * @return the total (non-negative) number of compressed bytes output so far + */ + public synchronized long getBytesWritten() { + checkStream(); + return getBytesWritten(stream); + } + + /** + * Returns the total number of uncompressed bytes input so far.

+ * + * @return the total (non-negative) number of uncompressed bytes input so far + */ + public synchronized long getBytesRead() { + checkStream(); + return getBytesRead(stream); + } + + public synchronized void reset() { + checkStream(); + reset(stream); + finished = false; + needDict = false; + compressedDirectBufOff = compressedDirectBufLen = 0; + uncompressedDirectBuf.limit(directBufferSize); + uncompressedDirectBuf.position(directBufferSize); + userBufOff = userBufLen = 0; + } + + public synchronized void end() { + if (stream != 0) { + end(stream); + stream = 0; + } + } + + protected void finalize() { + end(); + } + + private void checkStream() { + if (stream == 0) + throw new NullPointerException(); + } + + private native static void initIDs(); + private native static long init(int windowBits); + private native static void setDictionary(long strm, byte[] b, int off, + int len); + private native int inflateBytesDirect(); + private native static long getBytesRead(long strm); + private native static long getBytesWritten(long strm); + private native static void reset(long strm); + private native static void end(long strm); +} diff --git a/src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java b/src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java new file mode 100644 index 00000000000..e3ce3ec1afe --- /dev/null +++ b/src/java/org/apache/hadoop/io/compress/zlib/ZlibFactory.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.compress.zlib; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.util.NativeCodeLoader; + +/** + * A collection of factories to create the right + * zlib/gzip compressor/decompressor instances. + * + */ +public class ZlibFactory { + private static final Log LOG = + LogFactory.getLog(ZlibFactory.class); + + private static boolean nativeZlibLoaded = false; + + static { + if (NativeCodeLoader.isNativeCodeLoaded()) { + nativeZlibLoaded = ZlibCompressor.isNativeZlibLoaded() && + ZlibDecompressor.isNativeZlibLoaded(); + + if (nativeZlibLoaded) { + LOG.info("Successfully loaded & initialized native-zlib library"); + } else { + LOG.warn("Failed to load/initialize native-zlib library"); + } + } + } + + /** + * Check if native-zlib code is loaded & initialized correctly and + * can be loaded for this job. 
+ * + * @param conf configuration + * @return true if native-zlib is loaded & initialized + * and can be loaded for this job, else false + */ + public static boolean isNativeZlibLoaded(Configuration conf) { + return nativeZlibLoaded && conf.getBoolean("hadoop.native.lib", true); + } + + /** + * Return the appropriate type of the zlib compressor. + * + * @param conf configuration + * @return the appropriate type of the zlib compressor. + */ + public static Class + getZlibCompressorType(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + ZlibCompressor.class : BuiltInZlibDeflater.class; + } + + /** + * Return the appropriate implementation of the zlib compressor. + * + * @param conf configuration + * @return the appropriate implementation of the zlib compressor. + */ + public static Compressor getZlibCompressor(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + new ZlibCompressor() : new BuiltInZlibDeflater(); + } + + /** + * Return the appropriate type of the zlib decompressor. + * + * @param conf configuration + * @return the appropriate type of the zlib decompressor. + */ + public static Class + getZlibDecompressorType(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + ZlibDecompressor.class : BuiltInZlibInflater.class; + } + + /** + * Return the appropriate implementation of the zlib decompressor. + * + * @param conf configuration + * @return the appropriate implementation of the zlib decompressor. + */ + public static Decompressor getZlibDecompressor(Configuration conf) { + return (isNativeZlibLoaded(conf)) ? + new ZlibDecompressor() : new BuiltInZlibInflater(); + } + +} diff --git a/src/java/org/apache/hadoop/io/package.html b/src/java/org/apache/hadoop/io/package.html new file mode 100644 index 00000000000..ce4ca1f352a --- /dev/null +++ b/src/java/org/apache/hadoop/io/package.html @@ -0,0 +1,24 @@ + + + + + +Generic i/o code for use when reading and writing data to the network, +to databases, and to files. + + diff --git a/src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java new file mode 100644 index 00000000000..19b68538539 --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
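ZlibFactory above picks between the JNI-backed ZlibCompressor/ZlibDecompressor and the pure-Java BuiltInZlib* wrappers, based on whether the native library loaded and on the hadoop.native.lib setting. A short sketch of how a caller might use it; the example class is ours and simply reports which implementation was selected.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;

public class ZlibFactoryExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Either the native ZlibCompressor/ZlibDecompressor or the BuiltInZlib*
    // wrappers are returned; callers only ever see the interfaces.
    Compressor compressor = ZlibFactory.getZlibCompressor(conf);
    Decompressor decompressor = ZlibFactory.getZlibDecompressor(conf);

    System.out.println("native zlib in use: "
        + ZlibFactory.isNativeZlibLoaded(conf));
    System.out.println(compressor.getClass().getName());
    System.out.println(decompressor.getClass().getName());
  }
}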
+ */ +package org.apache.hadoop.io.retry; + +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Collections; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +class RetryInvocationHandler implements InvocationHandler { + public static final Log LOG = LogFactory.getLog(RetryInvocationHandler.class); + private Object implementation; + + private RetryPolicy defaultPolicy; + private Map methodNameToPolicyMap; + + public RetryInvocationHandler(Object implementation, RetryPolicy retryPolicy) { + this.implementation = implementation; + this.defaultPolicy = retryPolicy; + this.methodNameToPolicyMap = Collections.emptyMap(); + } + + public RetryInvocationHandler(Object implementation, Map methodNameToPolicyMap) { + this.implementation = implementation; + this.defaultPolicy = RetryPolicies.TRY_ONCE_THEN_FAIL; + this.methodNameToPolicyMap = methodNameToPolicyMap; + } + + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable { + RetryPolicy policy = methodNameToPolicyMap.get(method.getName()); + if (policy == null) { + policy = defaultPolicy; + } + + int retries = 0; + while (true) { + try { + return invokeMethod(method, args); + } catch (Exception e) { + if (!policy.shouldRetry(e, retries++)) { + LOG.info("Exception while invoking " + method.getName() + + " of " + implementation.getClass() + ". Not retrying." + + StringUtils.stringifyException(e)); + if (!method.getReturnType().equals(Void.TYPE)) { + throw e; // non-void methods can't fail without an exception + } + return null; + } + LOG.debug("Exception while invoking " + method.getName() + + " of " + implementation.getClass() + ". Retrying." + + StringUtils.stringifyException(e)); + } + } + } + + private Object invokeMethod(Method method, Object[] args) throws Throwable { + try { + if (!method.isAccessible()) { + method.setAccessible(true); + } + return method.invoke(implementation, args); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + } + +} diff --git a/src/java/org/apache/hadoop/io/retry/RetryPolicies.java b/src/java/org/apache/hadoop/io/retry/RetryPolicies.java new file mode 100644 index 00000000000..30a78885da8 --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.retry; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.Map.Entry; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.ipc.RemoteException; + +/** + *

+ * A collection of useful implementations of {@link RetryPolicy}. + *

+ */ +public class RetryPolicies { + + /** + *

+ * Try once, and fail by re-throwing the exception. + * This corresponds to having no retry mechanism in place. + *

+ */ + public static final RetryPolicy TRY_ONCE_THEN_FAIL = new TryOnceThenFail(); + + /** + *

+ * Try once, and fail silently for void methods, or by + * re-throwing the exception for non-void methods. + *

+ */ + public static final RetryPolicy TRY_ONCE_DONT_FAIL = new TryOnceDontFail(); + + /** + *

+ * Keep trying forever. + *

+ */ + public static final RetryPolicy RETRY_FOREVER = new RetryForever(); + + /** + *

+ * Keep trying a limited number of times, waiting a fixed time between attempts, + * and then fail by re-throwing the exception. + *
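+ * For example (an illustrative reading of the parameters),
+ * retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS) retries a
+ * failing call up to four times, sleeping ten seconds between attempts,
+ * before finally re-throwing the exception.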

+ */ + public static final RetryPolicy retryUpToMaximumCountWithFixedSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + return new RetryUpToMaximumCountWithFixedSleep(maxRetries, sleepTime, timeUnit); + } + + /** + *

+ * Keep trying for a maximum time, waiting a fixed time between attempts, + * and then fail by re-throwing the exception. + *

+ */ + public static final RetryPolicy retryUpToMaximumTimeWithFixedSleep(long maxTime, long sleepTime, TimeUnit timeUnit) { + return new RetryUpToMaximumTimeWithFixedSleep(maxTime, sleepTime, timeUnit); + } + + /** + *

+ * Keep trying a limited number of times, waiting a growing amount of time between attempts, + * and then fail by re-throwing the exception. + * The time between attempts is sleepTime multiplied by the number of tries so far. + *

+ */ + public static final RetryPolicy retryUpToMaximumCountWithProportionalSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + return new RetryUpToMaximumCountWithProportionalSleep(maxRetries, sleepTime, timeUnit); + } + + /** + *

+ * Keep trying a limited number of times, waiting a growing amount of time between attempts, + * and then fail by re-throwing the exception. + * The time between attempts is sleepTime multiplied by a random + * number in the range of [0, 2 to the number of retries) + *
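+ * For example (a sketch of the arithmetic, assuming a sleepTime of one
+ * second), the sleep after the third failed attempt is a randomly chosen
+ * whole number of seconds in the range [0, 8).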

+ */ + public static final RetryPolicy exponentialBackoffRetry( + int maxRetries, long sleepTime, TimeUnit timeUnit) { + return new ExponentialBackoffRetry(maxRetries, sleepTime, timeUnit); + } + + /** + *

+ * Set a default policy with some explicit handlers for specific exceptions. + *
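+ * A minimal sketch of building such a map (the exception class and the
+ * policies chosen here are purely illustrative):
+ *
+ *   Map<Class<? extends Exception>, RetryPolicy> map =
+ *       new HashMap<Class<? extends Exception>, RetryPolicy>();
+ *   map.put(SocketTimeoutException.class, RetryPolicies.RETRY_FOREVER);
+ *   RetryPolicy policy =
+ *       RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, map);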

+ */ + public static final RetryPolicy retryByException(RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + return new ExceptionDependentRetry(defaultPolicy, exceptionToPolicyMap); + } + + /** + *

+ * A retry policy for RemoteException. + * Set a default policy with some explicit handlers for specific exceptions. + *

+ */ + public static final RetryPolicy retryByRemoteException( + RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + return new RemoteExceptionDependentRetry(defaultPolicy, exceptionToPolicyMap); + } + + static class TryOnceThenFail implements RetryPolicy { + public boolean shouldRetry(Exception e, int retries) throws Exception { + throw e; + } + } + static class TryOnceDontFail implements RetryPolicy { + public boolean shouldRetry(Exception e, int retries) throws Exception { + return false; + } + } + + static class RetryForever implements RetryPolicy { + public boolean shouldRetry(Exception e, int retries) throws Exception { + return true; + } + } + + static abstract class RetryLimited implements RetryPolicy { + int maxRetries; + long sleepTime; + TimeUnit timeUnit; + + public RetryLimited(int maxRetries, long sleepTime, TimeUnit timeUnit) { + this.maxRetries = maxRetries; + this.sleepTime = sleepTime; + this.timeUnit = timeUnit; + } + + public boolean shouldRetry(Exception e, int retries) throws Exception { + if (retries >= maxRetries) { + throw e; + } + try { + timeUnit.sleep(calculateSleepTime(retries)); + } catch (InterruptedException ie) { + // retry + } + return true; + } + + protected abstract long calculateSleepTime(int retries); + } + + static class RetryUpToMaximumCountWithFixedSleep extends RetryLimited { + public RetryUpToMaximumCountWithFixedSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + super(maxRetries, sleepTime, timeUnit); + } + + @Override + protected long calculateSleepTime(int retries) { + return sleepTime; + } + } + + static class RetryUpToMaximumTimeWithFixedSleep extends RetryUpToMaximumCountWithFixedSleep { + public RetryUpToMaximumTimeWithFixedSleep(long maxTime, long sleepTime, TimeUnit timeUnit) { + super((int) (maxTime / sleepTime), sleepTime, timeUnit); + } + } + + static class RetryUpToMaximumCountWithProportionalSleep extends RetryLimited { + public RetryUpToMaximumCountWithProportionalSleep(int maxRetries, long sleepTime, TimeUnit timeUnit) { + super(maxRetries, sleepTime, timeUnit); + } + + @Override + protected long calculateSleepTime(int retries) { + return sleepTime * (retries + 1); + } + } + + static class ExceptionDependentRetry implements RetryPolicy { + + RetryPolicy defaultPolicy; + Map, RetryPolicy> exceptionToPolicyMap; + + public ExceptionDependentRetry(RetryPolicy defaultPolicy, + Map, RetryPolicy> exceptionToPolicyMap) { + this.defaultPolicy = defaultPolicy; + this.exceptionToPolicyMap = exceptionToPolicyMap; + } + + public boolean shouldRetry(Exception e, int retries) throws Exception { + RetryPolicy policy = exceptionToPolicyMap.get(e.getClass()); + if (policy == null) { + policy = defaultPolicy; + } + return policy.shouldRetry(e, retries); + } + + } + + static class RemoteExceptionDependentRetry implements RetryPolicy { + + RetryPolicy defaultPolicy; + Map exceptionNameToPolicyMap; + + public RemoteExceptionDependentRetry(RetryPolicy defaultPolicy, + Map, + RetryPolicy> exceptionToPolicyMap) { + this.defaultPolicy = defaultPolicy; + this.exceptionNameToPolicyMap = new HashMap(); + for (Entry, RetryPolicy> e : + exceptionToPolicyMap.entrySet()) { + exceptionNameToPolicyMap.put(e.getKey().getName(), e.getValue()); + } + } + + public boolean shouldRetry(Exception e, int retries) throws Exception { + RetryPolicy policy = null; + if (e instanceof RemoteException) { + policy = exceptionNameToPolicyMap.get( + ((RemoteException) e).getClassName()); + } + if (policy == null) { + policy = defaultPolicy; + } + return 
policy.shouldRetry(e, retries); + } + } + + static class ExponentialBackoffRetry extends RetryLimited { + private Random r = new Random(); + public ExponentialBackoffRetry( + int maxRetries, long sleepTime, TimeUnit timeUnit) { + super(maxRetries, sleepTime, timeUnit); + } + + @Override + protected long calculateSleepTime(int retries) { + return sleepTime*r.nextInt(1<<(retries+1)); + } + } +} diff --git a/src/java/org/apache/hadoop/io/retry/RetryPolicy.java b/src/java/org/apache/hadoop/io/retry/RetryPolicy.java new file mode 100644 index 00000000000..26d3267bc2a --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryPolicy.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.retry; + +/** + *

+ * Specifies a policy for retrying method failures. + * Implementations of this interface should be immutable. + *

+ */ +public interface RetryPolicy { + /** + *

+ * Determines whether the framework should retry a + * method, given the exception that caused it to fail and the + * number of retries that have been made for that operation + * so far. + *

+ * @param e The exception that caused the method to fail. + * @param retries The number of times the method has been retried. + * @return true if the method should be retried, + * false if the method should not be retried + * but shouldn't fail with an exception (only for void methods). + * @throws Exception The re-thrown exception e indicating + * that the method failed and should not be retried further. + */ + public boolean shouldRetry(Exception e, int retries) throws Exception; +} diff --git a/src/java/org/apache/hadoop/io/retry/RetryProxy.java b/src/java/org/apache/hadoop/io/retry/RetryProxy.java new file mode 100644 index 00000000000..937f832213c --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/RetryProxy.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.retry; + +import java.lang.reflect.Proxy; +import java.util.Map; + +/** + *

+ * A factory for creating retry proxies. + *

+ */ +public class RetryProxy { + /** + *

+ * Create a proxy for an interface of an implementation class + * using the same retry policy for each method in the interface. + *

+ * @param iface the interface that the retry will implement + * @param implementation the instance whose methods should be retried + * @param retryPolicy the policy for retrying method call failures + * @return the retry proxy + */ + public static Object create(Class iface, Object implementation, + RetryPolicy retryPolicy) { + return Proxy.newProxyInstance( + implementation.getClass().getClassLoader(), + new Class[] { iface }, + new RetryInvocationHandler(implementation, retryPolicy) + ); + } + + /** + *

+ * Create a proxy for an interface of an implementation class + * using a set of retry policies specified by method name. + * If no retry policy is defined for a method then a default of + * {@link RetryPolicies#TRY_ONCE_THEN_FAIL} is used. + *

+ * @param iface the interface that the retry will implement + * @param implementation the instance whose methods should be retried + * @param methodNameToPolicyMap a map of method names to retry policies + * @return the retry proxy + */ + public static Object create(Class iface, Object implementation, + Map methodNameToPolicyMap) { + return Proxy.newProxyInstance( + implementation.getClass().getClassLoader(), + new Class[] { iface }, + new RetryInvocationHandler(implementation, methodNameToPolicyMap) + ); + } +} diff --git a/src/java/org/apache/hadoop/io/retry/package.html b/src/java/org/apache/hadoop/io/retry/package.html new file mode 100644 index 00000000000..ae553fc7a62 --- /dev/null +++ b/src/java/org/apache/hadoop/io/retry/package.html @@ -0,0 +1,48 @@ + + + + + + +

+A mechanism for selectively retrying methods that throw exceptions under certain circumstances. +

+ +

+Typical usage is +

+ +
+UnreliableImplementation unreliableImpl = new UnreliableImplementation();
+UnreliableInterface unreliable = (UnreliableInterface)
+  RetryProxy.create(UnreliableInterface.class, unreliableImpl,
+    RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
+unreliable.call();
+
+ +

+This will retry any method called on unreliable four times - in this case the call() +method - sleeping 10 seconds between +each retry. There are a number of {@link org.apache.hadoop.io.retry.RetryPolicies retry policies} +available, or you can implement a custom one by implementing {@link org.apache.hadoop.io.retry.RetryPolicy}. +It is also possible to specify retry policies on a +{@link org.apache.hadoop.io.retry.RetryProxy#create(Class, Object, Map) per-method basis}. +
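+
+A per-method policy map might be set up as follows (an illustrative sketch;
+UnreliableInterface and unreliableImpl are the same as in the example above,
+and only the call() method gets a non-default policy):
+
+Map<String, RetryPolicy> methodPolicies = new HashMap<String, RetryPolicy>();
+methodPolicies.put("call",
+  RetryPolicies.retryUpToMaximumCountWithFixedSleep(4, 10, TimeUnit.SECONDS));
+UnreliableInterface unreliable = (UnreliableInterface)
+  RetryProxy.create(UnreliableInterface.class, unreliableImpl, methodPolicies);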

+ + + diff --git a/src/java/org/apache/hadoop/io/serializer/Deserializer.java b/src/java/org/apache/hadoop/io/serializer/Deserializer.java new file mode 100644 index 00000000000..1234a57b2b4 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/Deserializer.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.InputStream; + +/** + *

+ * Provides a facility for deserializing objects of type T from an + * {@link InputStream}. + *

+ * + *

+ * Deserializers are stateful, but must not buffer the input since + * other producers may read from the input between calls to + * {@link #deserialize(Object)}. + *
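+ * A typical read loop looks roughly like this (an illustrative sketch;
+ * in is an already opened InputStream and hasMoreRecords() stands in for
+ * whatever end-of-input check the caller uses):
+ *
+ *   deserializer.open(in);
+ *   T record = null;
+ *   while (hasMoreRecords()) {
+ *     record = deserializer.deserialize(record); // may reuse the instance
+ *   }
+ *   deserializer.close();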

+ * @param + */ +public interface Deserializer { + /** + *

Prepare the deserializer for reading.

+ */ + void open(InputStream in) throws IOException; + + /** + *

+ * Deserialize the next object from the underlying input stream. + * If the object t is non-null then this deserializer + * may reuse it, reading the next object from the input stream into + * it. Otherwise, if the object t is null a new + * deserialized object will be created. + *

+ * @return the deserialized object + */ + T deserialize(T t) throws IOException; + + /** + *

Close the underlying input stream and clear up any resources.

+ */ + void close() throws IOException; +} diff --git a/src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java b/src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java new file mode 100644 index 00000000000..70e8b689e9c --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/DeserializerComparator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.hadoop.io.InputBuffer; +import org.apache.hadoop.io.RawComparator; + +/** + *

+ * A {@link RawComparator} that uses a {@link Deserializer} to deserialize + * the objects to be compared so that the standard {@link Comparator} can + * be used to compare them. + *

+ *

+ * One may optimize compare-intensive operations by using a custom + * implementation of {@link RawComparator} that operates directly + * on byte representations. + *

+ * @param + */ +public abstract class DeserializerComparator implements RawComparator { + + private InputBuffer buffer = new InputBuffer(); + private Deserializer deserializer; + + private T key1; + private T key2; + + protected DeserializerComparator(Deserializer deserializer) + throws IOException { + + this.deserializer = deserializer; + this.deserializer.open(buffer); + } + + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + try { + + buffer.reset(b1, s1, l1); + key1 = deserializer.deserialize(key1); + + buffer.reset(b2, s2, l2); + key2 = deserializer.deserialize(key2); + + } catch (IOException e) { + throw new RuntimeException(e); + } + return compare(key1, key2); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java b/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java new file mode 100644 index 00000000000..b44b4b1db76 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.Serializable; + +/** + *

+ * An experimental {@link Serialization} for Java {@link Serializable} classes. + *

+ * @see JavaSerializationComparator + */ +public class JavaSerialization implements Serialization { + + static class JavaSerializationDeserializer + implements Deserializer { + + private ObjectInputStream ois; + + public void open(InputStream in) throws IOException { + ois = new ObjectInputStream(in) { + @Override protected void readStreamHeader() { + // no header + } + }; + } + + @SuppressWarnings("unchecked") + public T deserialize(T object) throws IOException { + try { + // ignore passed-in object + return (T) ois.readObject(); + } catch (ClassNotFoundException e) { + throw new IOException(e.toString()); + } + } + + public void close() throws IOException { + ois.close(); + } + + } + + static class JavaSerializationSerializer + implements Serializer { + + private ObjectOutputStream oos; + + public void open(OutputStream out) throws IOException { + oos = new ObjectOutputStream(out) { + @Override protected void writeStreamHeader() { + // no header + } + }; + } + + public void serialize(Serializable object) throws IOException { + oos.reset(); // clear (class) back-references + oos.writeObject(object); + } + + public void close() throws IOException { + oos.close(); + } + + } + + public boolean accept(Class c) { + return Serializable.class.isAssignableFrom(c); + } + + public Deserializer getDeserializer(Class c) { + return new JavaSerializationDeserializer(); + } + + public Serializer getSerializer(Class c) { + return new JavaSerializationSerializer(); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java b/src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java new file mode 100644 index 00000000000..f3de2b10c32 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/JavaSerializationComparator.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.Serializable; + +import org.apache.hadoop.io.RawComparator; + +/** + *

+ * A {@link RawComparator} that uses a {@link JavaSerialization} + * {@link Deserializer} to deserialize objects that are then compared via + * their {@link Comparable} interfaces. + *

+ * @param + * @see JavaSerialization + */ +public class JavaSerializationComparator> + extends DeserializerComparator { + + public JavaSerializationComparator() throws IOException { + super(new JavaSerialization.JavaSerializationDeserializer()); + } + + public int compare(T o1, T o2) { + return o1.compareTo(o2); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/Serialization.java b/src/java/org/apache/hadoop/io/serializer/Serialization.java new file mode 100644 index 00000000000..6e724bd78b1 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/Serialization.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +/** + *

+ * Encapsulates a {@link Serializer}/{@link Deserializer} pair. + *

+ * @param + */ +public interface Serialization { + + /** + * Allows clients to test whether this {@link Serialization} + * supports the given class. + */ + boolean accept(Class c); + + /** + * @return a {@link Serializer} for the given class. + */ + Serializer getSerializer(Class c); + + /** + * @return a {@link Deserializer} for the given class. + */ + Deserializer getDeserializer(Class c); +} diff --git a/src/java/org/apache/hadoop/io/serializer/SerializationFactory.java b/src/java/org/apache/hadoop/io/serializer/SerializationFactory.java new file mode 100644 index 00000000000..f4ba54b4e49 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/SerializationFactory.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; + +/** + *

+ * A factory for {@link Serialization}s. + *
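+ * A minimal usage sketch (MyRecord stands in for any class accepted by one
+ * of the configured serializations, and out is an open OutputStream):
+ *
+ *   SerializationFactory factory = new SerializationFactory(conf);
+ *   Serializer<MyRecord> serializer = factory.getSerializer(MyRecord.class);
+ *   serializer.open(out);
+ *   serializer.serialize(new MyRecord());
+ *   serializer.close();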

+ */ +public class SerializationFactory extends Configured { + + private static final Log LOG = + LogFactory.getLog(SerializationFactory.class.getName()); + + private List> serializations = new ArrayList>(); + + /** + *

+ * Serializations are found by reading the io.serializations + * property from conf, which is a comma-delimited list of + * classnames. + *

+ */ + public SerializationFactory(Configuration conf) { + super(conf); + for (String serializerName : conf.getStrings("io.serializations", + new String[]{"org.apache.hadoop.io.serializer.WritableSerialization"})) { + add(conf, serializerName); + } + } + + @SuppressWarnings("unchecked") + private void add(Configuration conf, String serializationName) { + try { + + Class serializionClass = + (Class) conf.getClassByName(serializationName); + serializations.add((Serialization) + ReflectionUtils.newInstance(serializionClass, getConf())); + } catch (ClassNotFoundException e) { + LOG.warn("Serilization class not found: " + + StringUtils.stringifyException(e)); + } + } + + public Serializer getSerializer(Class c) { + return getSerialization(c).getSerializer(c); + } + + public Deserializer getDeserializer(Class c) { + return getSerialization(c).getDeserializer(c); + } + + @SuppressWarnings("unchecked") + public Serialization getSerialization(Class c) { + for (Serialization serialization : serializations) { + if (serialization.accept(c)) { + return (Serialization) serialization; + } + } + return null; + } +} diff --git a/src/java/org/apache/hadoop/io/serializer/Serializer.java b/src/java/org/apache/hadoop/io/serializer/Serializer.java new file mode 100644 index 00000000000..b3243f5b6b8 --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/Serializer.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.IOException; +import java.io.OutputStream; + +/** + *

+ * Provides a facility for serializing objects of type T to an + * {@link OutputStream}. + *

+ * + *

+ * Serializers are stateful, but must not buffer the output since + * other producers may write to the output between calls to + * {@link #serialize(Object)}. + *

+ * @param + */ +public interface Serializer { + /** + *

Prepare the serializer for writing.

+ */ + void open(OutputStream out) throws IOException; + + /** + *

Serialize t to the underlying output stream.

+ */ + void serialize(T t) throws IOException; + + /** + *

Close the underlying output stream and clear up any resources.

+ */ + void close() throws IOException; +} diff --git a/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java b/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java new file mode 100644 index 00000000000..47586e8c2dd --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java @@ -0,0 +1,111 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.serializer; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * A {@link Serialization} for {@link Writable}s that delegates to + * {@link Writable#write(java.io.DataOutput)} and + * {@link Writable#readFields(java.io.DataInput)}. + */ +public class WritableSerialization extends Configured + implements Serialization { + + static class WritableDeserializer extends Configured + implements Deserializer { + + private Class writableClass; + private DataInputStream dataIn; + + public WritableDeserializer(Configuration conf, Class c) { + setConf(conf); + this.writableClass = c; + } + + public void open(InputStream in) { + if (in instanceof DataInputStream) { + dataIn = (DataInputStream) in; + } else { + dataIn = new DataInputStream(in); + } + } + + public Writable deserialize(Writable w) throws IOException { + Writable writable; + if (w == null) { + writable + = (Writable) ReflectionUtils.newInstance(writableClass, getConf()); + } else { + writable = w; + } + writable.readFields(dataIn); + return writable; + } + + public void close() throws IOException { + dataIn.close(); + } + + } + + static class WritableSerializer implements Serializer { + + private DataOutputStream dataOut; + + public void open(OutputStream out) { + if (out instanceof DataOutputStream) { + dataOut = (DataOutputStream) out; + } else { + dataOut = new DataOutputStream(out); + } + } + + public void serialize(Writable w) throws IOException { + w.write(dataOut); + } + + public void close() throws IOException { + dataOut.close(); + } + + } + + public boolean accept(Class c) { + return Writable.class.isAssignableFrom(c); + } + + public Deserializer getDeserializer(Class c) { + return new WritableDeserializer(getConf(), c); + } + + public Serializer getSerializer(Class c) { + return new WritableSerializer(); + } + +} diff --git a/src/java/org/apache/hadoop/io/serializer/package.html b/src/java/org/apache/hadoop/io/serializer/package.html new file mode 100644 index 00000000000..58c8a3a5c3b --- /dev/null +++ b/src/java/org/apache/hadoop/io/serializer/package.html @@ 
-0,0 +1,37 @@ + + + + + + +

+This package provides a mechanism for using different serialization frameworks +in Hadoop. The property "io.serializations" defines a list of +{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create +{@link org.apache.hadoop.io.serializer.Serializer}s and +{@link org.apache.hadoop.io.serializer.Deserializer}s. +

+ +

+To add a new serialization framework, write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +
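+
+For example, Java serialization could be enabled alongside the default
+Writable serialization with something like the following (an illustrative
+sketch; the property can equally be set in a configuration file):
+
+Configuration conf = new Configuration();
+conf.set("io.serializations",
+    "org.apache.hadoop.io.serializer.WritableSerialization,"
+    + "org.apache.hadoop.io.serializer.JavaSerialization");
+SerializationFactory factory = new SerializationFactory(conf);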

+ + + diff --git a/src/java/org/apache/hadoop/ipc/Client.java b/src/java/org/apache/hadoop/ipc/Client.java new file mode 100644 index 00000000000..1d01faf673f --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/Client.java @@ -0,0 +1,914 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.net.Socket; +import java.net.InetSocketAddress; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; +import java.net.ConnectException; + +import java.io.IOException; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.FilterInputStream; +import java.io.InputStream; + +import java.util.Hashtable; +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import javax.net.SocketFactory; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.ReflectionUtils; + +/** A client for an IPC service. IPC calls take a single {@link Writable} as a + * parameter, and return a {@link Writable} as their value. A service runs on + * a port and is defined by a parameter class and a value class. + * + * @see Server + */ +public class Client { + + public static final Log LOG = + LogFactory.getLog(Client.class); + private Hashtable connections = + new Hashtable(); + + private Class valueClass; // class of call values + private int counter; // counter for call ids + private AtomicBoolean running = new AtomicBoolean(true); // if client runs + final private Configuration conf; + final private int maxIdleTime; //connections will be culled if it was idle for + //maxIdleTime msecs + final private int maxRetries; //the max. no. 
of retries for socket connections + private boolean tcpNoDelay; // if T then disable Nagle's Algorithm + private int pingInterval; // how often sends ping to the server in msecs + + private SocketFactory socketFactory; // how to create sockets + private int refCount = 1; + + final private static String PING_INTERVAL_NAME = "ipc.ping.interval"; + final static int DEFAULT_PING_INTERVAL = 60000; // 1 min + final static int PING_CALL_ID = -1; + + /** + * set the ping interval value in configuration + * + * @param conf Configuration + * @param pingInterval the ping interval + */ + final public static void setPingInterval(Configuration conf, int pingInterval) { + conf.setInt(PING_INTERVAL_NAME, pingInterval); + } + + /** + * Get the ping interval from configuration; + * If not set in the configuration, return the default value. + * + * @param conf Configuration + * @return the ping interval + */ + final static int getPingInterval(Configuration conf) { + return conf.getInt(PING_INTERVAL_NAME, DEFAULT_PING_INTERVAL); + } + + /** + * Increment this client's reference count + * + */ + synchronized void incCount() { + refCount++; + } + + /** + * Decrement this client's reference count + * + */ + synchronized void decCount() { + refCount--; + } + + /** + * Return if this client has no reference + * + * @return true if this client has no reference; false otherwise + */ + synchronized boolean isZeroReference() { + return refCount==0; + } + + /** A call waiting for a value. */ + private class Call { + int id; // call id + Writable param; // parameter + Writable value; // value, null if error + IOException error; // exception, null if value + boolean done; // true when call is done + + protected Call(Writable param) { + this.param = param; + synchronized (Client.this) { + this.id = counter++; + } + } + + /** Indicate when the call is complete and the + * value or error are available. Notifies by default. */ + protected synchronized void callComplete() { + this.done = true; + notify(); // notify caller + } + + /** Set the exception when there is an error. + * Notify the caller the call is done. + * + * @param error exception thrown by the call; either local or remote + */ + public synchronized void setException(IOException error) { + this.error = error; + callComplete(); + } + + /** Set the return value when there is no error. + * Notify the caller the call is done. + * + * @param value return value of the call. + */ + public synchronized void setValue(Writable value) { + this.value = value; + callComplete(); + } + + public synchronized Writable getValue() { + return value; + } + } + + /** Thread that reads responses and notifies callers. Each connection owns a + * socket connected to a remote address. Calls are multiplexed through this + * socket: responses may be delivered out of order. 
*/ + private class Connection extends Thread { + private InetSocketAddress server; // server ip:port + private ConnectionHeader header; // connection header + private ConnectionId remoteId; // connection id + + private Socket socket = null; // connected socket + private DataInputStream in; + private DataOutputStream out; + + // currently active calls + private Hashtable calls = new Hashtable(); + private AtomicLong lastActivity = new AtomicLong();// last I/O activity time + private AtomicBoolean shouldCloseConnection = new AtomicBoolean(); // indicate if the connection is closed + private IOException closeException; // close reason + + public Connection(ConnectionId remoteId) throws IOException { + this.remoteId = remoteId; + this.server = remoteId.getAddress(); + if (server.isUnresolved()) { + throw new UnknownHostException("unknown host: " + + remoteId.getAddress().getHostName()); + } + + UserGroupInformation ticket = remoteId.getTicket(); + Class protocol = remoteId.getProtocol(); + header = + new ConnectionHeader(protocol == null ? null : protocol.getName(), ticket); + + this.setName("IPC Client (" + socketFactory.hashCode() +") connection to " + + remoteId.getAddress().toString() + + " from " + ((ticket==null)?"an unknown user":ticket.getUserName())); + this.setDaemon(true); + } + + /** Update lastActivity with the current time. */ + private void touch() { + lastActivity.set(System.currentTimeMillis()); + } + + /** + * Add a call to this connection's call queue and notify + * a listener; synchronized. + * Returns false if called during shutdown. + * @param call to add + * @return true if the call was added. + */ + private synchronized boolean addCall(Call call) { + if (shouldCloseConnection.get()) + return false; + calls.put(call.id, call); + notify(); + return true; + } + + /** This class sends a ping to the remote side when timeout on + * reading. If no failure is detected, it retries until at least + * a byte is read. + */ + private class PingInputStream extends FilterInputStream { + /* constructor */ + protected PingInputStream(InputStream in) { + super(in); + } + + /* Process timeout exception + * if the connection is not going to be closed, send a ping. + * otherwise, throw the timeout exception. + */ + private void handleTimeout(SocketTimeoutException e) throws IOException { + if (shouldCloseConnection.get() || !running.get()) { + throw e; + } else { + sendPing(); + } + } + + /** Read a byte from the stream. + * Send a ping if timeout on read. Retries if no failure is detected + * until a byte is read. + * @throws IOException for any IO problem other than socket timeout + */ + public int read() throws IOException { + do { + try { + return super.read(); + } catch (SocketTimeoutException e) { + handleTimeout(e); + } + } while (true); + } + + /** Read bytes into a buffer starting from offset off + * Send a ping if timeout on read. Retries if no failure is detected + * until a byte is read. + * + * @return the total number of bytes read; -1 if the connection is closed. + */ + public int read(byte[] buf, int off, int len) throws IOException { + do { + try { + return super.read(buf, off, len); + } catch (SocketTimeoutException e) { + handleTimeout(e); + } + } while (true); + } + } + + /** Connect to the server and set up the I/O streams. It then sends + * a header to the server and starts + * the connection thread that waits for responses. 
+ */ + private synchronized void setupIOstreams() { + if (socket != null || shouldCloseConnection.get()) { + return; + } + + short ioFailures = 0; + short timeoutFailures = 0; + try { + if (LOG.isDebugEnabled()) { + LOG.debug("Connecting to "+server); + } + while (true) { + try { + this.socket = socketFactory.createSocket(); + this.socket.setTcpNoDelay(tcpNoDelay); + // connection time out is 20s + NetUtils.connect(this.socket, remoteId.getAddress(), 20000); + this.socket.setSoTimeout(pingInterval); + break; + } catch (SocketTimeoutException toe) { + /* The max number of retries is 45, + * which amounts to 20s*45 = 15 minutes retries. + */ + handleConnectionFailure(timeoutFailures++, 45, toe); + } catch (IOException ie) { + handleConnectionFailure(ioFailures++, maxRetries, ie); + } + } + this.in = new DataInputStream(new BufferedInputStream + (new PingInputStream(NetUtils.getInputStream(socket)))); + this.out = new DataOutputStream + (new BufferedOutputStream(NetUtils.getOutputStream(socket))); + writeHeader(); + + // update last activity time + touch(); + + // start the receiver thread after the socket connection has been set up + start(); + } catch (IOException e) { + markClosed(e); + close(); + } + } + + /* Handle connection failures + * + * If the current number of retries is equal to the max number of retries, + * stop retrying and throw the exception; Otherwise backoff 1 second and + * try connecting again. + * + * This Method is only called from inside setupIOstreams(), which is + * synchronized. Hence the sleep is synchronized; the locks will be retained. + * + * @param curRetries current number of retries + * @param maxRetries max number of retries allowed + * @param ioe failure reason + * @throws IOException if max number of retries is reached + */ + private void handleConnectionFailure( + int curRetries, int maxRetries, IOException ioe) throws IOException { + // close the current connection + try { + socket.close(); + } catch (IOException e) { + LOG.warn("Not able to close a socket", e); + } + // set socket to null so that the next call to setupIOstreams + // can start the process of connect all over again. + socket = null; + + // throw the exception if the maximum number of retries is reached + if (curRetries >= maxRetries) { + throw ioe; + } + + // otherwise back off and retry + try { + Thread.sleep(1000); + } catch (InterruptedException ignored) {} + + LOG.info("Retrying connect to server: " + server + + ". Already tried " + curRetries + " time(s)."); + } + + /* Write the header for each connection + * Out is not synchronized because only the first thread does this. + */ + private void writeHeader() throws IOException { + // Write out the header and version + out.write(Server.HEADER.array()); + out.write(Server.CURRENT_VERSION); + + // Write out the ConnectionHeader + DataOutputBuffer buf = new DataOutputBuffer(); + header.write(buf); + + // Write out the payload length + int bufLen = buf.getLength(); + out.writeInt(bufLen); + out.write(buf.getData(), 0, bufLen); + } + + /* wait till someone signals us to start reading RPC response or + * it is idle too long, it is marked as to be closed, + * or the client is marked as not running. + * + * Return true if it is time to read a response; false otherwise. 
+ */ + private synchronized boolean waitForWork() { + if (calls.isEmpty() && !shouldCloseConnection.get() && running.get()) { + long timeout = maxIdleTime- + (System.currentTimeMillis()-lastActivity.get()); + if (timeout>0) { + try { + wait(timeout); + } catch (InterruptedException e) {} + } + } + + if (!calls.isEmpty() && !shouldCloseConnection.get() && running.get()) { + return true; + } else if (shouldCloseConnection.get()) { + return false; + } else if (calls.isEmpty()) { // idle connection closed or stopped + markClosed(null); + return false; + } else { // get stopped but there are still pending requests + markClosed((IOException)new IOException().initCause( + new InterruptedException())); + return false; + } + } + + public InetSocketAddress getRemoteAddress() { + return server; + } + + /* Send a ping to the server if the time elapsed + * since last I/O activity is equal to or greater than the ping interval + */ + private synchronized void sendPing() throws IOException { + long curTime = System.currentTimeMillis(); + if ( curTime - lastActivity.get() >= pingInterval) { + lastActivity.set(curTime); + synchronized (out) { + out.writeInt(PING_CALL_ID); + out.flush(); + } + } + } + + public void run() { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": starting, having connections " + + connections.size()); + + while (waitForWork()) {//wait here for work - read or close connection + receiveResponse(); + } + + close(); + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": stopped, remaining connections " + + connections.size()); + } + + /** Initiates a call by sending the parameter to the remote server. + * Note: this is not called from the Connection thread, but by other + * threads. + */ + public void sendParam(Call call) { + if (shouldCloseConnection.get()) { + return; + } + + DataOutputBuffer d=null; + try { + synchronized (this.out) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + " sending #" + call.id); + + //for serializing the + //data to be written + d = new DataOutputBuffer(); + d.writeInt(call.id); + call.param.write(d); + byte[] data = d.getData(); + int dataLength = d.getLength(); + out.writeInt(dataLength); //first put the data length + out.write(data, 0, dataLength);//write the data + out.flush(); + } + } catch(IOException e) { + markClosed(e); + } finally { + //the buffer is just an in-memory buffer, but it is still polite to + // close early + IOUtils.closeStream(d); + } + } + + /* Receive a response. + * Because only one receiver, so no synchronization on in. 
+ */ + private void receiveResponse() { + if (shouldCloseConnection.get()) { + return; + } + touch(); + + try { + int id = in.readInt(); // try to read an id + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + " got value #" + id); + + Call call = calls.remove(id); + + int state = in.readInt(); // read call status + if (state == Status.SUCCESS.state) { + Writable value = ReflectionUtils.newInstance(valueClass, conf); + value.readFields(in); // read value + call.setValue(value); + } else if (state == Status.ERROR.state) { + call.setException(new RemoteException(WritableUtils.readString(in), + WritableUtils.readString(in))); + } else if (state == Status.FATAL.state) { + // Close the connection + markClosed(new RemoteException(WritableUtils.readString(in), + WritableUtils.readString(in))); + } + } catch (IOException e) { + markClosed(e); + } + } + + private synchronized void markClosed(IOException e) { + if (shouldCloseConnection.compareAndSet(false, true)) { + closeException = e; + notifyAll(); + } + } + + /** Close the connection. */ + private synchronized void close() { + if (!shouldCloseConnection.get()) { + LOG.error("The connection is not in the closed state"); + return; + } + + // release the resources + // first thing to do;take the connection out of the connection list + synchronized (connections) { + if (connections.get(remoteId) == this) { + connections.remove(remoteId); + } + } + + // close the streams and therefore the socket + IOUtils.closeStream(out); + IOUtils.closeStream(in); + + // clean up all calls + if (closeException == null) { + if (!calls.isEmpty()) { + LOG.warn( + "A connection is closed for no cause and calls are not empty"); + + // clean up calls anyway + closeException = new IOException("Unexpected closed connection"); + cleanupCalls(); + } + } else { + // log the info + if (LOG.isDebugEnabled()) { + LOG.debug("closing ipc connection to " + server + ": " + + closeException.getMessage(),closeException); + } + + // cleanup calls + cleanupCalls(); + } + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": closed"); + } + + /* Cleanup all calls and mark them as done */ + private void cleanupCalls() { + Iterator> itor = calls.entrySet().iterator() ; + while (itor.hasNext()) { + Call c = itor.next().getValue(); + c.setException(closeException); // local exception + itor.remove(); + } + } + } + + /** Call implementation used for parallel calls. */ + private class ParallelCall extends Call { + private ParallelResults results; + private int index; + + public ParallelCall(Writable param, ParallelResults results, int index) { + super(param); + this.results = results; + this.index = index; + } + + /** Deliver result to result collector. */ + protected void callComplete() { + results.callComplete(this); + } + } + + /** Result collector for parallel calls. */ + private static class ParallelResults { + private Writable[] values; + private int size; + private int count; + + public ParallelResults(int size) { + this.values = new Writable[size]; + this.size = size; + } + + /** Collect a result. */ + public synchronized void callComplete(ParallelCall call) { + values[call.index] = call.getValue(); // store the value + count++; // count it + if (count == size) // if all values are in + notify(); // then notify waiting caller + } + } + + /** Construct an IPC client whose values are of the given {@link Writable} + * class. 
*/ + public Client(Class valueClass, Configuration conf, + SocketFactory factory) { + this.valueClass = valueClass; + this.maxIdleTime = + conf.getInt("ipc.client.connection.maxidletime", 10000); //10s + this.maxRetries = conf.getInt("ipc.client.connect.max.retries", 10); + this.tcpNoDelay = conf.getBoolean("ipc.client.tcpnodelay", false); + this.pingInterval = getPingInterval(conf); + if (LOG.isDebugEnabled()) { + LOG.debug("The ping interval is" + this.pingInterval + "ms."); + } + this.conf = conf; + this.socketFactory = factory; + } + + /** + * Construct an IPC client with the default SocketFactory + * @param valueClass + * @param conf + */ + public Client(Class valueClass, Configuration conf) { + this(valueClass, conf, NetUtils.getDefaultSocketFactory(conf)); + } + + /** Return the socket factory of this client + * + * @return this client's socket factory + */ + SocketFactory getSocketFactory() { + return socketFactory; + } + + /** Stop all threads related to this client. No further calls may be made + * using this client. */ + public void stop() { + if (LOG.isDebugEnabled()) { + LOG.debug("Stopping client"); + } + + if (!running.compareAndSet(true, false)) { + return; + } + + // wake up all connections + synchronized (connections) { + for (Connection conn : connections.values()) { + conn.interrupt(); + } + } + + // wait until all connections are closed + while (!connections.isEmpty()) { + try { + Thread.sleep(100); + } catch (InterruptedException e) { + } + } + } + + /** Make a call, passing param, to the IPC server running at + * address, returning the value. Throws exceptions if there are + * network problems or if the remote code threw an exception. + * @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead + */ + @Deprecated + public Writable call(Writable param, InetSocketAddress address) + throws InterruptedException, IOException { + return call(param, address, null); + } + + /** Make a call, passing param, to the IPC server running at + * address with the ticket credentials, returning + * the value. + * Throws exceptions if there are network problems or if the remote code + * threw an exception. + * @deprecated Use {@link #call(Writable, InetSocketAddress, Class, UserGroupInformation)} instead + */ + @Deprecated + public Writable call(Writable param, InetSocketAddress addr, + UserGroupInformation ticket) + throws InterruptedException, IOException { + return call(param, addr, null, ticket); + } + + /** Make a call, passing param, to the IPC server running at + * address which is servicing the protocol protocol, + * with the ticket credentials, returning the value. + * Throws exceptions if there are network problems or if the remote code + * threw an exception. 
*/ + public Writable call(Writable param, InetSocketAddress addr, + Class protocol, UserGroupInformation ticket) + throws InterruptedException, IOException { + Call call = new Call(param); + Connection connection = getConnection(addr, protocol, ticket, call); + connection.sendParam(call); // send the parameter + boolean interrupted = false; + synchronized (call) { + while (!call.done) { + try { + call.wait(); // wait for the result + } catch (InterruptedException ie) { + // save the fact that we were interrupted + interrupted = true; + } + } + + if (interrupted) { + // set the interrupt flag now that we are done waiting + Thread.currentThread().interrupt(); + } + + if (call.error != null) { + if (call.error instanceof RemoteException) { + call.error.fillInStackTrace(); + throw call.error; + } else { // local exception + throw wrapException(addr, call.error); + } + } else { + return call.value; + } + } + } + + /** + * Take an IOException and the address we were trying to connect to + * and return an IOException with the input exception as the cause. + * The new exception provides the stack trace of the place where + * the exception is thrown and some extra diagnostics information. + * If the exception is ConnectException or SocketTimeoutException, + * return a new one of the same type; Otherwise return an IOException. + * + * @param addr target address + * @param exception the relevant exception + * @return an exception to throw + */ + private IOException wrapException(InetSocketAddress addr, + IOException exception) { + if (exception instanceof ConnectException) { + //connection refused; include the host:port in the error + return (ConnectException)new ConnectException( + "Call to " + addr + " failed on connection exception: " + exception) + .initCause(exception); + } else if (exception instanceof SocketTimeoutException) { + return (SocketTimeoutException)new SocketTimeoutException( + "Call to " + addr + " failed on socket timeout exception: " + + exception).initCause(exception); + } else { + return (IOException)new IOException( + "Call to " + addr + " failed on local exception: " + exception) + .initCause(exception); + + } + } + + /** + * Makes a set of calls in parallel. Each parameter is sent to the + * corresponding address. When all values are available, or have timed out + * or errored, the collected results are returned in an array. The array + * contains nulls for calls that timed out or errored. + * @deprecated Use {@link #call(Writable[], InetSocketAddress[], Class, UserGroupInformation)} instead + */ + @Deprecated + public Writable[] call(Writable[] params, InetSocketAddress[] addresses) + throws IOException { + return call(params, addresses, null, null); + } + + /** Makes a set of calls in parallel. Each parameter is sent to the + * corresponding address. When all values are available, or have timed out + * or errored, the collected results are returned in an array. The array + * contains nulls for calls that timed out or errored. 
*/ + public Writable[] call(Writable[] params, InetSocketAddress[] addresses, + Class protocol, UserGroupInformation ticket) + throws IOException { + if (addresses.length == 0) return new Writable[0]; + + ParallelResults results = new ParallelResults(params.length); + synchronized (results) { + for (int i = 0; i < params.length; i++) { + ParallelCall call = new ParallelCall(params[i], results, i); + try { + Connection connection = + getConnection(addresses[i], protocol, ticket, call); + connection.sendParam(call); // send each parameter + } catch (IOException e) { + // log errors + LOG.info("Calling "+addresses[i]+" caught: " + + e.getMessage(),e); + results.size--; // wait for one fewer result + } + } + while (results.count != results.size) { + try { + results.wait(); // wait for all results + } catch (InterruptedException e) {} + } + + return results.values; + } + } + + /** Get a connection from the pool, or create a new one and add it to the + * pool. Connections to a given host/port are reused. */ + private Connection getConnection(InetSocketAddress addr, + Class protocol, + UserGroupInformation ticket, + Call call) + throws IOException { + if (!running.get()) { + // the client is stopped + throw new IOException("The client is stopped"); + } + Connection connection; + /* we could avoid this allocation for each RPC by having a + * connectionsId object and with set() method. We need to manage the + * refs for keys in HashMap properly. For now its ok. + */ + ConnectionId remoteId = new ConnectionId(addr, protocol, ticket); + do { + synchronized (connections) { + connection = connections.get(remoteId); + if (connection == null) { + connection = new Connection(remoteId); + connections.put(remoteId, connection); + } + } + } while (!connection.addCall(call)); + + //we don't invoke the method below inside "synchronized (connections)" + //block above. The reason for that is if the server happens to be slow, + //it will take longer to establish a connection and that will slow the + //entire system down. + connection.setupIOstreams(); + return connection; + } + + /** + * This class holds the address and the user ticket. The client connections + * to servers are uniquely identified by + */ + private static class ConnectionId { + InetSocketAddress address; + UserGroupInformation ticket; + Class protocol; + private static final int PRIME = 16777619; + + ConnectionId(InetSocketAddress address, Class protocol, + UserGroupInformation ticket) { + this.protocol = protocol; + this.address = address; + this.ticket = ticket; + } + + InetSocketAddress getAddress() { + return address; + } + + Class getProtocol() { + return protocol; + } + + UserGroupInformation getTicket() { + return ticket; + } + + + @Override + public boolean equals(Object obj) { + if (obj instanceof ConnectionId) { + ConnectionId id = (ConnectionId) obj; + return address.equals(id.address) && protocol == id.protocol && + ticket == id.ticket; + //Note : ticket is a ref comparision. + } + return false; + } + + @Override + public int hashCode() { + return (address.hashCode() + PRIME * System.identityHashCode(protocol)) ^ + System.identityHashCode(ticket); + } + } +} diff --git a/src/java/org/apache/hadoop/ipc/ConnectionHeader.java b/src/java/org/apache/hadoop/ipc/ConnectionHeader.java new file mode 100644 index 00000000000..44b113b7edc --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/ConnectionHeader.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.security.UnixUserGroupInformation; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * The IPC connection header sent by the client to the server + * on connection establishment. + */ +class ConnectionHeader implements Writable { + public static final Log LOG = LogFactory.getLog(ConnectionHeader.class); + + private String protocol; + private UserGroupInformation ugi = new UnixUserGroupInformation(); + + public ConnectionHeader() {} + + /** + * Create a new {@link ConnectionHeader} with the given protocol + * and {@link UserGroupInformation}. + * @param protocol protocol used for communication between the IPC client + * and the server + * @param ugi {@link UserGroupInformation} of the client communicating with + * the server + */ + public ConnectionHeader(String protocol, UserGroupInformation ugi) { + this.protocol = protocol; + this.ugi = ugi; + } + + @Override + public void readFields(DataInput in) throws IOException { + protocol = Text.readString(in); + if (protocol.isEmpty()) { + protocol = null; + } + + boolean ugiPresent = in.readBoolean(); + if (ugiPresent) { + ugi.readFields(in); + } else { + ugi = null; + } + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, (protocol == null) ? "" : protocol); + if (ugi != null) { + out.writeBoolean(true); + ugi.write(out); + } else { + out.writeBoolean(false); + } + } + + public String getProtocol() { + return protocol; + } + + public UserGroupInformation getUgi() { + return ugi; + } + + public String toString() { + return protocol + "-" + ugi; + } +} diff --git a/src/java/org/apache/hadoop/ipc/RPC.java b/src/java/org/apache/hadoop/ipc/RPC.java new file mode 100644 index 00000000000..94b0ec82e2e --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/RPC.java @@ -0,0 +1,575 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.lang.reflect.Proxy; +import java.lang.reflect.Method; +import java.lang.reflect.Array; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; + +import java.net.ConnectException; +import java.net.InetSocketAddress; +import java.net.SocketTimeoutException; +import java.io.*; +import java.util.Map; +import java.util.HashMap; + +import javax.net.SocketFactory; +import javax.security.auth.Subject; +import javax.security.auth.login.LoginException; + +import org.apache.commons.logging.*; + +import org.apache.hadoop.io.*; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.authorize.ServiceAuthorizationManager; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate; + +/** A simple RPC mechanism. + * + * A protocol is a Java interface. All parameters and return types must + * be one of: + * + *
+ * <ul> <li>a primitive type, <code>boolean</code>, <code>byte</code>,
+ * <code>char</code>, <code>short</code>, <code>int</code>, <code>long</code>,
+ * <code>float</code>, <code>double</code>, or <code>void</code>; or</li>
+ *
+ * <li>a {@link String}; or</li>
+ *
+ * <li>a {@link Writable}; or</li>
+ *
+ * <li>an array of the above types</li> </ul>
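+ *
+ * <p>As an illustrative sketch only (the protocol, implementation and endpoint
+ * names below are hypothetical, not part of this patch), a service is
+ * typically published with <code>getServer</code> and consumed through
+ * <code>getProxy</code>:
+ * <pre>
+ *   interface EchoProtocol extends VersionedProtocol {
+ *     long versionID = 1L;
+ *     Text echo(Text message) throws IOException;
+ *   }
+ *
+ *   // server side
+ *   Configuration conf = new Configuration();
+ *   EchoProtocol impl = new EchoProtocolImpl();   // hypothetical implementation
+ *   Server server = RPC.getServer(impl, "0.0.0.0", 9000, conf);
+ *   server.start();
+ *
+ *   // client side
+ *   InetSocketAddress addr = new InetSocketAddress("localhost", 9000);
+ *   EchoProtocol proxy = (EchoProtocol)
+ *       RPC.getProxy(EchoProtocol.class, EchoProtocol.versionID, addr, conf);
+ *   Text reply = proxy.echo(new Text("hello"));
+ *   RPC.stopProxy(proxy);
+ * </pre>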
+ * + * All methods in the protocol should throw only IOException. No field data of + * the protocol instance is transmitted. + */ +public class RPC { + private static final Log LOG = + LogFactory.getLog(RPC.class); + + private RPC() {} // no public ctor + + + /** A method invocation, including the method name and its parameters.*/ + private static class Invocation implements Writable, Configurable { + private String methodName; + private Class[] parameterClasses; + private Object[] parameters; + private Configuration conf; + + public Invocation() {} + + public Invocation(Method method, Object[] parameters) { + this.methodName = method.getName(); + this.parameterClasses = method.getParameterTypes(); + this.parameters = parameters; + } + + /** The name of the method invoked. */ + public String getMethodName() { return methodName; } + + /** The parameter classes. */ + public Class[] getParameterClasses() { return parameterClasses; } + + /** The parameter instances. */ + public Object[] getParameters() { return parameters; } + + public void readFields(DataInput in) throws IOException { + methodName = UTF8.readString(in); + parameters = new Object[in.readInt()]; + parameterClasses = new Class[parameters.length]; + ObjectWritable objectWritable = new ObjectWritable(); + for (int i = 0; i < parameters.length; i++) { + parameters[i] = ObjectWritable.readObject(in, objectWritable, this.conf); + parameterClasses[i] = objectWritable.getDeclaredClass(); + } + } + + public void write(DataOutput out) throws IOException { + UTF8.writeString(out, methodName); + out.writeInt(parameterClasses.length); + for (int i = 0; i < parameterClasses.length; i++) { + ObjectWritable.writeObject(out, parameters[i], parameterClasses[i], + conf); + } + } + + public String toString() { + StringBuffer buffer = new StringBuffer(); + buffer.append(methodName); + buffer.append("("); + for (int i = 0; i < parameters.length; i++) { + if (i != 0) + buffer.append(", "); + buffer.append(parameters[i]); + } + buffer.append(")"); + return buffer.toString(); + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + public Configuration getConf() { + return this.conf; + } + + } + + /* Cache a client using its socket factory as the hash key */ + static private class ClientCache { + private Map clients = + new HashMap(); + + /** + * Construct & cache an IPC client with the user-provided SocketFactory + * if no cached client exists. + * + * @param conf Configuration + * @return an IPC client + */ + private synchronized Client getClient(Configuration conf, + SocketFactory factory) { + // Construct & cache client. The configuration is only used for timeout, + // and Clients have connection pools. So we can either (a) lose some + // connection pooling and leak sockets, or (b) use the same timeout for all + // configurations. Since the IPC is usually intended globally, not + // per-job, we choose (a). + Client client = clients.get(factory); + if (client == null) { + client = new Client(ObjectWritable.class, conf, factory); + clients.put(factory, client); + } else { + client.incCount(); + } + return client; + } + + /** + * Construct & cache an IPC client with the default SocketFactory + * if no cached client exists. + * + * @param conf Configuration + * @return an IPC client + */ + private synchronized Client getClient(Configuration conf) { + return getClient(conf, SocketFactory.getDefault()); + } + + /** + * Stop a RPC client connection + * A RPC client is closed only when its reference count becomes zero. 
+ */ + private void stopClient(Client client) { + synchronized (this) { + client.decCount(); + if (client.isZeroReference()) { + clients.remove(client.getSocketFactory()); + } + } + if (client.isZeroReference()) { + client.stop(); + } + } + } + + private static ClientCache CLIENTS=new ClientCache(); + + private static class Invoker implements InvocationHandler { + private InetSocketAddress address; + private UserGroupInformation ticket; + private Client client; + private boolean isClosed = false; + + public Invoker(InetSocketAddress address, UserGroupInformation ticket, + Configuration conf, SocketFactory factory) { + this.address = address; + this.ticket = ticket; + this.client = CLIENTS.getClient(conf, factory); + } + + public Object invoke(Object proxy, Method method, Object[] args) + throws Throwable { + final boolean logDebug = LOG.isDebugEnabled(); + long startTime = 0; + if (logDebug) { + startTime = System.currentTimeMillis(); + } + + ObjectWritable value = (ObjectWritable) + client.call(new Invocation(method, args), address, + method.getDeclaringClass(), ticket); + if (logDebug) { + long callTime = System.currentTimeMillis() - startTime; + LOG.debug("Call: " + method.getName() + " " + callTime); + } + return value.get(); + } + + /* close the IPC client that's responsible for this invoker's RPCs */ + synchronized private void close() { + if (!isClosed) { + isClosed = true; + CLIENTS.stopClient(client); + } + } + } + + /** + * A version mismatch for the RPC protocol. + */ + public static class VersionMismatch extends IOException { + private String interfaceName; + private long clientVersion; + private long serverVersion; + + /** + * Create a version mismatch exception + * @param interfaceName the name of the protocol mismatch + * @param clientVersion the client's version of the protocol + * @param serverVersion the server's version of the protocol + */ + public VersionMismatch(String interfaceName, long clientVersion, + long serverVersion) { + super("Protocol " + interfaceName + " version mismatch. (client = " + + clientVersion + ", server = " + serverVersion + ")"); + this.interfaceName = interfaceName; + this.clientVersion = clientVersion; + this.serverVersion = serverVersion; + } + + /** + * Get the interface name + * @return the java class name + * (eg. org.apache.hadoop.mapred.InterTrackerProtocol) + */ + public String getInterfaceName() { + return interfaceName; + } + + /** + * Get the client's preferred version + */ + public long getClientVersion() { + return clientVersion; + } + + /** + * Get the server's agreed to version. 
+ */ + public long getServerVersion() { + return serverVersion; + } + } + + public static VersionedProtocol waitForProxy(Class protocol, + long clientVersion, + InetSocketAddress addr, + Configuration conf + ) throws IOException { + return waitForProxy(protocol, clientVersion, addr, conf, Long.MAX_VALUE); + } + + /** + * Get a proxy connection to a remote server + * @param protocol protocol class + * @param clientVersion client version + * @param addr remote address + * @param conf configuration to use + * @param timeout time in milliseconds before giving up + * @return the proxy + * @throws IOException if the far end through a RemoteException + */ + static VersionedProtocol waitForProxy(Class protocol, + long clientVersion, + InetSocketAddress addr, + Configuration conf, + long timeout + ) throws IOException { + long startTime = System.currentTimeMillis(); + IOException ioe; + while (true) { + try { + return getProxy(protocol, clientVersion, addr, conf); + } catch(ConnectException se) { // namenode has not been started + LOG.info("Server at " + addr + " not available yet, Zzzzz..."); + ioe = se; + } catch(SocketTimeoutException te) { // namenode is busy + LOG.info("Problem connecting to server: " + addr); + ioe = te; + } + // check if timed out + if (System.currentTimeMillis()-timeout >= startTime) { + throw ioe; + } + + // wait for retry + try { + Thread.sleep(1000); + } catch (InterruptedException ie) { + // IGNORE + } + } + } + /** Construct a client-side proxy object that implements the named protocol, + * talking to a server at the named address. */ + public static VersionedProtocol getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, Configuration conf, + SocketFactory factory) throws IOException { + UserGroupInformation ugi = null; + try { + ugi = UserGroupInformation.login(conf); + } catch (LoginException le) { + throw new RuntimeException("Couldn't login!"); + } + return getProxy(protocol, clientVersion, addr, ugi, conf, factory); + } + + /** Construct a client-side proxy object that implements the named protocol, + * talking to a server at the named address. */ + public static VersionedProtocol getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, + Configuration conf, SocketFactory factory) throws IOException { + + VersionedProtocol proxy = + (VersionedProtocol) Proxy.newProxyInstance( + protocol.getClassLoader(), new Class[] { protocol }, + new Invoker(addr, ticket, conf, factory)); + long serverVersion = proxy.getProtocolVersion(protocol.getName(), + clientVersion); + if (serverVersion == clientVersion) { + return proxy; + } else { + throw new VersionMismatch(protocol.getName(), clientVersion, + serverVersion); + } + } + + /** + * Construct a client-side proxy object with the default SocketFactory + * + * @param protocol + * @param clientVersion + * @param addr + * @param conf + * @return a proxy instance + * @throws IOException + */ + public static VersionedProtocol getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, Configuration conf) + throws IOException { + + return getProxy(protocol, clientVersion, addr, conf, NetUtils + .getDefaultSocketFactory(conf)); + } + + /** + * Stop this proxy and release its invoker's resource + * @param proxy the proxy to be stopped + */ + public static void stopProxy(VersionedProtocol proxy) { + if (proxy!=null) { + ((Invoker)Proxy.getInvocationHandler(proxy)).close(); + } + } + + /** + * Expert: Make multiple, parallel calls to a set of servers. 
+ * @deprecated Use {@link #call(Method, Object[][], InetSocketAddress[], UserGroupInformation, Configuration)} instead + */ + public static Object[] call(Method method, Object[][] params, + InetSocketAddress[] addrs, Configuration conf) + throws IOException { + return call(method, params, addrs, null, conf); + } + + /** Expert: Make multiple, parallel calls to a set of servers. */ + public static Object[] call(Method method, Object[][] params, + InetSocketAddress[] addrs, + UserGroupInformation ticket, Configuration conf) + throws IOException { + + Invocation[] invocations = new Invocation[params.length]; + for (int i = 0; i < params.length; i++) + invocations[i] = new Invocation(method, params[i]); + Client client = CLIENTS.getClient(conf); + try { + Writable[] wrappedValues = + client.call(invocations, addrs, method.getDeclaringClass(), ticket); + + if (method.getReturnType() == Void.TYPE) { + return null; + } + + Object[] values = + (Object[])Array.newInstance(method.getReturnType(), wrappedValues.length); + for (int i = 0; i < values.length; i++) + if (wrappedValues[i] != null) + values[i] = ((ObjectWritable)wrappedValues[i]).get(); + + return values; + } finally { + CLIENTS.stopClient(client); + } + } + + /** Construct a server for a protocol implementation instance listening on a + * port and address. */ + public static Server getServer(final Object instance, final String bindAddress, final int port, Configuration conf) + throws IOException { + return getServer(instance, bindAddress, port, 1, false, conf); + } + + /** Construct a server for a protocol implementation instance listening on a + * port and address. */ + public static Server getServer(final Object instance, final String bindAddress, final int port, + final int numHandlers, + final boolean verbose, Configuration conf) + throws IOException { + return new Server(instance, conf, bindAddress, port, numHandlers, verbose); + } + + /** An RPC Server. */ + public static class Server extends org.apache.hadoop.ipc.Server { + private Object instance; + private boolean verbose; + private boolean authorize = false; + + /** Construct an RPC server. + * @param instance the instance whose methods will be called + * @param conf the configuration to use + * @param bindAddress the address to bind on to listen for connection + * @param port the port to listen for connections on + */ + public Server(Object instance, Configuration conf, String bindAddress, int port) + throws IOException { + this(instance, conf, bindAddress, port, 1, false); + } + + private static String classNameBase(String className) { + String[] names = className.split("\\.", -1); + if (names == null || names.length == 0) { + return className; + } + return names[names.length-1]; + } + + /** Construct an RPC server. 
+ * @param instance the instance whose methods will be called + * @param conf the configuration to use + * @param bindAddress the address to bind on to listen for connection + * @param port the port to listen for connections on + * @param numHandlers the number of method handler threads to run + * @param verbose whether each call should be logged + */ + public Server(Object instance, Configuration conf, String bindAddress, int port, + int numHandlers, boolean verbose) throws IOException { + super(bindAddress, port, Invocation.class, numHandlers, conf, classNameBase(instance.getClass().getName())); + this.instance = instance; + this.verbose = verbose; + this.authorize = + conf.getBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, + false); + } + + public Writable call(Class protocol, Writable param, long receivedTime) + throws IOException { + try { + Invocation call = (Invocation)param; + if (verbose) log("Call: " + call); + + Method method = + protocol.getMethod(call.getMethodName(), + call.getParameterClasses()); + method.setAccessible(true); + + long startTime = System.currentTimeMillis(); + Object value = method.invoke(instance, call.getParameters()); + int processingTime = (int) (System.currentTimeMillis() - startTime); + int qTime = (int) (startTime-receivedTime); + if (LOG.isDebugEnabled()) { + LOG.debug("Served: " + call.getMethodName() + + " queueTime= " + qTime + + " procesingTime= " + processingTime); + } + rpcMetrics.rpcQueueTime.inc(qTime); + rpcMetrics.rpcProcessingTime.inc(processingTime); + + MetricsTimeVaryingRate m = + (MetricsTimeVaryingRate) rpcMetrics.registry.get(call.getMethodName()); + if (m == null) { + try { + m = new MetricsTimeVaryingRate(call.getMethodName(), + rpcMetrics.registry); + } catch (IllegalArgumentException iae) { + // the metrics has been registered; re-fetch the handle + LOG.info("Error register " + call.getMethodName(), iae); + m = (MetricsTimeVaryingRate) rpcMetrics.registry.get( + call.getMethodName()); + } + } + m.inc(processingTime); + + if (verbose) log("Return: "+value); + + return new ObjectWritable(method.getReturnType(), value); + + } catch (InvocationTargetException e) { + Throwable target = e.getTargetException(); + if (target instanceof IOException) { + throw (IOException)target; + } else { + IOException ioe = new IOException(target.toString()); + ioe.setStackTrace(target.getStackTrace()); + throw ioe; + } + } catch (Throwable e) { + IOException ioe = new IOException(e.toString()); + ioe.setStackTrace(e.getStackTrace()); + throw ioe; + } + } + + @Override + public void authorize(Subject user, ConnectionHeader connection) + throws AuthorizationException { + if (authorize) { + Class protocol = null; + try { + protocol = getProtocolClass(connection.getProtocol(), getConf()); + } catch (ClassNotFoundException cfne) { + throw new AuthorizationException("Unknown protocol: " + + connection.getProtocol()); + } + ServiceAuthorizationManager.authorize(user, protocol); + } + } + } + + private static void log(String value) { + if (value!= null && value.length() > 55) + value = value.substring(0, 55)+"..."; + LOG.info(value); + } +} diff --git a/src/java/org/apache/hadoop/ipc/RemoteException.java b/src/java/org/apache/hadoop/ipc/RemoteException.java new file mode 100644 index 00000000000..214b2f66b61 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/RemoteException.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.io.IOException; +import java.lang.reflect.Constructor; + +import org.xml.sax.Attributes; +import org.znerd.xmlenc.XMLOutputter; + +public class RemoteException extends IOException { + /** For java.io.Serializable */ + private static final long serialVersionUID = 1L; + + private String className; + + public RemoteException(String className, String msg) { + super(msg); + this.className = className; + } + + public String getClassName() { + return className; + } + + /** + * If this remote exception wraps up one of the lookupTypes + * then return this exception. + *
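+ * <p>An illustrative sketch only (the proxy call and exception class below
+ * are hypothetical):
+ * <pre>
+ *   try {
+ *     proxy.rename(src, dst);
+ *   } catch (RemoteException re) {
+ *     throw re.unwrapRemoteException(FileNotFoundException.class);
+ *   }
+ * </pre>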

+ * Unwraps any IOException. + * + * @param lookupTypes the desired exception class. + * @return IOException, which is either the lookupClass exception or this. + */ + public IOException unwrapRemoteException(Class... lookupTypes) { + if(lookupTypes == null) + return this; + for(Class lookupClass : lookupTypes) { + if(!lookupClass.getName().equals(getClassName())) + continue; + try { + return instantiateException(lookupClass.asSubclass(IOException.class)); + } catch(Exception e) { + // cannot instantiate lookupClass, just return this + return this; + } + } + // wrapped up exception is not in lookupTypes, just return this + return this; + } + + /** + * Instantiate and return the exception wrapped up by this remote exception. + * + *

This unwraps any Throwable that has a constructor taking + * a String as a parameter. + * Otherwise it returns this. + * + * @return Throwable + */ + public IOException unwrapRemoteException() { + try { + Class realClass = Class.forName(getClassName()); + return instantiateException(realClass.asSubclass(IOException.class)); + } catch(Exception e) { + // cannot instantiate the original exception, just return this + } + return this; + } + + private IOException instantiateException(Class cls) + throws Exception { + Constructor cn = cls.getConstructor(String.class); + cn.setAccessible(true); + String firstLine = this.getMessage(); + int eol = firstLine.indexOf('\n'); + if (eol>=0) { + firstLine = firstLine.substring(0, eol); + } + IOException ex = cn.newInstance(firstLine); + ex.initCause(this); + return ex; + } + + /** Write the object to XML format */ + public void writeXml(String path, XMLOutputter doc) throws IOException { + doc.startTag(RemoteException.class.getSimpleName()); + doc.attribute("path", path); + doc.attribute("class", getClassName()); + String msg = getLocalizedMessage(); + int i = msg.indexOf("\n"); + if (i >= 0) { + msg = msg.substring(0, i); + } + doc.attribute("message", msg.substring(msg.indexOf(":") + 1).trim()); + doc.endTag(); + } + + /** Create RemoteException from attributes */ + public static RemoteException valueOf(Attributes attrs) { + return new RemoteException(attrs.getValue("class"), + attrs.getValue("message")); + } +} diff --git a/src/java/org/apache/hadoop/ipc/Server.java b/src/java/org/apache/hadoop/ipc/Server.java new file mode 100644 index 00000000000..890569897b4 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/Server.java @@ -0,0 +1,1255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ipc; + +import java.io.IOException; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; + +import java.nio.ByteBuffer; +import java.nio.channels.CancelledKeyException; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SelectionKey; +import java.nio.channels.Selector; +import java.nio.channels.ServerSocketChannel; +import java.nio.channels.SocketChannel; +import java.nio.channels.WritableByteChannel; + +import java.net.BindException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.net.UnknownHostException; + +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; + +import javax.security.auth.Subject; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.ipc.metrics.RpcMetrics; +import org.apache.hadoop.security.authorize.AuthorizationException; + +/** An abstract IPC service. IPC calls take a single {@link Writable} as a + * parameter, and return a {@link Writable} as their value. A service runs on + * a port and is defined by a parameter class and a value class. + * + * @see Client + */ +public abstract class Server { + + /** + * The first four bytes of Hadoop RPC connections + */ + public static final ByteBuffer HEADER = ByteBuffer.wrap("hrpc".getBytes()); + + // 1 : Introduce ping and server does not throw away RPCs + // 3 : Introduce the protocol into the RPC connection header + public static final byte CURRENT_VERSION = 3; + + /** + * How many calls/handler are allowed in the queue. + */ + private static final int MAX_QUEUE_SIZE_PER_HANDLER = 100; + + public static final Log LOG = LogFactory.getLog(Server.class); + + private static final ThreadLocal SERVER = new ThreadLocal(); + + private static final Map> PROTOCOL_CACHE = + new ConcurrentHashMap>(); + + static Class getProtocolClass(String protocolName, Configuration conf) + throws ClassNotFoundException { + Class protocol = PROTOCOL_CACHE.get(protocolName); + if (protocol == null) { + protocol = conf.getClassByName(protocolName); + PROTOCOL_CACHE.put(protocolName, protocol); + } + return protocol; + } + + /** Returns the server instance called under or null. May be called under + * {@link #call(Writable, long)} implementations, and under {@link Writable} + * methods of paramters and return values. Permits applications to access + * the server context.*/ + public static Server get() { + return SERVER.get(); + } + + /** This is set to Call object before Handler invokes an RPC and reset + * after the call returns. 
+ */ + private static final ThreadLocal CurCall = new ThreadLocal(); + + /** Returns the remote side ip address when invoked inside an RPC + * Returns null incase of an error. + */ + public static InetAddress getRemoteIp() { + Call call = CurCall.get(); + if (call != null) { + return call.connection.socket.getInetAddress(); + } + return null; + } + /** Returns remote address as a string when invoked inside an RPC. + * Returns null in case of an error. + */ + public static String getRemoteAddress() { + InetAddress addr = getRemoteIp(); + return (addr == null) ? null : addr.getHostAddress(); + } + + private String bindAddress; + private int port; // port we listen on + private int handlerCount; // number of handler threads + private Class paramClass; // class of call parameters + private int maxIdleTime; // the maximum idle time after + // which a client may be disconnected + private int thresholdIdleConnections; // the number of idle connections + // after which we will start + // cleaning up idle + // connections + int maxConnectionsToNuke; // the max number of + // connections to nuke + //during a cleanup + + protected RpcMetrics rpcMetrics; + + private Configuration conf; + + private int maxQueueSize; + private int socketSendBufferSize; + private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm + + volatile private boolean running = true; // true while server runs + private BlockingQueue callQueue; // queued calls + + private List connectionList = + Collections.synchronizedList(new LinkedList()); + //maintain a list + //of client connections + private Listener listener = null; + private Responder responder = null; + private int numConnections = 0; + private Handler[] handlers = null; + + /** + * A convenience method to bind to a given address and report + * better exceptions if the address is not a valid host. + * @param socket the socket to bind + * @param address the address to bind to + * @param backlog the number of connections allowed in the queue + * @throws BindException if the address can't be bound + * @throws UnknownHostException if the address isn't a valid host name + * @throws IOException other random errors from bind + */ + public static void bind(ServerSocket socket, InetSocketAddress address, + int backlog) throws IOException { + try { + socket.bind(address, backlog); + } catch (BindException e) { + BindException bindException = new BindException("Problem binding to " + address + + " : " + e.getMessage()); + bindException.initCause(e); + throw bindException; + } catch (SocketException e) { + // If they try to bind to a different host's address, give a better + // error message. + if ("Unresolved address".equals(e.getMessage())) { + throw new UnknownHostException("Invalid hostname for server: " + + address.getHostName()); + } else { + throw e; + } + } + } + + /** A call queued for handling. 
*/ + private static class Call { + private int id; // the client's call id + private Writable param; // the parameter passed + private Connection connection; // connection to client + private long timestamp; // the time received when response is null + // the time served when response is not null + private ByteBuffer response; // the response for this call + + public Call(int id, Writable param, Connection connection) { + this.id = id; + this.param = param; + this.connection = connection; + this.timestamp = System.currentTimeMillis(); + this.response = null; + } + + @Override + public String toString() { + return param.toString() + " from " + connection.toString(); + } + + public void setResponse(ByteBuffer response) { + this.response = response; + } + } + + /** Listens on the socket. Creates jobs for the handler threads*/ + private class Listener extends Thread { + + private ServerSocketChannel acceptChannel = null; //the accept channel + private Selector selector = null; //the selector that we use for the server + private InetSocketAddress address; //the address we bind at + private Random rand = new Random(); + private long lastCleanupRunTime = 0; //the last time when a cleanup connec- + //-tion (for idle connections) ran + private long cleanupInterval = 10000; //the minimum interval between + //two cleanup runs + private int backlogLength = conf.getInt("ipc.server.listen.queue.size", 128); + + public Listener() throws IOException { + address = new InetSocketAddress(bindAddress, port); + // Create a new server socket and set to non blocking mode + acceptChannel = ServerSocketChannel.open(); + acceptChannel.configureBlocking(false); + + // Bind the server socket to the local host and port + bind(acceptChannel.socket(), address, backlogLength); + port = acceptChannel.socket().getLocalPort(); //Could be an ephemeral port + // create a selector; + selector= Selector.open(); + + // Register accepts on the server socket with the selector. + acceptChannel.register(selector, SelectionKey.OP_ACCEPT); + this.setName("IPC Server listener on " + port); + this.setDaemon(true); + } + /** cleanup connections from connectionList. Choose a random range + * to scan and also have a limit on the number of the connections + * that will be cleanedup per run. The criteria for cleanup is the time + * for which the connection was idle. If 'force' is true then all + * connections will be looked at for the cleanup. 
+ */ + private void cleanupConnections(boolean force) { + if (force || numConnections > thresholdIdleConnections) { + long currentTime = System.currentTimeMillis(); + if (!force && (currentTime - lastCleanupRunTime) < cleanupInterval) { + return; + } + int start = 0; + int end = numConnections - 1; + if (!force) { + start = rand.nextInt() % numConnections; + end = rand.nextInt() % numConnections; + int temp; + if (end < start) { + temp = start; + start = end; + end = temp; + } + } + int i = start; + int numNuked = 0; + while (i <= end) { + Connection c; + synchronized (connectionList) { + try { + c = connectionList.get(i); + } catch (Exception e) {return;} + } + if (c.timedOut(currentTime)) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": disconnecting client " + c.getHostAddress()); + closeConnection(c); + numNuked++; + end--; + c = null; + if (!force && numNuked == maxConnectionsToNuke) break; + } + else i++; + } + lastCleanupRunTime = System.currentTimeMillis(); + } + } + + @Override + public void run() { + LOG.info(getName() + ": starting"); + SERVER.set(Server.this); + while (running) { + SelectionKey key = null; + try { + getSelector().select(); + Iterator iter = getSelector().selectedKeys().iterator(); + while (iter.hasNext()) { + key = iter.next(); + iter.remove(); + try { + if (key.isValid()) { + if (key.isAcceptable()) + doAccept(key); + else if (key.isReadable()) + doRead(key); + } + } catch (IOException e) { + } + key = null; + } + } catch (OutOfMemoryError e) { + // we can run out of memory if we have too many threads + // log the event and sleep for a minute and give + // some thread(s) a chance to finish + LOG.warn("Out of Memory in server select", e); + closeCurrentConnection(key, e); + cleanupConnections(true); + try { Thread.sleep(60000); } catch (Exception ie) {} + } catch (InterruptedException e) { + if (running) { // unexpected -- log it + LOG.info(getName() + " caught: " + + StringUtils.stringifyException(e)); + } + } catch (Exception e) { + closeCurrentConnection(key, e); + } + cleanupConnections(false); + } + LOG.info("Stopping " + this.getName()); + + synchronized (this) { + try { + acceptChannel.close(); + selector.close(); + } catch (IOException e) { } + + selector= null; + acceptChannel= null; + + // clean up all connections + while (!connectionList.isEmpty()) { + closeConnection(connectionList.remove(0)); + } + } + } + + private void closeCurrentConnection(SelectionKey key, Throwable e) { + if (key != null) { + Connection c = (Connection)key.attachment(); + if (c != null) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": disconnecting client " + c.getHostAddress()); + closeConnection(c); + c = null; + } + } + } + + InetSocketAddress getAddress() { + return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress(); + } + + void doAccept(SelectionKey key) throws IOException, OutOfMemoryError { + Connection c = null; + ServerSocketChannel server = (ServerSocketChannel) key.channel(); + // accept up to 10 connections + for (int i=0; i<10; i++) { + SocketChannel channel = server.accept(); + if (channel==null) return; + + channel.configureBlocking(false); + channel.socket().setTcpNoDelay(tcpNoDelay); + SelectionKey readKey = channel.register(getSelector(), + SelectionKey.OP_READ); + c = new Connection(readKey, channel, System.currentTimeMillis()); + readKey.attach(c); + synchronized (connectionList) { + connectionList.add(numConnections, c); + numConnections++; + } + if (LOG.isDebugEnabled()) + LOG.debug("Server connection from " + c.toString() 
+ + "; # active connections: " + numConnections + + "; # queued calls: " + callQueue.size()); + } + } + + void doRead(SelectionKey key) throws InterruptedException { + int count = 0; + Connection c = (Connection)key.attachment(); + if (c == null) { + return; + } + c.setLastContact(System.currentTimeMillis()); + + try { + count = c.readAndProcess(); + } catch (InterruptedException ieo) { + LOG.info(getName() + ": readAndProcess caught InterruptedException", ieo); + throw ieo; + } catch (Exception e) { + LOG.info(getName() + ": readAndProcess threw exception " + e + ". Count of bytes read: " + count, e); + count = -1; //so that the (count < 0) block is executed + } + if (count < 0) { + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": disconnecting client " + + c.getHostAddress() + ". Number of active connections: "+ + numConnections); + closeConnection(c); + c = null; + } + else { + c.setLastContact(System.currentTimeMillis()); + } + } + + synchronized void doStop() { + if (selector != null) { + selector.wakeup(); + Thread.yield(); + } + if (acceptChannel != null) { + try { + acceptChannel.socket().close(); + } catch (IOException e) { + LOG.info(getName() + ":Exception in closing listener socket. " + e); + } + } + } + + synchronized Selector getSelector() { return selector; } + } + + // Sends responses of RPC back to clients. + private class Responder extends Thread { + private Selector writeSelector; + private int pending; // connections waiting to register + + final static int PURGE_INTERVAL = 900000; // 15mins + + Responder() throws IOException { + this.setName("IPC Server Responder"); + this.setDaemon(true); + writeSelector = Selector.open(); // create a selector + pending = 0; + } + + @Override + public void run() { + LOG.info(getName() + ": starting"); + SERVER.set(Server.this); + long lastPurgeTime = 0; // last check for old calls. + + while (running) { + try { + waitPending(); // If a channel is being registered, wait. + writeSelector.select(PURGE_INTERVAL); + Iterator iter = writeSelector.selectedKeys().iterator(); + while (iter.hasNext()) { + SelectionKey key = iter.next(); + iter.remove(); + try { + if (key.isValid() && key.isWritable()) { + doAsyncWrite(key); + } + } catch (IOException e) { + LOG.info(getName() + ": doAsyncWrite threw exception " + e); + } + } + long now = System.currentTimeMillis(); + if (now < lastPurgeTime + PURGE_INTERVAL) { + continue; + } + lastPurgeTime = now; + // + // If there were some calls that have not been sent out for a + // long time, discard them. + // + LOG.debug("Checking for old call responses."); + ArrayList calls; + + // get the list of channels from list of keys. 
+ synchronized (writeSelector.keys()) { + calls = new ArrayList(writeSelector.keys().size()); + iter = writeSelector.keys().iterator(); + while (iter.hasNext()) { + SelectionKey key = iter.next(); + Call call = (Call)key.attachment(); + if (call != null && key.channel() == call.connection.channel) { + calls.add(call); + } + } + } + + for(Call call : calls) { + try { + doPurge(call, now); + } catch (IOException e) { + LOG.warn("Error in purging old calls " + e); + } + } + } catch (OutOfMemoryError e) { + // + // we can run out of memory if we have too many threads + // log the event and sleep for a minute and give + // some thread(s) a chance to finish + // + LOG.warn("Out of Memory in server select", e); + try { Thread.sleep(60000); } catch (Exception ie) {} + } catch (Exception e) { + LOG.warn("Exception in Responder " + + StringUtils.stringifyException(e)); + } + } + LOG.info("Stopping " + this.getName()); + } + + private void doAsyncWrite(SelectionKey key) throws IOException { + Call call = (Call)key.attachment(); + if (call == null) { + return; + } + if (key.channel() != call.connection.channel) { + throw new IOException("doAsyncWrite: bad channel"); + } + + synchronized(call.connection.responseQueue) { + if (processResponse(call.connection.responseQueue, false)) { + try { + key.interestOps(0); + } catch (CancelledKeyException e) { + /* The Listener/reader might have closed the socket. + * We don't explicitly cancel the key, so not sure if this will + * ever fire. + * This warning could be removed. + */ + LOG.warn("Exception while changing ops : " + e); + } + } + } + } + + // + // Remove calls that have been pending in the responseQueue + // for a long time. + // + private void doPurge(Call call, long now) throws IOException { + LinkedList responseQueue = call.connection.responseQueue; + synchronized (responseQueue) { + Iterator iter = responseQueue.listIterator(0); + while (iter.hasNext()) { + call = iter.next(); + if (now > call.timestamp + PURGE_INTERVAL) { + closeConnection(call.connection); + break; + } + } + } + } + + // Processes one response. Returns true if there are no more pending + // data for this channel. + // + private boolean processResponse(LinkedList responseQueue, + boolean inHandler) throws IOException { + boolean error = true; + boolean done = false; // there is more data for this channel. + int numElements = 0; + Call call = null; + try { + synchronized (responseQueue) { + // + // If there are no items for this channel, then we are done + // + numElements = responseQueue.size(); + if (numElements == 0) { + error = false; + return true; // no more data for this channel. + } + // + // Extract the first call + // + call = responseQueue.removeFirst(); + SocketChannel channel = call.connection.channel; + if (LOG.isDebugEnabled()) { + LOG.debug(getName() + ": responding to #" + call.id + " from " + + call.connection); + } + // + // Send as much data as we can in the non-blocking fashion + // + int numBytes = channelWrite(channel, call.response); + if (numBytes < 0) { + return true; + } + if (!call.response.hasRemaining()) { + call.connection.decRpcCount(); + if (numElements == 1) { // last call fully processes. + done = true; // no more data for this channel. + } else { + done = false; // more calls pending to be sent. 
+ } + if (LOG.isDebugEnabled()) { + LOG.debug(getName() + ": responding to #" + call.id + " from " + + call.connection + " Wrote " + numBytes + " bytes."); + } + } else { + // + // If we were unable to write the entire response out, then + // insert in Selector queue. + // + call.connection.responseQueue.addFirst(call); + + if (inHandler) { + // set the serve time when the response has to be sent later + call.timestamp = System.currentTimeMillis(); + + incPending(); + try { + // Wakeup the thread blocked on select, only then can the call + // to channel.register() complete. + writeSelector.wakeup(); + channel.register(writeSelector, SelectionKey.OP_WRITE, call); + } catch (ClosedChannelException e) { + //Its ok. channel might be closed else where. + done = true; + } finally { + decPending(); + } + } + if (LOG.isDebugEnabled()) { + LOG.debug(getName() + ": responding to #" + call.id + " from " + + call.connection + " Wrote partial " + numBytes + + " bytes."); + } + } + error = false; // everything went off well + } + } finally { + if (error && call != null) { + LOG.warn(getName()+", call " + call + ": output error"); + done = true; // error. no more data for this channel. + closeConnection(call.connection); + } + } + return done; + } + + // + // Enqueue a response from the application. + // + void doRespond(Call call) throws IOException { + synchronized (call.connection.responseQueue) { + call.connection.responseQueue.addLast(call); + if (call.connection.responseQueue.size() == 1) { + processResponse(call.connection.responseQueue, true); + } + } + } + + private synchronized void incPending() { // call waiting to be enqueued. + pending++; + } + + private synchronized void decPending() { // call done enqueueing. + pending--; + notify(); + } + + private synchronized void waitPending() throws InterruptedException { + while (pending > 0) { + wait(); + } + } + } + + /** Reads calls from a connection and queues them for handling. */ + private class Connection { + private boolean versionRead = false; //if initial signature and + //version are read + private boolean headerRead = false; //if the connection header that + //follows version is read. + + private SocketChannel channel; + private ByteBuffer data; + private ByteBuffer dataLengthBuffer; + private LinkedList responseQueue; + private volatile int rpcCount = 0; // number of outstanding rpcs + private long lastContact; + private int dataLength; + private Socket socket; + // Cache the remote host & port info so that even if the socket is + // disconnected, we can say where it used to connect to. 
+ private String hostAddress; + private int remotePort; + + ConnectionHeader header = new ConnectionHeader(); + Class protocol; + + Subject user = null; + + // Fake 'call' for failed authorization response + private static final int AUTHROIZATION_FAILED_CALLID = -1; + private final Call authFailedCall = + new Call(AUTHROIZATION_FAILED_CALLID, null, null); + private ByteArrayOutputStream authFailedResponse = new ByteArrayOutputStream(); + + public Connection(SelectionKey key, SocketChannel channel, + long lastContact) { + this.channel = channel; + this.lastContact = lastContact; + this.data = null; + this.dataLengthBuffer = ByteBuffer.allocate(4); + this.socket = channel.socket(); + InetAddress addr = socket.getInetAddress(); + if (addr == null) { + this.hostAddress = "*Unknown*"; + } else { + this.hostAddress = addr.getHostAddress(); + } + this.remotePort = socket.getPort(); + this.responseQueue = new LinkedList(); + if (socketSendBufferSize != 0) { + try { + socket.setSendBufferSize(socketSendBufferSize); + } catch (IOException e) { + LOG.warn("Connection: unable to set socket send buffer size to " + + socketSendBufferSize); + } + } + } + + @Override + public String toString() { + return getHostAddress() + ":" + remotePort; + } + + public String getHostAddress() { + return hostAddress; + } + + public void setLastContact(long lastContact) { + this.lastContact = lastContact; + } + + public long getLastContact() { + return lastContact; + } + + /* Return true if the connection has no outstanding rpc */ + private boolean isIdle() { + return rpcCount == 0; + } + + /* Decrement the outstanding RPC count */ + private void decRpcCount() { + rpcCount--; + } + + /* Increment the outstanding RPC count */ + private void incRpcCount() { + rpcCount++; + } + + private boolean timedOut(long currentTime) { + if (isIdle() && currentTime - lastContact > maxIdleTime) + return true; + return false; + } + + public int readAndProcess() throws IOException, InterruptedException { + while (true) { + /* Read at most one RPC. If the header is not read completely yet + * then iterate until we read first RPC or until there is no data left. + */ + int count = -1; + if (dataLengthBuffer.remaining() > 0) { + count = channelRead(channel, dataLengthBuffer); + if (count < 0 || dataLengthBuffer.remaining() > 0) + return count; + } + + if (!versionRead) { + //Every connection is expected to send the header. + ByteBuffer versionBuffer = ByteBuffer.allocate(1); + count = channelRead(channel, versionBuffer); + if (count <= 0) { + return count; + } + int version = versionBuffer.get(0); + + dataLengthBuffer.flip(); + if (!HEADER.equals(dataLengthBuffer) || version != CURRENT_VERSION) { + //Warning is ok since this is not supposed to happen. 
+ LOG.warn("Incorrect header or version mismatch from " + + hostAddress + ":" + remotePort + + " got version " + version + + " expected version " + CURRENT_VERSION); + return -1; + } + dataLengthBuffer.clear(); + versionRead = true; + continue; + } + + if (data == null) { + dataLengthBuffer.flip(); + dataLength = dataLengthBuffer.getInt(); + + if (dataLength == Client.PING_CALL_ID) { + dataLengthBuffer.clear(); + return 0; //ping message + } + data = ByteBuffer.allocate(dataLength); + incRpcCount(); // Increment the rpc count + } + + count = channelRead(channel, data); + + if (data.remaining() == 0) { + dataLengthBuffer.clear(); + data.flip(); + if (headerRead) { + processData(); + data = null; + return count; + } else { + processHeader(); + headerRead = true; + data = null; + + // Authorize the connection + try { + authorize(user, header); + + if (LOG.isDebugEnabled()) { + LOG.debug("Successfully authorized " + header); + } + } catch (AuthorizationException ae) { + authFailedCall.connection = this; + setupResponse(authFailedResponse, authFailedCall, + Status.FATAL, null, + ae.getClass().getName(), ae.getMessage()); + responder.doRespond(authFailedCall); + + // Close this connection + return -1; + } + + continue; + } + } + return count; + } + } + + /// Reads the connection header following version + private void processHeader() throws IOException { + DataInputStream in = + new DataInputStream(new ByteArrayInputStream(data.array())); + header.readFields(in); + try { + String protocolClassName = header.getProtocol(); + if (protocolClassName != null) { + protocol = getProtocolClass(header.getProtocol(), conf); + } + } catch (ClassNotFoundException cnfe) { + throw new IOException("Unknown protocol: " + header.getProtocol()); + } + + // TODO: Get the user name from the GSS API for Kerberbos-based security + // Create the user subject + user = SecurityUtil.getSubject(header.getUgi()); + } + + private void processData() throws IOException, InterruptedException { + DataInputStream dis = + new DataInputStream(new ByteArrayInputStream(data.array())); + int id = dis.readInt(); // try to read an id + + if (LOG.isDebugEnabled()) + LOG.debug(" got #" + id); + + Writable param = ReflectionUtils.newInstance(paramClass, conf); // read param + param.readFields(dis); + + Call call = new Call(id, param, this); + callQueue.put(call); // queue the call; maybe blocked here + } + + private synchronized void close() throws IOException { + data = null; + dataLengthBuffer = null; + if (!channel.isOpen()) + return; + try {socket.shutdownOutput();} catch(Exception e) { + LOG.warn("Ignoring socket shutdown exception"); + } + if (channel.isOpen()) { + try {channel.close();} catch(Exception e) {} + } + try {socket.close();} catch(Exception e) {} + } + } + + /** Handles queued calls . 
*/ + private class Handler extends Thread { + public Handler(int instanceNumber) { + this.setDaemon(true); + this.setName("IPC Server handler "+ instanceNumber + " on " + port); + } + + @Override + public void run() { + LOG.info(getName() + ": starting"); + SERVER.set(Server.this); + ByteArrayOutputStream buf = new ByteArrayOutputStream(10240); + while (running) { + try { + final Call call = callQueue.take(); // pop the queue; maybe blocked here + + if (LOG.isDebugEnabled()) + LOG.debug(getName() + ": has #" + call.id + " from " + + call.connection); + + String errorClass = null; + String error = null; + Writable value = null; + + CurCall.set(call); + try { + // Make the call as the user via Subject.doAs, thus associating + // the call with the Subject + value = + Subject.doAs(call.connection.user, + new PrivilegedExceptionAction() { + @Override + public Writable run() throws Exception { + // make the call + return call(call.connection.protocol, + call.param, call.timestamp); + + } + } + ); + + } catch (PrivilegedActionException pae) { + Exception e = pae.getException(); + LOG.info(getName()+", call "+call+": error: " + e, e); + errorClass = e.getClass().getName(); + error = StringUtils.stringifyException(e); + } catch (Throwable e) { + LOG.info(getName()+", call "+call+": error: " + e, e); + errorClass = e.getClass().getName(); + error = StringUtils.stringifyException(e); + } + CurCall.set(null); + + setupResponse(buf, call, + (error == null) ? Status.SUCCESS : Status.ERROR, + value, errorClass, error); + responder.doRespond(call); + } catch (InterruptedException e) { + if (running) { // unexpected -- log it + LOG.info(getName() + " caught: " + + StringUtils.stringifyException(e)); + } + } catch (Exception e) { + LOG.info(getName() + " caught: " + + StringUtils.stringifyException(e)); + } + } + LOG.info(getName() + ": exiting"); + } + + } + + protected Server(String bindAddress, int port, + Class paramClass, int handlerCount, + Configuration conf) + throws IOException + { + this(bindAddress, port, paramClass, handlerCount, conf, Integer.toString(port)); + } + /** Constructs a server listening on the named port and address. Parameters passed must + * be of the named class. The handlerCount determines + * the number of handler threads that will be used to process calls. 
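+ *
+ * <p>As an illustrative sketch only (the subclass below is hypothetical, not
+ * part of this patch), a concrete service extends this class and implements
+ * {@link #call(Class, Writable, long)}:
+ * <pre>
+ *   public class EchoServer extends Server {
+ *     public EchoServer(String bindAddress, int port, Configuration conf)
+ *         throws IOException {
+ *       super(bindAddress, port, Text.class, 2, conf);
+ *     }
+ *     public Writable call(Class protocol, Writable param, long receiveTime)
+ *         throws IOException {
+ *       return param; // echo the request back unchanged
+ *     }
+ *   }
+ * </pre>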
+ * + */ + protected Server(String bindAddress, int port, + Class paramClass, int handlerCount, + Configuration conf, String serverName) + throws IOException { + this.bindAddress = bindAddress; + this.conf = conf; + this.port = port; + this.paramClass = paramClass; + this.handlerCount = handlerCount; + this.socketSendBufferSize = 0; + this.maxQueueSize = handlerCount * MAX_QUEUE_SIZE_PER_HANDLER; + this.callQueue = new LinkedBlockingQueue(maxQueueSize); + this.maxIdleTime = 2*conf.getInt("ipc.client.connection.maxidletime", 1000); + this.maxConnectionsToNuke = conf.getInt("ipc.client.kill.max", 10); + this.thresholdIdleConnections = conf.getInt("ipc.client.idlethreshold", 4000); + + // Start the listener here and let it bind to the port + listener = new Listener(); + this.port = listener.getAddress().getPort(); + this.rpcMetrics = new RpcMetrics(serverName, + Integer.toString(this.port), this); + this.tcpNoDelay = conf.getBoolean("ipc.server.tcpnodelay", false); + + + // Create the responder here + responder = new Responder(); + } + + private void closeConnection(Connection connection) { + synchronized (connectionList) { + if (connectionList.remove(connection)) + numConnections--; + } + try { + connection.close(); + } catch (IOException e) { + } + } + + /** + * Setup response for the IPC Call. + * + * @param response buffer to serialize the response into + * @param call {@link Call} to which we are setting up the response + * @param status {@link Status} of the IPC call + * @param rv return value for the IPC Call, if the call was successful + * @param errorClass error class, if the the call failed + * @param error error message, if the call failed + * @throws IOException + */ + private void setupResponse(ByteArrayOutputStream response, + Call call, Status status, + Writable rv, String errorClass, String error) + throws IOException { + response.reset(); + DataOutputStream out = new DataOutputStream(response); + out.writeInt(call.id); // write call id + out.writeInt(status.state); // write status + + if (status == Status.SUCCESS) { + rv.write(out); + } else { + WritableUtils.writeString(out, errorClass); + WritableUtils.writeString(out, error); + } + call.setResponse(ByteBuffer.wrap(response.toByteArray())); + } + + Configuration getConf() { + return conf; + } + + /** Sets the socket buffer size used for responding to RPCs */ + public void setSocketSendBufSize(int size) { this.socketSendBufferSize = size; } + + /** Starts the service. Must be called before any calls will be handled. */ + public synchronized void start() throws IOException { + responder.start(); + listener.start(); + handlers = new Handler[handlerCount]; + + for (int i = 0; i < handlerCount; i++) { + handlers[i] = new Handler(i); + handlers[i].start(); + } + } + + /** Stops the service. No new calls will be handled after this is called. */ + public synchronized void stop() { + LOG.info("Stopping server on " + port); + running = false; + if (handlers != null) { + for (int i = 0; i < handlerCount; i++) { + if (handlers[i] != null) { + handlers[i].interrupt(); + } + } + } + listener.interrupt(); + listener.doStop(); + responder.interrupt(); + notifyAll(); + if (this.rpcMetrics != null) { + this.rpcMetrics.shutdown(); + } + } + + /** Wait for the server to be stopped. + * Does not wait for all subthreads to finish. + * See {@link #stop()}. + */ + public synchronized void join() throws InterruptedException { + while (running) { + wait(); + } + } + + /** + * Return the socket (ip+port) on which the RPC server is listening to. 
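+ * <p>Illustrative note: if the server was constructed with port 0, the address
+ * returned here carries the ephemeral port that was actually bound, e.g.
+ * <pre>
+ *   server.start();
+ *   int boundPort = server.getListenerAddress().getPort();
+ * </pre>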
+ * @return the socket (ip+port) on which the RPC server is listening to. + */ + public synchronized InetSocketAddress getListenerAddress() { + return listener.getAddress(); + } + + /** + * Called for each call. + * @deprecated Use {@link #call(Class, Writable, long)} instead + */ + @Deprecated + public Writable call(Writable param, long receiveTime) throws IOException { + return call(null, param, receiveTime); + } + + /** Called for each call. */ + public abstract Writable call(Class protocol, + Writable param, long receiveTime) + throws IOException; + + /** + * Authorize the incoming client connection. + * + * @param user client user + * @param connection incoming connection + * @throws AuthorizationException when the client isn't authorized to talk the protocol + */ + public void authorize(Subject user, ConnectionHeader connection) + throws AuthorizationException {} + + /** + * The number of open RPC conections + * @return the number of open rpc connections + */ + public int getNumOpenConnections() { + return numConnections; + } + + /** + * The number of rpc calls in the queue. + * @return The number of rpc calls in the queue. + */ + public int getCallQueueLen() { + return callQueue.size(); + } + + + /** + * When the read or write buffer size is larger than this limit, i/o will be + * done in chunks of this size. Most RPC requests and responses would be + * be smaller. + */ + private static int NIO_BUFFER_LIMIT = 8*1024; //should not be more than 64KB. + + /** + * This is a wrapper around {@link WritableByteChannel#write(ByteBuffer)}. + * If the amount of data is large, it writes to channel in smaller chunks. + * This is to avoid jdk from creating many direct buffers as the size of + * buffer increases. This also minimizes extra copies in NIO layer + * as a result of multiple write operations required to write a large + * buffer. + * + * @see WritableByteChannel#write(ByteBuffer) + */ + private static int channelWrite(WritableByteChannel channel, + ByteBuffer buffer) throws IOException { + + return (buffer.remaining() <= NIO_BUFFER_LIMIT) ? + channel.write(buffer) : channelIO(null, channel, buffer); + } + + + /** + * This is a wrapper around {@link ReadableByteChannel#read(ByteBuffer)}. + * If the amount of data is large, it writes to channel in smaller chunks. + * This is to avoid jdk from creating many direct buffers as the size of + * ByteBuffer increases. There should not be any performance degredation. + * + * @see ReadableByteChannel#read(ByteBuffer) + */ + private static int channelRead(ReadableByteChannel channel, + ByteBuffer buffer) throws IOException { + + return (buffer.remaining() <= NIO_BUFFER_LIMIT) ? + channel.read(buffer) : channelIO(channel, null, buffer); + } + + /** + * Helper for {@link #channelRead(ReadableByteChannel, ByteBuffer)} + * and {@link #channelWrite(WritableByteChannel, ByteBuffer)}. Only + * one of readCh or writeCh should be non-null. + * + * @see #channelRead(ReadableByteChannel, ByteBuffer) + * @see #channelWrite(WritableByteChannel, ByteBuffer) + */ + private static int channelIO(ReadableByteChannel readCh, + WritableByteChannel writeCh, + ByteBuffer buf) throws IOException { + + int originalLimit = buf.limit(); + int initialRemaining = buf.remaining(); + int ret = 0; + + while (buf.remaining() > 0) { + try { + int ioSize = Math.min(buf.remaining(), NIO_BUFFER_LIMIT); + buf.limit(buf.position() + ioSize); + + ret = (readCh == null) ? 
writeCh.write(buf) : readCh.read(buf); + + if (ret < ioSize) { + break; + } + + } finally { + buf.limit(originalLimit); + } + } + + int nBytes = initialRemaining - buf.remaining(); + return (nBytes > 0) ? nBytes : ret; + } +} diff --git a/src/java/org/apache/hadoop/ipc/Status.java b/src/java/org/apache/hadoop/ipc/Status.java new file mode 100644 index 00000000000..16fd871ffa6 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/Status.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc; + +/** + * Status of a Hadoop IPC call. + */ +enum Status { + SUCCESS (0), + ERROR (1), + FATAL (-1); + + int state; + private Status(int state) { + this.state = state; + } +} diff --git a/src/java/org/apache/hadoop/ipc/VersionedProtocol.java b/src/java/org/apache/hadoop/ipc/VersionedProtocol.java new file mode 100644 index 00000000000..ef5187522f7 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/VersionedProtocol.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import java.io.IOException; + +/** + * Superclass of all protocols that use Hadoop RPC. + * Subclasses of this interface are also supposed to have + * a static final long versionID field. + */ +public interface VersionedProtocol { + + /** + * Return protocol version corresponding to protocol interface. 
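The Status values above are exactly what setupResponse() earlier in this hunk puts on the wire: the call id, the status code, then either the serialized return value or an error class and message. A minimal sketch, not part of this patch, of how such a frame could be decoded; the DataInput source and the expected value type (Text here) are assumptions of the example, and the real client class is not shown in this hunk.

import java.io.DataInput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;

public class ResponseDecoder {
  /** Reads one response frame in the layout produced by Server.setupResponse(). */
  static Writable readResponse(DataInput in) throws IOException {
    int callId = in.readInt();      // written by out.writeInt(call.id)
    int state  = in.readInt();      // written by out.writeInt(status.state)
    // a real client would use callId to match this frame to a pending call
    if (state == 0) {               // Status.SUCCESS
      Writable value = new Text();  // the type the caller expects back
      value.readFields(in);
      return value;
    } else {                        // Status.ERROR or Status.FATAL
      String errorClass = WritableUtils.readString(in);
      String error = WritableUtils.readString(in);
      throw new IOException(errorClass + ": " + error);
    }
  }
}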
+ * @param protocol The classname of the protocol interface + * @param clientVersion The version of the protocol that the client speaks + * @return the version that the server will speak + */ + public long getProtocolVersion(String protocol, + long clientVersion) throws IOException; +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java b/src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java new file mode 100644 index 00000000000..e2b33b78743 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcActivityMBean.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc.metrics; + +import javax.management.ObjectName; + +import org.apache.hadoop.metrics.util.MBeanUtil; +import org.apache.hadoop.metrics.util.MetricsDynamicMBeanBase; +import org.apache.hadoop.metrics.util.MetricsRegistry; + + + +/** + * + * This is the JMX MBean for reporting the RPC layer Activity. + * The MBean is register using the name + * "hadoop:service=,name=RpcActivityForPort" + * + * Many of the activity metrics are sampled and averaged on an interval + * which can be specified in the metrics config file. + *
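With VersionedProtocol defined, the abstract Server class earlier in this hunk can be exercised directly by subclassing it and implementing call(). A minimal sketch, not part of this patch: EchoProtocol, EchoServer, the bind address and the port number are invented names used only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.VersionedProtocol;

/** An invented protocol; by convention it carries a static final versionID. */
interface EchoProtocol extends VersionedProtocol {
  long versionID = 1L;
}

public class EchoServer extends Server {

  protected EchoServer(Configuration conf) throws IOException {
    // bind address, port, parameter class, number of handler threads, conf
    super("0.0.0.0", 12345, Text.class, 5, conf);
  }

  /** Invoked by a handler thread for every Call taken off the call queue. */
  @Override
  public Writable call(Class protocol, Writable param, long receiveTime)
      throws IOException {
    return param;                 // echo the request back as the response
  }

  public static void main(String[] args) throws Exception {
    Server server = new EchoServer(new Configuration());
    server.start();               // starts the listener, responder and handlers
    server.join();                // blocks until stop() is called elsewhere
  }
}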

+ * For the metrics that are sampled and averaged, one must specify + * a metrics context that does periodic update calls. Most metrics contexts do. + * The default Null metrics context, however, does NOT. So if you aren't + * using any other metrics context, then you can turn on the viewing and averaging + * of sampled metrics by specifying the following two lines + * in the hadoop-metrics.properties file: + *

+ *        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+ *        rpc.period=10
+ *  
+ *

+ * Note that the metrics are collected regardless of the context used. + * The context with the update thread is used to average the data periodically + * + * + * + * Impl details: We use a dynamic mbean that gets the list of the metrics + * from the metrics registry passed as an argument to the constructor + */ + +public class RpcActivityMBean extends MetricsDynamicMBeanBase { + final private ObjectName mbeanName; + + /** + * + * @param mr - the metrics registry that has all the metrics + * @param serviceName - the service name for the rpc service + * @param port - the rpc port. + */ + public RpcActivityMBean(final MetricsRegistry mr, final String serviceName, final String port) { + + + super(mr, "Rpc layer statistics"); + mbeanName = MBeanUtil.registerMBean(serviceName, + "RpcActivityForPort" + port, this); + } + + + public void shutdown() { + if (mbeanName != null) + MBeanUtil.unregisterMBean(mbeanName); + } + +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java new file mode 100644 index 00000000000..a1fbccd06d4 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc.metrics; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.MetricsUtil; +import org.apache.hadoop.metrics.Updater; +import org.apache.hadoop.metrics.util.MetricsBase; +import org.apache.hadoop.metrics.util.MetricsIntValue; +import org.apache.hadoop.metrics.util.MetricsRegistry; +import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate; + +/** + * + * This class is for maintaining the various RPC statistics + * and publishing them through the metrics interfaces. + * This also registers the JMX MBean for RPC. + *

+ * This class has a number of metrics variables that are publicly accessible; + * these variables (objects) have methods to update their values, + * for example: + *

{@link #rpcQueueTime}.inc(time) + * + */ +public class RpcMetrics implements Updater { + public MetricsRegistry registry = new MetricsRegistry(); + private MetricsRecord metricsRecord; + private Server myServer; + private static Log LOG = LogFactory.getLog(RpcMetrics.class); + RpcActivityMBean rpcMBean; + + public RpcMetrics(String hostName, String port, Server server) { + myServer = server; + MetricsContext context = MetricsUtil.getContext("rpc"); + metricsRecord = MetricsUtil.createRecord(context, "metrics"); + + metricsRecord.setTag("port", port); + + LOG.info("Initializing RPC Metrics with hostName=" + + hostName + ", port=" + port); + + context.registerUpdater(this); + + // Need to clean up the interface to RpcMgt - don't need both metrics and server params + rpcMBean = new RpcActivityMBean(registry, hostName, port); + } + + + /** + * The metrics variables are public: + * - they can be set directly by calling their set/inc methods + * -they can also be read directly - e.g. JMX does this. + */ + + public MetricsTimeVaryingRate rpcQueueTime = + new MetricsTimeVaryingRate("RpcQueueTime", registry); + public MetricsTimeVaryingRate rpcProcessingTime = + new MetricsTimeVaryingRate("RpcProcessingTime", registry); + public MetricsIntValue numOpenConnections = + new MetricsIntValue("NumOpenConnections", registry); + public MetricsIntValue callQueueLen = + new MetricsIntValue("callQueueLen", registry); + + /** + * Push the metrics to the monitoring subsystem on doUpdate() call. + */ + public void doUpdates(MetricsContext context) { + + synchronized (this) { + // ToFix - fix server to use the following two metrics directly so + // the metrics do not have be copied here. + numOpenConnections.set(myServer.getNumOpenConnections()); + callQueueLen.set(myServer.getCallQueueLen()); + for (MetricsBase m : registry.getMetricsList()) { + m.pushMetric(metricsRecord); + } + } + metricsRecord.update(); + } + + public void shutdown() { + if (rpcMBean != null) + rpcMBean.shutdown(); + } +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java b/src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java new file mode 100644 index 00000000000..443c1947fe2 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcMgt.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ipc.metrics; + + +import javax.management.ObjectName; + +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.metrics.util.MBeanUtil; + + +/** + * This class implements the RpcMgt MBean + * + */ +class RpcMgt implements RpcMgtMBean { + private RpcMetrics myMetrics; + private Server myServer; + private ObjectName mbeanName; + + RpcMgt(final String serviceName, final String port, + final RpcMetrics metrics, Server server) { + myMetrics = metrics; + myServer = server; + mbeanName = MBeanUtil.registerMBean(serviceName, + "RpcStatisticsForPort" + port, this); + } + + public void shutdown() { + if (mbeanName != null) + MBeanUtil.unregisterMBean(mbeanName); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgProcessingTime() { + return myMetrics.rpcProcessingTime.getPreviousIntervalAverageTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgProcessingTimeMax() { + return myMetrics.rpcProcessingTime.getMaxTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgProcessingTimeMin() { + return myMetrics.rpcProcessingTime.getMinTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgQueueTime() { + return myMetrics.rpcQueueTime.getPreviousIntervalAverageTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgQueueTimeMax() { + return myMetrics.rpcQueueTime.getMaxTime(); + } + + /** + * @inheritDoc + */ + public long getRpcOpsAvgQueueTimeMin() { + return myMetrics.rpcQueueTime.getMinTime(); + } + + /** + * @inheritDoc + */ + public int getRpcOpsNumber() { + return myMetrics.rpcProcessingTime.getPreviousIntervalNumOps() ; + } + + /** + * @inheritDoc + */ + public int getNumOpenConnections() { + return myServer.getNumOpenConnections(); + } + + /** + * @inheritDoc + */ + public int getCallQueueLen() { + return myServer.getCallQueueLen(); + } + + /** + * @inheritDoc + */ + public void resetAllMinMax() { + myMetrics.rpcProcessingTime.resetMinMax(); + myMetrics.rpcQueueTime.resetMinMax(); + } +} diff --git a/src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java b/src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java new file mode 100644 index 00000000000..c92bbac574a --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/metrics/RpcMgtMBean.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ipc.metrics; + + +/** + * + * This is the JMX management interface for the RPC layer. + * Many of the statistics are sampled and averaged on an interval + * which can be specified in the metrics config file. + *
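Because RpcMgt is a standard MBean implementing the RpcMgtMBean interface that follows, its statistics can be read with the plain JMX API. A minimal sketch, not part of this patch: the service name "NameNode" and port 8020 are illustrative, the ObjectName pattern mirrors MBeanUtil.registerMBean(serviceName, "RpcStatisticsForPort" + port, this) above, and it assumes such an MBean has actually been registered in the current JVM.

import java.lang.management.ManagementFactory;

import javax.management.MBeanServer;
import javax.management.ObjectName;

public class RpcStatsProbe {
  public static void main(String[] args) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName name =
        new ObjectName("hadoop:service=NameNode,name=RpcStatisticsForPort8020");

    // JMX derives these attribute names from the RpcMgtMBean getters.
    System.out.println("RpcOpsNumber       = " + mbs.getAttribute(name, "RpcOpsNumber"));
    System.out.println("AvgProcessingTime  = "
        + mbs.getAttribute(name, "RpcOpsAvgProcessingTime"));
    System.out.println("NumOpenConnections = "
        + mbs.getAttribute(name, "NumOpenConnections"));
    System.out.println("CallQueueLen       = " + mbs.getAttribute(name, "CallQueueLen"));

    // Clear the min/max counters through the exposed operation.
    mbs.invoke(name, "resetAllMinMax", new Object[0], new String[0]);
  }
}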

+ * For the statistics that are sampled and averaged, one must specify + * a metrics context that does periodic update calls. Most do. + * The default Null metrics context, however, does NOT. So if you aren't + * using any other metrics context, then you can turn on the viewing and averaging + * of sampled metrics by specifying the following two lines + * in the hadoop-metrics.properties file: + *

+ *        rpc.class=org.apache.hadoop.metrics.spi.NullContextWithUpdateThread
+ *        rpc.period=10
+ *  
+ *

+ * Note that the metrics are collected regardless of the context used. + * The context with the update thread is used to average the data periodically + * + */ +public interface RpcMgtMBean { + + /** + * Number of RPC Operations in the last interval + * @return number of operations + */ + int getRpcOpsNumber(); + + /** + * Average time for RPC Operations in last interval + * @return time in msec + */ + long getRpcOpsAvgProcessingTime(); + + /** + * The Minimum RPC Operation Processing Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgProcessingTimeMin(); + + + /** + * The Maximum RPC Operation Processing Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgProcessingTimeMax(); + + + /** + * The Average RPC Operation Queued Time in the last interval + * @return time in msec + */ + long getRpcOpsAvgQueueTime(); + + + /** + * The Minimum RPC Operation Queued Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgQueueTimeMin(); + + /** + * The Maximum RPC Operation Queued Time since reset was called + * @return time in msec + */ + long getRpcOpsAvgQueueTimeMax(); + + /** + * Reset all min max times + */ + void resetAllMinMax(); + + /** + * The number of open RPC conections + * @return the number of open rpc connections + */ + public int getNumOpenConnections(); + + /** + * The number of rpc calls in the queue. + * @return The number of rpc calls in the queue. + */ + public int getCallQueueLen(); +} diff --git a/src/java/org/apache/hadoop/ipc/package.html b/src/java/org/apache/hadoop/ipc/package.html new file mode 100644 index 00000000000..3efd81a2978 --- /dev/null +++ b/src/java/org/apache/hadoop/ipc/package.html @@ -0,0 +1,23 @@ + + + + + +Tools to help define network clients and servers. + + diff --git a/src/java/org/apache/hadoop/log/LogLevel.java b/src/java/org/apache/hadoop/log/LogLevel.java new file mode 100644 index 00000000000..99fd3d0e7e7 --- /dev/null +++ b/src/java/org/apache/hadoop/log/LogLevel.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.log; + +import java.io.*; +import java.net.*; +import java.util.regex.Pattern; + +import javax.servlet.*; +import javax.servlet.http.*; + +import org.apache.commons.logging.*; +import org.apache.commons.logging.impl.*; +import org.apache.hadoop.util.ServletUtil; + +/** + * Change log level in runtime. 
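The command line entry point that follows takes a -getlevel or -setlevel flag, the host:port of a daemon's HTTP server, a logger name, and (for -setlevel) a level. A minimal sketch of driving it from Java; the host, port, and logger name are illustrative only, and the target daemon must be serving the /logLevel servlet.

public class LogLevelDemo {
  public static void main(String[] args) {
    // Print the current level of one logger on a remote daemon.
    org.apache.hadoop.log.LogLevel.main(new String[] {
        "-getlevel", "namenode.example.com:50070", "org.apache.hadoop.ipc.Server" });

    // Switch the same logger to DEBUG.
    org.apache.hadoop.log.LogLevel.main(new String[] {
        "-setlevel", "namenode.example.com:50070", "org.apache.hadoop.ipc.Server", "DEBUG" });
  }
}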
+ */ +public class LogLevel { + public static final String USAGES = "\nUSAGES:\n" + + "java " + LogLevel.class.getName() + + " -getlevel \n" + + "java " + LogLevel.class.getName() + + " -setlevel \n"; + + /** + * A command line implementation + */ + public static void main(String[] args) { + if (args.length == 3 && "-getlevel".equals(args[0])) { + process("http://" + args[1] + "/logLevel?log=" + args[2]); + return; + } + else if (args.length == 4 && "-setlevel".equals(args[0])) { + process("http://" + args[1] + "/logLevel?log=" + args[2] + + "&level=" + args[3]); + return; + } + + System.err.println(USAGES); + System.exit(-1); + } + + private static void process(String urlstring) { + try { + URL url = new URL(urlstring); + System.out.println("Connecting to " + url); + URLConnection connection = url.openConnection(); + connection.connect(); + + BufferedReader in = new BufferedReader(new InputStreamReader( + connection.getInputStream())); + for(String line; (line = in.readLine()) != null; ) + if (line.startsWith(MARKER)) { + System.out.println(TAG.matcher(line).replaceAll("")); + } + in.close(); + } catch (IOException ioe) { + System.err.println("" + ioe); + } + } + + static final String MARKER = ""; + static final Pattern TAG = Pattern.compile("<[^>]*>"); + + /** + * A servlet implementation + */ + public static class Servlet extends HttpServlet { + private static final long serialVersionUID = 1L; + + public void doGet(HttpServletRequest request, HttpServletResponse response + ) throws ServletException, IOException { + PrintWriter out = ServletUtil.initHTML(response, "Log Level"); + String logName = ServletUtil.getParameter(request, "log"); + String level = ServletUtil.getParameter(request, "level"); + + if (logName != null) { + out.println("


<br /><hr /><h3>Results</h3>"); + out.println(MARKER + + "Submitted Log Name: <b>" + logName + "</b><br />"); + + Log log = LogFactory.getLog(logName); + out.println(MARKER + + "Log Class: <b>" + log.getClass().getName() +"</b><br />"); + if (level != null) { + out.println(MARKER + "Submitted Level: <b>" + level + "</b><br />"); + } + + if (log instanceof Log4JLogger) { + process(((Log4JLogger)log).getLogger(), level, out); + } + else if (log instanceof Jdk14Logger) { + process(((Jdk14Logger)log).getLogger(), level, out); + } + else { + out.println("Sorry, " + log.getClass() + " not supported.<br />"); + } + } + + out.println(FORMS); + out.println(ServletUtil.HTML_TAIL); + }
+ + static final String FORMS = "\n<br /><hr /><h3>Get / Set</h3>" + + "\n<form>Log: <input type='text' size='50' name='log' /> " + + "<input type='submit' value='Get Log Level' />" + + "</form>" + + "\n<form>Log: <input type='text' size='50' name='log' /> " + + "Level: <input type='text' name='level' /> " + + "<input type='submit' value='Set Log Level' />" + + "</form>";
+ + private static void process(org.apache.log4j.Logger log, String level, + PrintWriter out) throws IOException { + if (level != null) { + log.setLevel(org.apache.log4j.Level.toLevel(level)); + out.println(MARKER + "Setting Level to " + level + " ...<br />"); + } + out.println(MARKER + + "Effective level: <b>" + log.getEffectiveLevel() + "</b><br />"); + }
+ + private static void process(java.util.logging.Logger log, String level, + PrintWriter out) throws IOException { + if (level != null) { + log.setLevel(java.util.logging.Level.parse(level)); + out.println(MARKER + "Setting Level to " + level + " ...<br />"); + } + + java.util.logging.Level lev; + for(; (lev = log.getLevel()) == null; log = log.getParent()); + out.println(MARKER + "Effective level: <b>" + lev + "</b><br />
"); + } + } +} diff --git a/src/java/org/apache/hadoop/metrics/ContextFactory.java b/src/java/org/apache/hadoop/metrics/ContextFactory.java new file mode 100644 index 00000000000..67bd9f95006 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/ContextFactory.java @@ -0,0 +1,204 @@ +/* + * ContextFactory.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext; +import org.apache.hadoop.metrics.spi.NullContext; + +/** + * Factory class for creating MetricsContext objects. To obtain an instance + * of this class, use the static getFactory() method. + */ +public class ContextFactory { + + private static final String PROPERTIES_FILE = + "/hadoop-metrics.properties"; + private static final String CONTEXT_CLASS_SUFFIX = + ".class"; + private static final String DEFAULT_CONTEXT_CLASSNAME = + "org.apache.hadoop.metrics.spi.NullContext"; + + private static ContextFactory theFactory = null; + + private Map attributeMap = new HashMap(); + private Map contextMap = + new HashMap(); + + // Used only when contexts, or the ContextFactory itself, cannot be + // created. + private static Map nullContextMap = + new HashMap(); + + /** Creates a new instance of ContextFactory */ + protected ContextFactory() { + } + + /** + * Returns the value of the named attribute, or null if there is no + * attribute of that name. + * + * @param attributeName the attribute name + * @return the attribute value + */ + public Object getAttribute(String attributeName) { + return attributeMap.get(attributeName); + } + + /** + * Returns the names of all the factory's attributes. + * + * @return the attribute names + */ + public String[] getAttributeNames() { + String[] result = new String[attributeMap.size()]; + int i = 0; + // for (String attributeName : attributeMap.keySet()) { + Iterator it = attributeMap.keySet().iterator(); + while (it.hasNext()) { + result[i++] = (String) it.next(); + } + return result; + } + + /** + * Sets the named factory attribute to the specified value, creating it + * if it did not already exist. If the value is null, this is the same as + * calling removeAttribute. + * + * @param attributeName the attribute name + * @param value the new attribute value + */ + public void setAttribute(String attributeName, Object value) { + attributeMap.put(attributeName, value); + } + + /** + * Removes the named attribute if it exists. 
+ * + * @param attributeName the attribute name + */ + public void removeAttribute(String attributeName) { + attributeMap.remove(attributeName); + } + + /** + * Returns the named MetricsContext instance, constructing it if necessary + * using the factory's current configuration attributes.

+ * + * When constructing the instance, if the factory property + * contextName.class exists, + * its value is taken to be the name of the class to instantiate. Otherwise, + * the default is to create an instance of + * org.apache.hadoop.metrics.spi.NullContext, which is a + * dummy "no-op" context which will cause all metric data to be discarded. + * + * @param contextName the name of the context + * @return the named MetricsContext + */ + public synchronized MetricsContext getContext(String refName, String contextName) + throws IOException, ClassNotFoundException, + InstantiationException, IllegalAccessException { + MetricsContext metricsContext = contextMap.get(refName); + if (metricsContext == null) { + String classNameAttribute = refName + CONTEXT_CLASS_SUFFIX; + String className = (String) getAttribute(classNameAttribute); + if (className == null) { + className = DEFAULT_CONTEXT_CLASSNAME; + } + Class contextClass = Class.forName(className); + metricsContext = (MetricsContext) contextClass.newInstance(); + metricsContext.init(contextName, this); + contextMap.put(contextName, metricsContext); + } + return metricsContext; + } + + public synchronized MetricsContext getContext(String contextName) + throws IOException, ClassNotFoundException, InstantiationException, + IllegalAccessException { + return getContext(contextName, contextName); + } + + /** + * Returns all MetricsContexts built by this factory. + */ + public synchronized Collection getAllContexts() { + // Make a copy to avoid race conditions with creating new contexts. + return new ArrayList(contextMap.values()); + } + + /** + * Returns a "null" context - one which does nothing. + */ + public static synchronized MetricsContext getNullContext(String contextName) { + MetricsContext nullContext = nullContextMap.get(contextName); + if (nullContext == null) { + nullContext = new NullContext(); + nullContextMap.put(contextName, nullContext); + } + return nullContext; + } + + /** + * Returns the singleton ContextFactory instance, constructing it if + * necessary.

+ * + * When the instance is constructed, this method checks if the file + * hadoop-metrics.properties exists on the class path. If it + * exists, it must be in the format defined by java.util.Properties, and all + * the properties in the file are set as attributes on the newly created + * ContextFactory instance. + * + * @return the singleton ContextFactory instance + */ + public static synchronized ContextFactory getFactory() throws IOException { + if (theFactory == null) { + theFactory = new ContextFactory(); + theFactory.setAttributes(); + } + return theFactory; + } + + private void setAttributes() throws IOException { + InputStream is = getClass().getResourceAsStream(PROPERTIES_FILE); + if (is != null) { + Properties properties = new Properties(); + properties.load(is); + //for (Object propertyNameObj : properties.keySet()) { + Iterator it = properties.keySet().iterator(); + while (it.hasNext()) { + String propertyName = (String) it.next(); + String propertyValue = properties.getProperty(propertyName); + setAttribute(propertyName, propertyValue); + } + is.close(); + } + } + +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsContext.java b/src/java/org/apache/hadoop/metrics/MetricsContext.java new file mode 100644 index 00000000000..588a5720153 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsContext.java @@ -0,0 +1,118 @@ +/* + * MetricsContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import org.apache.hadoop.metrics.spi.OutputRecord; + +/** + * The main interface to the metrics package. + */ +public interface MetricsContext { + + /** + * Default period in seconds at which data is sent to the metrics system. + */ + public static final int DEFAULT_PERIOD = 5; + + /** + * Initialize this context. + * @param contextName The given name for this context + * @param factory The creator of this context + */ + public void init(String contextName, ContextFactory factory); + + /** + * Returns the context name. + * + * @return the context name + */ + public abstract String getContextName(); + + /** + * Starts or restarts monitoring, the emitting of metrics records as they are + * updated. + */ + public abstract void startMonitoring() + throws IOException; + + /** + * Stops monitoring. This does not free any data that the implementation + * may have buffered for sending at the next timer event. It + * is OK to call startMonitoring() again after calling + * this. + * @see #close() + */ + public abstract void stopMonitoring(); + + /** + * Returns true if monitoring is currently in progress. 
+ */ + public abstract boolean isMonitoring(); + + /** + * Stops monitoring and also frees any buffered data, returning this + * object to its initial state. + */ + public abstract void close(); + + /** + * Creates a new MetricsRecord instance with the given recordName. + * Throws an exception if the metrics implementation is configured with a fixed + * set of record names and recordName is not in that set. + * + * @param recordName the name of the record + * @throws MetricsException if recordName conflicts with configuration data + */ + public abstract MetricsRecord createRecord(String recordName); + + /** + * Registers a callback to be called at regular time intervals, as + * determined by the implementation-class specific configuration. + * + * @param updater object to be run periodically; it should updated + * some metrics records and then return + */ + public abstract void registerUpdater(Updater updater); + + /** + * Removes a callback, if it exists. + * + * @param updater object to be removed from the callback list + */ + public abstract void unregisterUpdater(Updater updater); + + /** + * Returns the timer period. + */ + public abstract int getPeriod(); + + /** + * Retrieves all the records managed by this MetricsContext. + * Useful for monitoring systems that are polling-based. + * + * @return A non-null map from all record names to the records managed. + */ + Map> getAllRecords(); +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsException.java b/src/java/org/apache/hadoop/metrics/MetricsException.java new file mode 100644 index 00000000000..8e4f7a0497d --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsException.java @@ -0,0 +1,42 @@ +/* + * MetricsException.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +/** + * General-purpose, unchecked metrics exception. + */ +public class MetricsException extends RuntimeException { + + private static final long serialVersionUID = -1643257498540498497L; + + /** Creates a new instance of MetricsException */ + public MetricsException() { + } + + /** Creates a new instance of MetricsException + * + * @param message an error message + */ + public MetricsException(String message) { + super(message); + } + +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsRecord.java b/src/java/org/apache/hadoop/metrics/MetricsRecord.java new file mode 100644 index 00000000000..cec80f225a2 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsRecord.java @@ -0,0 +1,246 @@ +/* + * MetricsRecord.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +/** + * A named and optionally tagged set of records to be sent to the metrics + * system.

+ * + * A record name identifies the kind of data to be reported. For example, a + * program reporting statistics relating to the disks on a computer might use + * a record name "diskStats".

+ * + * A record has zero or more tags. A tag has a name and a value. To + * continue the example, the "diskStats" record might use a tag named + * "diskName" to identify a particular disk. Sometimes it is useful to have + * more than one tag, so there might also be a "diskType" with value "ide" or + * "scsi" or whatever.

+ * + * A record also has zero or more metrics. These are the named + * values that are to be reported to the metrics system. In the "diskStats" + * example, possible metric names would be "diskPercentFull", "diskPercentBusy", + * "kbReadPerSecond", etc.

+ * + * The general procedure for using a MetricsRecord is to fill in its tag and + * metric values, and then call update() to pass the record to the + * client library. + * Metric data is not immediately sent to the metrics system + * each time that update() is called. + * An internal table is maintained, identified by the record name. This + * table has columns + * corresponding to the tag and the metric names, and rows + * corresponding to each unique set of tag values. An update + * either modifies an existing row in the table, or adds a new row with a set of + * tag values that are different from all the other rows. Note that if there + * are no tags, then there can be at most one row in the table.

+ * + * Once a row is added to the table, its data will be sent to the metrics system + * on every timer period, whether or not it has been updated since the previous + * timer period. If this is inappropriate, for example if metrics were being + * reported by some transient object in an application, the remove() + * method can be used to remove the row and thus stop the data from being + * sent.
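A minimal sketch of the procedure just described, reusing the javadoc's "diskStats" example; the context name "myContext", the tag value, and the metric numbers are invented, and which context implementation actually emits the data depends on the hadoop-metrics.properties configuration.

import org.apache.hadoop.metrics.ContextFactory;
import org.apache.hadoop.metrics.MetricsContext;
import org.apache.hadoop.metrics.MetricsRecord;

public class DiskStatsReporter {
  public static void main(String[] args) throws Exception {
    MetricsContext context = ContextFactory.getFactory().getContext("myContext");
    if (!context.isMonitoring()) {
      context.startMonitoring();            // begin the periodic timer
    }

    MetricsRecord record = context.createRecord("diskStats");

    // One buffered row per unique combination of tag values.
    record.setTag("diskName", "sda");
    record.setMetric("diskPercentFull", 72);
    record.incrMetric("kbReadPerSecond", 1500);

    // Copy the values into the buffered table; they are emitted on every
    // timer period until the row is removed.
    record.update();

    // If the disk goes away, drop its row so it stops being reported.
    record.remove();
  }
}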

+ * + * Note that the update() method is atomic. This means that it is + * safe for different threads to be updating the same metric. More precisely, + * it is OK for different threads to call update() on MetricsRecord instances + * with the same set of tag names and tag values. Different threads should + * not use the same MetricsRecord instance at the same time. + */ +public interface MetricsRecord { + + /** + * Returns the record name. + * + * @return the record name + */ + public abstract String getRecordName(); + + /** + * Sets the named tag to the specified value. The tagValue may be null, + * which is treated the same as an empty String. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, String tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, int tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, long tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, short tagValue); + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public abstract void setTag(String tagName, byte tagValue); + + /** + * Removes any tag of the specified name. + * + * @param tagName name of a tag + */ + public abstract void removeTag(String tagName); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, int metricValue); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, long metricValue); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, short metricValue); + + /** + * Sets the named metric to the specified value. 
+ * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, byte metricValue); + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void setMetric(String metricName, float metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, int metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, long metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, short metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, byte metricValue); + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public abstract void incrMetric(String metricName, float metricValue); + + /** + * Updates the table of buffered data which is to be sent periodically. + * If the tag values match an existing row, that row is updated; + * otherwise, a new row is added. + */ + public abstract void update(); + + /** + * Removes, from the buffered data table, all rows having tags + * that equal the tags that have been set on this record. For example, + * if there are no tags on this record, all rows for this record name + * would be removed. Or, if there is a single tag on this record, then + * just rows containing a tag with the same name and value would be removed. + */ + public abstract void remove(); + +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsServlet.java b/src/java/org/apache/hadoop/metrics/MetricsServlet.java new file mode 100644 index 00000000000..44c0bd39654 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsServlet.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.hadoop.metrics.spi.OutputRecord; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.MetricMap; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.TagMap; +import org.mortbay.util.ajax.JSON; +import org.mortbay.util.ajax.JSON.Output; + +/** + * A servlet to print out metrics data. By default, the servlet returns a + * textual representation (no promises are made for parseability), and + * users can use "?format=json" for parseable output. + */ +public class MetricsServlet extends HttpServlet { + + /** + * A helper class to hold a TagMap and MetricMap. + */ + static class TagsMetricsPair implements JSON.Convertible { + final TagMap tagMap; + final MetricMap metricMap; + + public TagsMetricsPair(TagMap tagMap, MetricMap metricMap) { + this.tagMap = tagMap; + this.metricMap = metricMap; + } + + @SuppressWarnings("unchecked") + public void fromJSON(Map map) { + throw new UnsupportedOperationException(); + } + + /** Converts to JSON by providing an array. */ + public void toJSON(Output out) { + out.add(new Object[] { tagMap, metricMap }); + } + } + + /** + * Collects all metric data, and returns a map: + * contextName -> recordName -> [ (tag->tagValue), (metric->metricValue) ]. + * The values are either String or Number. The final value is implemented + * as a list of TagsMetricsPair. + */ + Map>> makeMap( + Collection contexts) throws IOException { + Map>> map = + new TreeMap>>(); + + for (MetricsContext context : contexts) { + Map> records = + new TreeMap>(); + map.put(context.getContextName(), records); + + for (Map.Entry> r : + context.getAllRecords().entrySet()) { + List metricsAndTags = + new ArrayList(); + records.put(r.getKey(), metricsAndTags); + for (OutputRecord outputRecord : r.getValue()) { + TagMap tagMap = outputRecord.getTagsCopy(); + MetricMap metricMap = outputRecord.getMetricsCopy(); + metricsAndTags.add(new TagsMetricsPair(tagMap, metricMap)); + } + } + } + return map; + } + + @Override + public void doGet(HttpServletRequest request, HttpServletResponse response) + throws ServletException, IOException { + PrintWriter out = new PrintWriter(response.getOutputStream()); + String format = request.getParameter("format"); + Collection allContexts = + ContextFactory.getFactory().getAllContexts(); + if ("json".equals(format)) { + // Uses Jetty's built-in JSON support to convert the map into JSON. 
+ out.print(new JSON().toJSON(makeMap(allContexts))); + } else { + printMap(out, makeMap(allContexts)); + } + out.close(); + } + + /** + * Prints metrics data in a multi-line text form. + */ + void printMap(PrintWriter out, Map>> map) { + for (Map.Entry>> context : map.entrySet()) { + out.println(context.getKey()); + for (Map.Entry> record : context.getValue().entrySet()) { + indent(out, 1); + out.println(record.getKey()); + for (TagsMetricsPair pair : record.getValue()) { + indent(out, 2); + // Prints tag values in the form "{key=value,key=value}:" + out.print("{"); + boolean first = true; + for (Map.Entry tagValue : pair.tagMap.entrySet()) { + if (first) { + first = false; + } else { + out.print(","); + } + out.print(tagValue.getKey()); + out.print("="); + out.print(tagValue.getValue().toString()); + } + out.println("}:"); + + // Now print metric values, one per line + for (Map.Entry metricValue : + pair.metricMap.entrySet()) { + indent(out, 3); + out.print(metricValue.getKey()); + out.print("="); + out.println(metricValue.getValue().toString()); + } + } + } + } + } + + private void indent(PrintWriter out, int indent) { + for (int i = 0; i < indent; ++i) { + out.append(" "); + } + } +} diff --git a/src/java/org/apache/hadoop/metrics/MetricsUtil.java b/src/java/org/apache/hadoop/metrics/MetricsUtil.java new file mode 100644 index 00000000000..09b9de62ea5 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/MetricsUtil.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Utility class to simplify creation and reporting of hadoop metrics. + * + * For examples of usage, see NameNodeMetrics. + * @see org.apache.hadoop.metrics.MetricsRecord + * @see org.apache.hadoop.metrics.MetricsContext + * @see org.apache.hadoop.metrics.ContextFactory + */ +public class MetricsUtil { + + public static final Log LOG = + LogFactory.getLog(MetricsUtil.class); + + /** + * Don't allow creation of a new instance of Metrics + */ + private MetricsUtil() {} + + public static MetricsContext getContext(String contextName) { + return getContext(contextName, contextName); + } + + /** + * Utility method to return the named context. + * If the desired context cannot be created for any reason, the exception + * is logged, and a null context is returned. 
+ */ + public static MetricsContext getContext(String refName, String contextName) { + MetricsContext metricsContext; + try { + metricsContext = + ContextFactory.getFactory().getContext(refName, contextName); + if (!metricsContext.isMonitoring()) { + metricsContext.startMonitoring(); + } + } catch (Exception ex) { + LOG.error("Unable to create metrics context " + contextName, ex); + metricsContext = ContextFactory.getNullContext(contextName); + } + return metricsContext; + } + + /** + * Utility method to create and return new metrics record instance within the + * given context. This record is tagged with the host name. + * + * @param context the context + * @param recordName name of the record + * @return newly created metrics record + */ + public static MetricsRecord createRecord(MetricsContext context, + String recordName) + { + MetricsRecord metricsRecord = context.createRecord(recordName); + metricsRecord.setTag("hostName", getHostName()); + return metricsRecord; + } + + /** + * Returns the host name. If the host name is unobtainable, logs the + * exception and returns "unknown". + */ + private static String getHostName() { + String hostName = null; + try { + hostName = InetAddress.getLocalHost().getHostName(); + } + catch (UnknownHostException ex) { + LOG.info("Unable to obtain hostName", ex); + hostName = "unknown"; + } + return hostName; + } + +} diff --git a/src/java/org/apache/hadoop/metrics/Updater.java b/src/java/org/apache/hadoop/metrics/Updater.java new file mode 100644 index 00000000000..e418ec09c29 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/Updater.java @@ -0,0 +1,33 @@ +/* + * Updater.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics; + +/** + * Call-back interface. See MetricsContext.registerUpdater(). + */ +public interface Updater { + + /** + * Timer-based call-back from the metric library. + */ + public abstract void doUpdates(MetricsContext context); + +} diff --git a/src/java/org/apache/hadoop/metrics/file/FileContext.java b/src/java/org/apache/hadoop/metrics/file/FileContext.java new file mode 100644 index 00000000000..16193276974 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/file/FileContext.java @@ -0,0 +1,139 @@ +/* + * FileContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.file; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext; +import org.apache.hadoop.metrics.spi.OutputRecord; + +/** + * Metrics context for writing metrics to a file.

+ * + * This class is configured by setting ContextFactory attributes which in turn + * are usually configured through a properties file. All the attributes are + * prefixed by the contextName. For example, the properties file might contain: + *

+ * myContextName.fileName=/tmp/metrics.log
+ * myContextName.period=5
+ * 
+ */ +public class FileContext extends AbstractMetricsContext { + + /* Configuration attribute names */ + protected static final String FILE_NAME_PROPERTY = "fileName"; + protected static final String PERIOD_PROPERTY = "period"; + + private File file = null; // file for metrics to be written to + private PrintWriter writer = null; + + /** Creates a new instance of FileContext */ + public FileContext() {} + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + + String fileName = getAttribute(FILE_NAME_PROPERTY); + if (fileName != null) { + file = new File(fileName); + } + + parseAndSetPeriod(PERIOD_PROPERTY); + } + + /** + * Returns the configured file name, or null. + */ + public String getFileName() { + if (file == null) { + return null; + } else { + return file.getName(); + } + } + + /** + * Starts or restarts monitoring, by opening in append-mode, the + * file specified by the fileName attribute, + * if specified. Otherwise the data will be written to standard + * output. + */ + public void startMonitoring() + throws IOException + { + if (file == null) { + writer = new PrintWriter(new BufferedOutputStream(System.out)); + } else { + writer = new PrintWriter(new FileWriter(file, true)); + } + super.startMonitoring(); + } + + /** + * Stops monitoring, closing the file. + * @see #close() + */ + public void stopMonitoring() { + super.stopMonitoring(); + + if (writer != null) { + writer.close(); + writer = null; + } + } + + /** + * Emits a metrics record to a file. + */ + public void emitRecord(String contextName, String recordName, OutputRecord outRec) { + writer.print(contextName); + writer.print("."); + writer.print(recordName); + String separator = ": "; + for (String tagName : outRec.getTagNames()) { + writer.print(separator); + separator = ", "; + writer.print(tagName); + writer.print("="); + writer.print(outRec.getTag(tagName)); + } + for (String metricName : outRec.getMetricNames()) { + writer.print(separator); + separator = ", "; + writer.print(metricName); + writer.print("="); + writer.print(outRec.getMetric(metricName)); + } + writer.println(); + } + + /** + * Flushes the output writer, forcing updates to disk. + */ + public void flush() { + writer.flush(); + } +} diff --git a/src/java/org/apache/hadoop/metrics/file/package.html b/src/java/org/apache/hadoop/metrics/file/package.html new file mode 100644 index 00000000000..73584864e27 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/file/package.html @@ -0,0 +1,43 @@ + + + + + +Implementation of the metrics package that writes the metrics to a file. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

+These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

+
contextName.fileName
+
The path of the file to which metrics in context contextName + are to be appended. If this attribute is not specified, the metrics + are written to standard output by default.
+ +
contextName.period
+
The period in seconds on which the metric data is written to the + file.
+ +
+ + + + diff --git a/src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java b/src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java new file mode 100644 index 00000000000..1affb02f727 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/ganglia/GangliaContext.java @@ -0,0 +1,231 @@ +/* + * GangliaContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.ganglia; + +import java.io.IOException; +import java.net.DatagramPacket; +import java.net.DatagramSocket; +import java.net.SocketAddress; +import java.net.SocketException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsException; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext; +import org.apache.hadoop.metrics.spi.OutputRecord; +import org.apache.hadoop.metrics.spi.Util; + +/** + * Context for sending metrics to Ganglia. 
+ * + */ +public class GangliaContext extends AbstractMetricsContext { + + private static final String PERIOD_PROPERTY = "period"; + private static final String SERVERS_PROPERTY = "servers"; + private static final String UNITS_PROPERTY = "units"; + private static final String SLOPE_PROPERTY = "slope"; + private static final String TMAX_PROPERTY = "tmax"; + private static final String DMAX_PROPERTY = "dmax"; + + private static final String DEFAULT_UNITS = ""; + private static final String DEFAULT_SLOPE = "both"; + private static final int DEFAULT_TMAX = 60; + private static final int DEFAULT_DMAX = 0; + private static final int DEFAULT_PORT = 8649; + private static final int BUFFER_SIZE = 1500; // as per libgmond.c + + private final Log LOG = LogFactory.getLog(this.getClass()); + + private static final Map typeTable = new HashMap(5); + + static { + typeTable.put(String.class, "string"); + typeTable.put(Byte.class, "int8"); + typeTable.put(Short.class, "int16"); + typeTable.put(Integer.class, "int32"); + typeTable.put(Long.class, "float"); + typeTable.put(Float.class, "float"); + } + + private byte[] buffer = new byte[BUFFER_SIZE]; + private int offset; + + private List metricsServers; + private Map unitsTable; + private Map slopeTable; + private Map tmaxTable; + private Map dmaxTable; + + private DatagramSocket datagramSocket; + + /** Creates a new instance of GangliaContext */ + public GangliaContext() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + parseAndSetPeriod(PERIOD_PROPERTY); + + metricsServers = + Util.parse(getAttribute(SERVERS_PROPERTY), DEFAULT_PORT); + + unitsTable = getAttributeTable(UNITS_PROPERTY); + slopeTable = getAttributeTable(SLOPE_PROPERTY); + tmaxTable = getAttributeTable(TMAX_PROPERTY); + dmaxTable = getAttributeTable(DMAX_PROPERTY); + + try { + datagramSocket = new DatagramSocket(); + } + catch (SocketException se) { + se.printStackTrace(); + } + } + + public void emitRecord(String contextName, String recordName, + OutputRecord outRec) + throws IOException { + // Setup so that the records have the proper leader names so they are + // unambiguous at the ganglia level, and this prevents a lot of rework + StringBuilder sb = new StringBuilder(); + sb.append(contextName); + sb.append('.'); + sb.append(recordName); + sb.append('.'); + int sbBaseLen = sb.length(); + + // emit each metric in turn + for (String metricName : outRec.getMetricNames()) { + Object metric = outRec.getMetric(metricName); + String type = typeTable.get(metric.getClass()); + if (type != null) { + sb.append(metricName); + emitMetric(sb.toString(), type, metric.toString()); + sb.setLength(sbBaseLen); + } else { + LOG.warn("Unknown metrics type: " + metric.getClass()); + } + } + } + + private void emitMetric(String name, String type, String value) + throws IOException { + String units = getUnits(name); + int slope = getSlope(name); + int tmax = getTmax(name); + int dmax = getDmax(name); + + offset = 0; + xdr_int(0); // metric_user_defined + xdr_string(type); + xdr_string(name); + xdr_string(value); + xdr_string(units); + xdr_int(slope); + xdr_int(tmax); + xdr_int(dmax); + + for (SocketAddress socketAddress : metricsServers) { + DatagramPacket packet = + new DatagramPacket(buffer, offset, socketAddress); + datagramSocket.send(packet); + } + } + + private String getUnits(String metricName) { + String result = unitsTable.get(metricName); + if (result == null) { + result = DEFAULT_UNITS; + } + return result; + } + + private int getSlope(String 
metricName) { + String slopeString = slopeTable.get(metricName); + if (slopeString == null) { + slopeString = DEFAULT_SLOPE; + } + return ("zero".equals(slopeString) ? 0 : 3); // see gmetric.c + } + + private int getTmax(String metricName) { + if (tmaxTable == null) { + return DEFAULT_TMAX; + } + String tmaxString = tmaxTable.get(metricName); + if (tmaxString == null) { + return DEFAULT_TMAX; + } + else { + return Integer.parseInt(tmaxString); + } + } + + private int getDmax(String metricName) { + String dmaxString = dmaxTable.get(metricName); + if (dmaxString == null) { + return DEFAULT_DMAX; + } + else { + return Integer.parseInt(dmaxString); + } + } + + /** + * Puts a string into the buffer by first writing the size of the string + * as an int, followed by the bytes of the string, padded if necessary to + * a multiple of 4. + */ + private void xdr_string(String s) { + byte[] bytes = s.getBytes(); + int len = bytes.length; + xdr_int(len); + System.arraycopy(bytes, 0, buffer, offset, len); + offset += len; + pad(); + } + + /** + * Pads the buffer with zero bytes up to the nearest multiple of 4. + */ + private void pad() { + int newOffset = ((offset + 3) / 4) * 4; + while (offset < newOffset) { + buffer[offset++] = 0; + } + } + + /** + * Puts an integer into the buffer as 4 bytes, big-endian. + */ + private void xdr_int(int i) { + buffer[offset++] = (byte)((i >> 24) & 0xff); + buffer[offset++] = (byte)((i >> 16) & 0xff); + buffer[offset++] = (byte)((i >> 8) & 0xff); + buffer[offset++] = (byte)(i & 0xff); + } +} diff --git a/src/java/org/apache/hadoop/metrics/ganglia/package.html b/src/java/org/apache/hadoop/metrics/ganglia/package.html new file mode 100644 index 00000000000..87598e50332 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/ganglia/package.html @@ -0,0 +1,74 @@ + + + + + + + + +Implementation of the metrics package that sends metric data to +Ganglia. +Programmers should not normally need to use this package directly. Instead +they should use org.hadoop.metrics. + +

+These are the implementation specific factory attributes +(See ContextFactory.getFactory()): + +

+
contextName.servers
+
Space and/or comma separated sequence of servers to which UDP + messages should be sent.
+ +
contextName.period
+
The period in seconds on which the metric data is sent to the + server(s).
+ +
contextName.units.recordName.metricName
+
The units for the specified metric in the specified record.
+ +
contextName.slope.recordName.metricName
+
The slope for the specified metric in the specified record.
+ +
contextName.tmax.recordName.metricName
+
The tmax for the specified metric in the specified record.
+ +
contextName.dmax.recordName.metricName
+
The dmax for the specified metric in the specified record.
+ +
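As a concrete, purely illustrative sketch, a hadoop-metrics.properties fragment wiring a context to this Ganglia implementation might look like the following. The context name dfs, record name dfsRecord, metric name bytesWritten and the gmond host names are hypothetical; the contextName.class attribute is the one described in the org.apache.hadoop.metrics package documentation, and 8649 is the default port this implementation assumes when none is given.

    # hypothetical context bound to the Ganglia implementation
    dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
    dfs.servers=gmond-host1:8649, gmond-host2:8649
    dfs.period=10
    # per-metric tuning, keyed as contextName.attr.recordName.metricName
    dfs.units.dfsRecord.bytesWritten=bytes
    dfs.slope.dfsRecord.bytesWritten=both
    dfs.tmax.dfsRecord.bytesWritten=60
    dfs.dmax.dfsRecord.bytesWritten=0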
+ + + + diff --git a/src/java/org/apache/hadoop/metrics/jvm/EventCounter.java b/src/java/org/apache/hadoop/metrics/jvm/EventCounter.java new file mode 100644 index 00000000000..deb476eed69 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/jvm/EventCounter.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.jvm; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.spi.LoggingEvent; + +/** + * A log4J Appender that simply counts logging events in three levels: + * fatal, error and warn. + */ +public class EventCounter extends AppenderSkeleton { + + private static final int FATAL = 0; + private static final int ERROR = 1; + private static final int WARN = 2; + private static final int INFO = 3; + + private static class EventCounts { + private final long[] counts = { 0, 0, 0, 0 }; + + private synchronized void incr(int i) { + ++counts[i]; + } + + private synchronized long get(int i) { + return counts[i]; + } + } + private static EventCounts counts = new EventCounts(); + + public static long getFatal() { + return counts.get(FATAL); + } + + public static long getError() { + return counts.get(ERROR); + } + + public static long getWarn() { + return counts.get(WARN); + } + + public static long getInfo() { + return counts.get(INFO); + } + + public void append(LoggingEvent event) { + Level level = event.getLevel(); + if (level == Level.INFO) { + counts.incr(INFO); + } + else if (level == Level.WARN) { + counts.incr(WARN); + } + else if (level == Level.ERROR) { + counts.incr(ERROR); + } + else if (level == Level.FATAL) { + counts.incr(FATAL); + } + + } + + // Strange: these two methods are abstract in AppenderSkeleton, but not + // included in the javadoc (log4j 1.2.13). + + public void close() { + } + public boolean requiresLayout() { + return false; + } + + + +} diff --git a/src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java b/src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java new file mode 100644 index 00000000000..c51916875bc --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/jvm/JvmMetrics.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.jvm; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryUsage; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.MetricsUtil; +import org.apache.hadoop.metrics.Updater; + +import static java.lang.Thread.State.*; +import java.lang.management.GarbageCollectorMXBean; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Singleton class which reports Java Virtual Machine metrics to the metrics API. + * Any application can create an instance of this class in order to emit + * Java VM metrics. + */ +public class JvmMetrics implements Updater { + + private static final float M = 1024*1024; + private static JvmMetrics theInstance = null; + private static Log log = LogFactory.getLog(JvmMetrics.class); + + private MetricsRecord metrics; + + // garbage collection counters + private long gcCount = 0; + private long gcTimeMillis = 0; + + // logging event counters + private long fatalCount = 0; + private long errorCount = 0; + private long warnCount = 0; + private long infoCount = 0; + + public synchronized static JvmMetrics init(String processName, String sessionId) { + return init(processName, sessionId, "metrics"); + } + + public synchronized static JvmMetrics init(String processName, String sessionId, + String recordName) { + if (theInstance != null) { + log.info("Cannot initialize JVM Metrics with processName=" + + processName + ", sessionId=" + sessionId + + " - already initialized"); + } + else { + log.info("Initializing JVM Metrics with processName=" + + processName + ", sessionId=" + sessionId); + theInstance = new JvmMetrics(processName, sessionId, recordName); + } + return theInstance; + } + + /** Creates a new instance of JvmMetrics */ + private JvmMetrics(String processName, String sessionId, + String recordName) { + MetricsContext context = MetricsUtil.getContext("jvm"); + metrics = MetricsUtil.createRecord(context, recordName); + metrics.setTag("processName", processName); + metrics.setTag("sessionId", sessionId); + context.registerUpdater(this); + } + + /** + * This will be called periodically (with the period being configuration + * dependent). 
+ */ + public void doUpdates(MetricsContext context) { + doMemoryUpdates(); + doGarbageCollectionUpdates(); + doThreadUpdates(); + doEventCountUpdates(); + metrics.update(); + } + + private void doMemoryUpdates() { + MemoryMXBean memoryMXBean = + ManagementFactory.getMemoryMXBean(); + MemoryUsage memNonHeap = + memoryMXBean.getNonHeapMemoryUsage(); + MemoryUsage memHeap = + memoryMXBean.getHeapMemoryUsage(); + metrics.setMetric("memNonHeapUsedM", memNonHeap.getUsed()/M); + metrics.setMetric("memNonHeapCommittedM", memNonHeap.getCommitted()/M); + metrics.setMetric("memHeapUsedM", memHeap.getUsed()/M); + metrics.setMetric("memHeapCommittedM", memHeap.getCommitted()/M); + } + + private void doGarbageCollectionUpdates() { + List gcBeans = + ManagementFactory.getGarbageCollectorMXBeans(); + long count = 0; + long timeMillis = 0; + for (GarbageCollectorMXBean gcBean : gcBeans) { + count += gcBean.getCollectionCount(); + timeMillis += gcBean.getCollectionTime(); + } + metrics.incrMetric("gcCount", (int)(count - gcCount)); + metrics.incrMetric("gcTimeMillis", (int)(timeMillis - gcTimeMillis)); + + gcCount = count; + gcTimeMillis = timeMillis; + } + + private void doThreadUpdates() { + ThreadMXBean threadMXBean = + ManagementFactory.getThreadMXBean(); + long threadIds[] = + threadMXBean.getAllThreadIds(); + ThreadInfo[] threadInfos = + threadMXBean.getThreadInfo(threadIds, 0); + + int threadsNew = 0; + int threadsRunnable = 0; + int threadsBlocked = 0; + int threadsWaiting = 0; + int threadsTimedWaiting = 0; + int threadsTerminated = 0; + + for (ThreadInfo threadInfo : threadInfos) { + // threadInfo is null if the thread is not alive or doesn't exist + if (threadInfo == null) continue; + Thread.State state = threadInfo.getThreadState(); + if (state == NEW) { + threadsNew++; + } + else if (state == RUNNABLE) { + threadsRunnable++; + } + else if (state == BLOCKED) { + threadsBlocked++; + } + else if (state == WAITING) { + threadsWaiting++; + } + else if (state == TIMED_WAITING) { + threadsTimedWaiting++; + } + else if (state == TERMINATED) { + threadsTerminated++; + } + } + metrics.setMetric("threadsNew", threadsNew); + metrics.setMetric("threadsRunnable", threadsRunnable); + metrics.setMetric("threadsBlocked", threadsBlocked); + metrics.setMetric("threadsWaiting", threadsWaiting); + metrics.setMetric("threadsTimedWaiting", threadsTimedWaiting); + metrics.setMetric("threadsTerminated", threadsTerminated); + } + + private void doEventCountUpdates() { + long newFatal = EventCounter.getFatal(); + long newError = EventCounter.getError(); + long newWarn = EventCounter.getWarn(); + long newInfo = EventCounter.getInfo(); + + metrics.incrMetric("logFatal", (int)(newFatal - fatalCount)); + metrics.incrMetric("logError", (int)(newError - errorCount)); + metrics.incrMetric("logWarn", (int)(newWarn - warnCount)); + metrics.incrMetric("logInfo", (int)(newInfo - infoCount)); + + fatalCount = newFatal; + errorCount = newError; + warnCount = newWarn; + infoCount = newInfo; + } +} diff --git a/src/java/org/apache/hadoop/metrics/package.html b/src/java/org/apache/hadoop/metrics/package.html new file mode 100644 index 00000000000..dd16e382dac --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/package.html @@ -0,0 +1,159 @@ + + + + + + org.apache.hadoop.metrics + + +This package defines an API for reporting performance metric information. +

+The API is abstract so that it can be implemented on top of +a variety of metrics client libraries. The choice of +client library is a configuration option, and different +modules within the same application can use +different metrics implementation libraries. +

+Sub-packages: +

+
org.apache.hadoop.metrics.spi
+
The abstract Service Provider Interface package. Those wishing to + integrate the metrics API with a particular metrics client library should + extend this package.
+ +
org.apache.hadoop.metrics.file
+
An implementation package which writes the metric data to + a file, or sends it to the standard output stream.
+ +
org.apache.hadoop.metrics.ganglia
+
An implementation package which sends metric data to + Ganglia.
+
+ +

Introduction to the Metrics API

+ +Here is a simple example of how to use this package to report a single +metric value: +
+    private ContextFactory contextFactory = ContextFactory.getFactory();
+    
+    void reportMyMetric(float myMetric) {
+        MetricsContext myContext = contextFactory.getContext("myContext");
+        MetricsRecord myRecord = myContext.getRecord("myRecord");
+        myRecord.setMetric("myMetric", myMetric);
+        myRecord.update();
+    }
+
+ +In this example there are three names: +
+
myContext
+
The context name will typically identify either the application, or else a + module within an application or library.
+ +
myRecord
+
The record name generally identifies some entity for which a set of + metrics is to be reported. For example, you could have a record named + "cacheStats" for reporting a number of statistics relating to the usage of + some cache in your application.
+ +
myMetric
+
This identifies a particular metric. For example, you might have metrics + named "cache_hits" and "cache_misses". +
+
+ +

Tags

+ +In some cases it is useful to have multiple records with the same name. For +example, suppose that you want to report statistics about each disk on a computer. +In this case, the record name would be something like "diskStats", but you also +need to identify the disk which is done by adding a tag to the record. +The code could look something like this: +
+    private MetricsRecord diskStats =
+            contextFactory.getContext("myContext").getRecord("diskStats");
+            
+    void reportDiskMetrics(String diskName, float diskBusy, float diskUsed) {
+        diskStats.setTag("diskName", diskName);
+        diskStats.setMetric("diskBusy", diskBusy);
+        diskStats.setMetric("diskUsed", diskUsed);
+        diskStats.update();
+    }
+
+ +

Buffering and Callbacks

+ +Data is not sent immediately to the metrics system when +MetricsRecord.update() is called. Instead it is stored in an +internal table, and the contents of the table are sent periodically. +This can be important for two reasons: +
    +
  1. It means that a programmer is free to put calls to this API in an + inner loop, since updates can be very frequent without slowing down + the application significantly.
  2. Some implementations can gain efficiency by combining many metrics + into a single UDP message.
+ +The API provides a timer-based callback via the +registerUpdater() method. The benefit of this +versus using java.util.Timer is that the callbacks will be done +immediately before sending the data, making the data as current as possible. + +
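As an illustration of the callback mechanism, here is a minimal sketch of a class that registers itself as an updater. The class name MyAppMetrics, the context name myContext and the metric name requests are hypothetical; the MetricsUtil, MetricsContext, MetricsRecord and Updater calls are the ones declared elsewhere in this patch.

    import org.apache.hadoop.metrics.MetricsContext;
    import org.apache.hadoop.metrics.MetricsRecord;
    import org.apache.hadoop.metrics.MetricsUtil;
    import org.apache.hadoop.metrics.Updater;

    // Hypothetical instrumentation class: counts requests cheaply in memory
    // and only touches the metrics record when the timer fires.
    public class MyAppMetrics implements Updater {

        private final MetricsRecord metricsRecord;
        private int requestsSinceLastUpdate = 0;

        public MyAppMetrics(String sessionId) {
            // "myContext" and "myRecord" follow the naming used in the
            // examples above; the context itself is configured through
            // hadoop-metrics.properties.
            MetricsContext context = MetricsUtil.getContext("myContext");
            metricsRecord = MetricsUtil.createRecord(context, "myRecord");
            metricsRecord.setTag("sessionId", sessionId);
            context.registerUpdater(this); // doUpdates() runs once per period
        }

        public synchronized void incrRequests() {
            ++requestsSinceLastUpdate;     // no I/O here, safe in inner loops
        }

        // Timer-based callback, invoked immediately before the context
        // emits its buffered records.
        public synchronized void doUpdates(MetricsContext unused) {
            metricsRecord.incrMetric("requests", requestsSinceLastUpdate);
            requestsSinceLastUpdate = 0;
            metricsRecord.update();        // copy into the context's table
        }
    }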

Configuration

+ +It is possible to programmatically examine and modify configuration data +before creating a context, like this: +
+    ContextFactory factory = ContextFactory.getFactory();
+    ... examine and/or modify factory attributes ...
+    MetricsContext context = factory.getContext("myContext");
+
+The factory attributes can be examined and modified using the following +ContextFactory methods: +
    +
  • Object getAttribute(String attributeName)
  • String[] getAttributeNames()
  • void setAttribute(String name, Object value)
  • void removeAttribute(String attributeName)
+ +

+ContextFactory.getFactory() initializes the factory attributes by +reading the properties file hadoop-metrics.properties if it exists +on the class path. + +

+A factory attribute named: +

+contextName.class
+
+should have as its value the fully qualified name of the class to be +instantiated by a call of the ContextFactory method +getContext(contextName). If this factory attribute is not +specified, the default is to instantiate +org.apache.hadoop.metrics.file.FileContext. + +
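For instance, a hypothetical hadoop-metrics.properties could bind two contexts to different implementations; myContext is an illustrative name, while jvm is the context name used by org.apache.hadoop.metrics.jvm.JvmMetrics:

    # two illustrative contexts, each bound to a different implementation
    myContext.class=org.apache.hadoop.metrics.file.FileContext
    myContext.fileName=/tmp/myContextMetrics.log
    myContext.period=10
    jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
    jvm.servers=gmond-host:8649
    jvm.period=10

A context whose class attribute is omitted falls back to org.apache.hadoop.metrics.file.FileContext, as noted above.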

+Other factory attributes are specific to a particular implementation of this +API and are documented elsewhere. For example, configuration attributes for +the file and Ganglia implementations can be found in the javadoc for +their respective packages. + + diff --git a/src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java b/src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java new file mode 100644 index 00000000000..e6f85ae3781 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/AbstractMetricsContext.java @@ -0,0 +1,475 @@ +/* + * AbstractMetricsContext.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsException; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.Updater; + +/** + * The main class of the Service Provider Interface. This class should be + * extended in order to integrate the Metrics API with a specific metrics + * client library.

+ * + * This class implements the internal table of metric data, and the timer + * on which data is to be sent to the metrics system. Subclasses must + * override the abstract emitRecord method in order to transmit + * the data.

+ */ +public abstract class AbstractMetricsContext implements MetricsContext { + + private int period = MetricsContext.DEFAULT_PERIOD; + private Timer timer = null; + + private Set updaters = new HashSet(1); + private volatile boolean isMonitoring = false; + + private ContextFactory factory = null; + private String contextName = null; + + public static class TagMap extends TreeMap { + private static final long serialVersionUID = 3546309335061952993L; + TagMap() { + super(); + } + TagMap(TagMap orig) { + super(orig); + } + /** + * Returns true if this tagmap contains every tag in other. + */ + public boolean containsAll(TagMap other) { + for (Map.Entry entry : other.entrySet()) { + Object value = get(entry.getKey()); + if (value == null || !value.equals(entry.getValue())) { + // either key does not exist here, or the value is different + return false; + } + } + return true; + } + } + + public static class MetricMap extends TreeMap { + private static final long serialVersionUID = -7495051861141631609L; + MetricMap() { + super(); + } + MetricMap(MetricMap orig) { + super(orig); + } + } + + static class RecordMap extends HashMap { + private static final long serialVersionUID = 259835619700264611L; + } + + private Map bufferedData = new HashMap(); + + + /** + * Creates a new instance of AbstractMetricsContext + */ + protected AbstractMetricsContext() { + } + + /** + * Initializes the context. + */ + public void init(String contextName, ContextFactory factory) + { + this.contextName = contextName; + this.factory = factory; + } + + /** + * Convenience method for subclasses to access factory attributes. + */ + protected String getAttribute(String attributeName) { + String factoryAttribute = contextName + "." + attributeName; + return (String) factory.getAttribute(factoryAttribute); + } + + /** + * Returns an attribute-value map derived from the factory attributes + * by finding all factory attributes that begin with + * contextName.tableName. The returned map consists of + * those attributes with the contextName and tableName stripped off. + */ + protected Map getAttributeTable(String tableName) { + String prefix = contextName + "." + tableName + "."; + Map result = new HashMap(); + for (String attributeName : factory.getAttributeNames()) { + if (attributeName.startsWith(prefix)) { + String name = attributeName.substring(prefix.length()); + String value = (String) factory.getAttribute(attributeName); + result.put(name, value); + } + } + return result; + } + + /** + * Returns the context name. + */ + public String getContextName() { + return contextName; + } + + /** + * Returns the factory by which this context was created. + */ + public ContextFactory getContextFactory() { + return factory; + } + + /** + * Starts or restarts monitoring, the emitting of metrics records. + */ + public synchronized void startMonitoring() + throws IOException { + if (!isMonitoring) { + startTimer(); + isMonitoring = true; + } + } + + /** + * Stops monitoring. This does not free buffered data. + * @see #close() + */ + public synchronized void stopMonitoring() { + if (isMonitoring) { + stopTimer(); + isMonitoring = false; + } + } + + /** + * Returns true if monitoring is currently in progress. + */ + public boolean isMonitoring() { + return isMonitoring; + } + + /** + * Stops monitoring and frees buffered data, returning this + * object to its initial state. 
+ */ + public synchronized void close() { + stopMonitoring(); + clearUpdaters(); + } + + /** + * Creates a new AbstractMetricsRecord instance with the given recordName. + * Throws an exception if the metrics implementation is configured with a fixed + * set of record names and recordName is not in that set. + * + * @param recordName the name of the record + * @throws MetricsException if recordName conflicts with configuration data + */ + public final synchronized MetricsRecord createRecord(String recordName) { + if (bufferedData.get(recordName) == null) { + bufferedData.put(recordName, new RecordMap()); + } + return newRecord(recordName); + } + + /** + * Subclasses should override this if they subclass MetricsRecordImpl. + * @param recordName the name of the record + * @return newly created instance of MetricsRecordImpl or subclass + */ + protected MetricsRecord newRecord(String recordName) { + return new MetricsRecordImpl(recordName, this); + } + + /** + * Registers a callback to be called at time intervals determined by + * the configuration. + * + * @param updater object to be run periodically; it should update + * some metrics records + */ + public synchronized void registerUpdater(final Updater updater) { + if (!updaters.contains(updater)) { + updaters.add(updater); + } + } + + /** + * Removes a callback, if it exists. + * + * @param updater object to be removed from the callback list + */ + public synchronized void unregisterUpdater(Updater updater) { + updaters.remove(updater); + } + + private synchronized void clearUpdaters() { + updaters.clear(); + } + + /** + * Starts timer if it is not already started + */ + private synchronized void startTimer() { + if (timer == null) { + timer = new Timer("Timer thread for monitoring " + getContextName(), + true); + TimerTask task = new TimerTask() { + public void run() { + try { + timerEvent(); + } + catch (IOException ioe) { + ioe.printStackTrace(); + } + } + }; + long millis = period * 1000; + timer.scheduleAtFixedRate(task, millis, millis); + } + } + + /** + * Stops timer if it is running + */ + private synchronized void stopTimer() { + if (timer != null) { + timer.cancel(); + timer = null; + } + } + + /** + * Timer callback. + */ + private void timerEvent() throws IOException { + if (isMonitoring) { + Collection myUpdaters; + synchronized (this) { + myUpdaters = new ArrayList(updaters); + } + // Run all the registered updates without holding a lock + // on this context + for (Updater updater : myUpdaters) { + try { + updater.doUpdates(this); + } + catch (Throwable throwable) { + throwable.printStackTrace(); + } + } + emitRecords(); + } + } + + /** + * Emits the records. + */ + private synchronized void emitRecords() throws IOException { + for (String recordName : bufferedData.keySet()) { + RecordMap recordMap = bufferedData.get(recordName); + synchronized (recordMap) { + Set> entrySet = recordMap.entrySet (); + for (Entry entry : entrySet) { + OutputRecord outRec = new OutputRecord(entry.getKey(), entry.getValue()); + emitRecord(contextName, recordName, outRec); + } + } + } + flush(); + } + + /** + * Retrieves all the records managed by this MetricsContext. + * Useful for monitoring systems that are polling-based. + * @return A non-null collection of all monitoring records. 
+ */ + public synchronized Map> getAllRecords() { + Map> out = new TreeMap>(); + for (String recordName : bufferedData.keySet()) { + RecordMap recordMap = bufferedData.get(recordName); + synchronized (recordMap) { + List records = new ArrayList(); + Set> entrySet = recordMap.entrySet(); + for (Entry entry : entrySet) { + OutputRecord outRec = new OutputRecord(entry.getKey(), entry.getValue()); + records.add(outRec); + } + out.put(recordName, records); + } + } + return out; + } + + /** + * Sends a record to the metrics system. + */ + protected abstract void emitRecord(String contextName, String recordName, + OutputRecord outRec) throws IOException; + + /** + * Called each period after all records have been emitted, this method does nothing. + * Subclasses may override it in order to perform some kind of flush. + */ + protected void flush() throws IOException { + } + + /** + * Called by MetricsRecordImpl.update(). Creates or updates a row in + * the internal table of metric data. + */ + protected void update(MetricsRecordImpl record) { + String recordName = record.getRecordName(); + TagMap tagTable = record.getTagTable(); + Map metricUpdates = record.getMetricTable(); + + RecordMap recordMap = getRecordMap(recordName); + synchronized (recordMap) { + MetricMap metricMap = recordMap.get(tagTable); + if (metricMap == null) { + metricMap = new MetricMap(); + TagMap tagMap = new TagMap(tagTable); // clone tags + recordMap.put(tagMap, metricMap); + } + + Set> entrySet = metricUpdates.entrySet(); + for (Entry entry : entrySet) { + String metricName = entry.getKey (); + MetricValue updateValue = entry.getValue (); + Number updateNumber = updateValue.getNumber(); + Number currentNumber = metricMap.get(metricName); + if (currentNumber == null || updateValue.isAbsolute()) { + metricMap.put(metricName, updateNumber); + } + else { + Number newNumber = sum(updateNumber, currentNumber); + metricMap.put(metricName, newNumber); + } + } + } + } + + private synchronized RecordMap getRecordMap(String recordName) { + return bufferedData.get(recordName); + } + + /** + * Adds two numbers, coercing the second to the type of the first. + * + */ + private Number sum(Number a, Number b) { + if (a instanceof Integer) { + return Integer.valueOf(a.intValue() + b.intValue()); + } + else if (a instanceof Float) { + return new Float(a.floatValue() + b.floatValue()); + } + else if (a instanceof Short) { + return Short.valueOf((short)(a.shortValue() + b.shortValue())); + } + else if (a instanceof Byte) { + return Byte.valueOf((byte)(a.byteValue() + b.byteValue())); + } + else if (a instanceof Long) { + return Long.valueOf((a.longValue() + b.longValue())); + } + else { + // should never happen + throw new MetricsException("Invalid number type"); + } + + } + + /** + * Called by MetricsRecordImpl.remove(). Removes all matching rows in + * the internal table of metric data. A row matches if it has the same + * tag names and values as record, but it may also have additional + * tags. + */ + protected void remove(MetricsRecordImpl record) { + String recordName = record.getRecordName(); + TagMap tagTable = record.getTagTable(); + + RecordMap recordMap = getRecordMap(recordName); + synchronized (recordMap) { + Iterator it = recordMap.keySet().iterator(); + while (it.hasNext()) { + TagMap rowTags = it.next(); + if (rowTags.containsAll(tagTable)) { + it.remove(); + } + } + } + } + + /** + * Returns the timer period. 
+ */ + public int getPeriod() { + return period; + } + + /** + * Sets the timer period + */ + protected void setPeriod(int period) { + this.period = period; + } + + /** + * If a period is set in the attribute passed in, override + * the default with it. + */ + protected void parseAndSetPeriod(String attributeName) { + String periodStr = getAttribute(attributeName); + if (periodStr != null) { + int period = 0; + try { + period = Integer.parseInt(periodStr); + } catch (NumberFormatException nfe) { + } + if (period <= 0) { + throw new MetricsException("Invalid period: " + periodStr); + } + setPeriod(period); + } + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/CompositeContext.java b/src/java/org/apache/hadoop/metrics/spi/CompositeContext.java new file mode 100644 index 00000000000..782fb30485c --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/CompositeContext.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.spi; + +import java.io.IOException; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.util.ArrayList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsContext; +import org.apache.hadoop.metrics.MetricsException; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.MetricsUtil; +import org.apache.hadoop.metrics.Updater; + +public class CompositeContext extends AbstractMetricsContext { + + private static final Log LOG = LogFactory.getLog(CompositeContext.class); + private static final String ARITY_LABEL = "arity"; + private static final String SUB_FMT = "%s.sub%d"; + private final ArrayList subctxt = + new ArrayList(); + + public CompositeContext() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + int nKids; + try { + String sKids = getAttribute(ARITY_LABEL); + nKids = Integer.valueOf(sKids); + } catch (Exception e) { + LOG.error("Unable to initialize composite metric " + contextName + + ": could not init arity", e); + return; + } + for (int i = 0; i < nKids; ++i) { + MetricsContext ctxt = MetricsUtil.getContext( + String.format(SUB_FMT, contextName, i), contextName); + if (null != ctxt) { + subctxt.add(ctxt); + } + } + } + + @Override + public MetricsRecord newRecord(String recordName) { + return (MetricsRecord) Proxy.newProxyInstance( + MetricsRecord.class.getClassLoader(), + new Class[] { MetricsRecord.class }, + new MetricsRecordDelegator(recordName, subctxt)); + } + + @Override + protected void emitRecord(String contextName, String recordName, + OutputRecord 
outRec) throws IOException { + for (MetricsContext ctxt : subctxt) { + try { + ((AbstractMetricsContext)ctxt).emitRecord( + contextName, recordName, outRec); + if (contextName == null || recordName == null || outRec == null) { + throw new IOException(contextName + ":" + recordName + ":" + outRec); + } + } catch (IOException e) { + LOG.warn("emitRecord failed: " + ctxt.getContextName(), e); + } + } + } + + @Override + protected void flush() throws IOException { + for (MetricsContext ctxt : subctxt) { + try { + ((AbstractMetricsContext)ctxt).flush(); + } catch (IOException e) { + LOG.warn("flush failed: " + ctxt.getContextName(), e); + } + } + } + + @Override + public void startMonitoring() throws IOException { + for (MetricsContext ctxt : subctxt) { + try { + ctxt.startMonitoring(); + } catch (IOException e) { + LOG.warn("startMonitoring failed: " + ctxt.getContextName(), e); + } + } + } + + @Override + public void stopMonitoring() { + for (MetricsContext ctxt : subctxt) { + ctxt.stopMonitoring(); + } + } + + /** + * Return true if all subcontexts are monitoring. + */ + @Override + public boolean isMonitoring() { + boolean ret = true; + for (MetricsContext ctxt : subctxt) { + ret &= ctxt.isMonitoring(); + } + return ret; + } + + @Override + public void close() { + for (MetricsContext ctxt : subctxt) { + ctxt.close(); + } + } + + @Override + public void registerUpdater(Updater updater) { + for (MetricsContext ctxt : subctxt) { + ctxt.registerUpdater(updater); + } + } + + @Override + public void unregisterUpdater(Updater updater) { + for (MetricsContext ctxt : subctxt) { + ctxt.unregisterUpdater(updater); + } + } + + private static class MetricsRecordDelegator implements InvocationHandler { + private static final Method m_getRecordName = initMethod(); + private static Method initMethod() { + try { + return MetricsRecord.class.getMethod("getRecordName", new Class[0]); + } catch (Exception e) { + throw new RuntimeException("Internal error", e); + } + } + + private final String recordName; + private final ArrayList subrecs; + + MetricsRecordDelegator(String recordName, ArrayList ctxts) { + this.recordName = recordName; + this.subrecs = new ArrayList(ctxts.size()); + for (MetricsContext ctxt : ctxts) { + subrecs.add(ctxt.createRecord(recordName)); + } + } + + public Object invoke(Object p, Method m, Object[] args) throws Throwable { + if (m_getRecordName.equals(m)) { + return recordName; + } + assert Void.TYPE.equals(m.getReturnType()); + for (MetricsRecord rec : subrecs) { + m.invoke(rec, args); + } + return null; + } + } + +} diff --git a/src/java/org/apache/hadoop/metrics/spi/MetricValue.java b/src/java/org/apache/hadoop/metrics/spi/MetricValue.java new file mode 100644 index 00000000000..4a6929b8507 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/MetricValue.java @@ -0,0 +1,52 @@ +/* + * MetricValue.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +/** + * A Number that is either an absolute or an incremental amount. + */ +public class MetricValue { + + public static final boolean ABSOLUTE = false; + public static final boolean INCREMENT = true; + + private boolean isIncrement; + private Number number; + + /** Creates a new instance of MetricValue */ + public MetricValue(Number number, boolean isIncrement) { + this.number = number; + this.isIncrement = isIncrement; + } + + public boolean isIncrement() { + return isIncrement; + } + + public boolean isAbsolute() { + return !isIncrement; + } + + public Number getNumber() { + return number; + } + +} diff --git a/src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java b/src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java new file mode 100644 index 00000000000..e3bac564117 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/MetricsRecordImpl.java @@ -0,0 +1,275 @@ +/* + * MetricsRecordImpl.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import java.util.LinkedHashMap; +import java.util.Map; +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.TagMap; + +/** + * An implementation of MetricsRecord. Keeps a back-pointer to the context + * from which it was created, and delegates back to it on update + * and remove(). + */ +public class MetricsRecordImpl implements MetricsRecord { + + private TagMap tagTable = new TagMap(); + private Map metricTable = new LinkedHashMap(); + + private String recordName; + private AbstractMetricsContext context; + + + /** Creates a new instance of FileRecord */ + protected MetricsRecordImpl(String recordName, AbstractMetricsContext context) + { + this.recordName = recordName; + this.context = context; + } + + /** + * Returns the record name. + * + * @return the record name + */ + public String getRecordName() { + return recordName; + } + + /** + * Sets the named tag to the specified value. 
+ * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, String tagValue) { + if (tagValue == null) { + tagValue = ""; + } + tagTable.put(tagName, tagValue); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, int tagValue) { + tagTable.put(tagName, Integer.valueOf(tagValue)); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, long tagValue) { + tagTable.put(tagName, Long.valueOf(tagValue)); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, short tagValue) { + tagTable.put(tagName, Short.valueOf(tagValue)); + } + + /** + * Sets the named tag to the specified value. + * + * @param tagName name of the tag + * @param tagValue new value of the tag + * @throws MetricsException if the tagName conflicts with the configuration + */ + public void setTag(String tagName, byte tagValue) { + tagTable.put(tagName, Byte.valueOf(tagValue)); + } + + /** + * Removes any tag of the specified name. + */ + public void removeTag(String tagName) { + tagTable.remove(tagName); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, int metricValue) { + setAbsolute(metricName, Integer.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, long metricValue) { + setAbsolute(metricName, Long.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, short metricValue) { + setAbsolute(metricName, Short.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. + * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, byte metricValue) { + setAbsolute(metricName, Byte.valueOf(metricValue)); + } + + /** + * Sets the named metric to the specified value. 
+ * + * @param metricName name of the metric + * @param metricValue new value of the metric + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void setMetric(String metricName, float metricValue) { + setAbsolute(metricName, new Float(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, int metricValue) { + setIncrement(metricName, Integer.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, long metricValue) { + setIncrement(metricName, Long.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, short metricValue) { + setIncrement(metricName, Short.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, byte metricValue) { + setIncrement(metricName, Byte.valueOf(metricValue)); + } + + /** + * Increments the named metric by the specified value. + * + * @param metricName name of the metric + * @param metricValue incremental value + * @throws MetricsException if the metricName or the type of the metricValue + * conflicts with the configuration + */ + public void incrMetric(String metricName, float metricValue) { + setIncrement(metricName, new Float(metricValue)); + } + + private void setAbsolute(String metricName, Number metricValue) { + metricTable.put(metricName, new MetricValue(metricValue, MetricValue.ABSOLUTE)); + } + + private void setIncrement(String metricName, Number metricValue) { + metricTable.put(metricName, new MetricValue(metricValue, MetricValue.INCREMENT)); + } + + /** + * Updates the table of buffered data which is to be sent periodically. + * If the tag values match an existing row, that row is updated; + * otherwise, a new row is added. + */ + public void update() { + context.update(this); + } + + /** + * Removes the row, if it exists, in the buffered data table having tags + * that equal the tags that have been set on this record. 
+ */ + public void remove() { + context.remove(this); + } + + TagMap getTagTable() { + return tagTable; + } + + Map getMetricTable() { + return metricTable; + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java b/src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java new file mode 100644 index 00000000000..9e9893426a5 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/NoEmitMetricsContext.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.spi; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsServlet; + +/** + * A MetricsContext that does not emit data, but, unlike NullContextWithUpdate, + * does save it for retrieval with getAllRecords(). + * + * This is useful if you want to support {@link MetricsServlet}, but + * not emit metrics in any other way. + */ +public class NoEmitMetricsContext extends AbstractMetricsContext { + + private static final String PERIOD_PROPERTY = "period"; + + /** Creates a new instance of NullContextWithUpdateThread */ + public NoEmitMetricsContext() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + parseAndSetPeriod(PERIOD_PROPERTY); + } + + /** + * Do-nothing version of emitRecord + */ + protected void emitRecord(String contextName, String recordName, + OutputRecord outRec) { + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/NullContext.java b/src/java/org/apache/hadoop/metrics/spi/NullContext.java new file mode 100644 index 00000000000..11cccb5b0af --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/NullContext.java @@ -0,0 +1,58 @@ +/* + * NullContext.java + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +/** + * Null metrics context: a metrics context which does nothing. Used as the + * default context, so that no performance data is emitted if no configuration + * data is found. 
+ * + */ +public class NullContext extends AbstractMetricsContext { + + /** Creates a new instance of NullContext */ + public NullContext() { + } + + /** + * Do-nothing version of startMonitoring + */ + public void startMonitoring() { + } + + /** + * Do-nothing version of emitRecord + */ + protected void emitRecord(String contextName, String recordName, + OutputRecord outRec) + {} + + /** + * Do-nothing version of update + */ + protected void update(MetricsRecordImpl record) { + } + + /** + * Do-nothing version of remove + */ + protected void remove(MetricsRecordImpl record) { + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java b/src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java new file mode 100644 index 00000000000..5efe5f0fb77 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/NullContextWithUpdateThread.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import org.apache.hadoop.metrics.ContextFactory; +import org.apache.hadoop.metrics.MetricsException; + +/** + * A null context which has a thread calling + * periodically when monitoring is started. This keeps the data sampled + * correctly. + * In all other respects, this is like the NULL context: No data is emitted. + * This is suitable for Monitoring systems like JMX which reads the metrics + * when someone reads the data from JMX. + * + * The default impl of start and stop monitoring: + * is the AbstractMetricsContext is good enough. + * + */ + +public class NullContextWithUpdateThread extends AbstractMetricsContext { + + private static final String PERIOD_PROPERTY = "period"; + + /** Creates a new instance of NullContextWithUpdateThread */ + public NullContextWithUpdateThread() { + } + + public void init(String contextName, ContextFactory factory) { + super.init(contextName, factory); + parseAndSetPeriod(PERIOD_PROPERTY); + } + + + /** + * Do-nothing version of emitRecord + */ + protected void emitRecord(String contextName, String recordName, + OutputRecord outRec) + {} + + /** + * Do-nothing version of update + */ + protected void update(MetricsRecordImpl record) { + } + + /** + * Do-nothing version of remove + */ + protected void remove(MetricsRecordImpl record) { + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/OutputRecord.java b/src/java/org/apache/hadoop/metrics/spi/OutputRecord.java new file mode 100644 index 00000000000..4fa54158956 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/OutputRecord.java @@ -0,0 +1,90 @@ +/* + * OutputRecord.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.metrics.spi; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.MetricMap; +import org.apache.hadoop.metrics.spi.AbstractMetricsContext.TagMap; + +/** + * Represents a record of metric data to be sent to a metrics system. + */ +public class OutputRecord { + + private TagMap tagMap; + private MetricMap metricMap; + + /** Creates a new instance of OutputRecord */ + OutputRecord(TagMap tagMap, MetricMap metricMap) { + this.tagMap = tagMap; + this.metricMap = metricMap; + } + + /** + * Returns the set of tag names + */ + public Set getTagNames() { + return Collections.unmodifiableSet(tagMap.keySet()); + } + + /** + * Returns a tag object which is can be a String, Integer, Short or Byte. + * + * @return the tag value, or null if there is no such tag + */ + public Object getTag(String name) { + return tagMap.get(name); + } + + /** + * Returns the set of metric names. + */ + public Set getMetricNames() { + return Collections.unmodifiableSet(metricMap.keySet()); + } + + /** + * Returns the metric object which can be a Float, Integer, Short or Byte. + */ + public Number getMetric(String name) { + return metricMap.get(name); + } + + + /** + * Returns a copy of this record's tags. + */ + public TagMap getTagsCopy() { + return new TagMap(tagMap); + } + + /** + * Returns a copy of this record's metrics. + */ + public MetricMap getMetricsCopy() { + return new MetricMap(metricMap); + } +} diff --git a/src/java/org/apache/hadoop/metrics/spi/Util.java b/src/java/org/apache/hadoop/metrics/spi/Util.java new file mode 100644 index 00000000000..d7c1912976f --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/Util.java @@ -0,0 +1,67 @@ +/* + * Util.java + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.hadoop.metrics.spi; + +import java.net.InetSocketAddress; +import java.net.SocketAddress; +import java.util.ArrayList; +import java.util.List; + +/** + * Static utility methods + */ +public class Util { + + /** + * This class is not intended to be instantiated + */ + private Util() {} + + /** + * Parses a space and/or comma separated sequence of server specifications + * of the form hostname or hostname:port. If + * the specs string is null, defaults to localhost:defaultPort. + * + * @return a list of InetSocketAddress objects. + */ + public static List parse(String specs, int defaultPort) { + List result = new ArrayList(1); + if (specs == null) { + result.add(new InetSocketAddress("localhost", defaultPort)); + } + else { + String[] specStrings = specs.split("[ ,]+"); + for (String specString : specStrings) { + int colon = specString.indexOf(':'); + if (colon < 0 || colon == specString.length() - 1) { + result.add(new InetSocketAddress(specString, defaultPort)); + } else { + String hostname = specString.substring(0, colon); + int port = Integer.parseInt(specString.substring(colon+1)); + result.add(new InetSocketAddress(hostname, port)); + } + } + } + return result; + } + +} diff --git a/src/java/org/apache/hadoop/metrics/spi/package.html b/src/java/org/apache/hadoop/metrics/spi/package.html new file mode 100644 index 00000000000..b72552f761f --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/spi/package.html @@ -0,0 +1,36 @@ + + + + + + + org.apache.hadoop.metrics.spi + + +The Service Provider Interface for the Metrics API. This package provides +an interface allowing a variety of metrics reporting implementations to be +plugged in to the Metrics API. Examples of such implementations can be found +in the packages org.apache.hadoop.metrics.file and +org.apache.hadoop.metrics.ganglia.
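
The parse helper above is how reporting implementations such as the file and ganglia contexts just mentioned typically turn a "servers" attribute into a list of socket addresses. A small sketch with made-up host names:

import java.net.InetSocketAddress;
import java.util.List;

import org.apache.hadoop.metrics.spi.Util;

public class ParseServersSketch {
  public static void main(String[] args) {
    // "gmond1:8649, gmond2" -> [gmond1:8649, gmond2:8649]; a null spec -> [localhost:8649]
    List<InetSocketAddress> servers = Util.parse("gmond1:8649, gmond2", 8649);
    System.out.println(servers);
  }
}
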

+ +Plugging in an implementation involves writing a concrete subclass of +AbstractMetricsContext. The subclass should get its + configuration information using the getAttribute(attributeName) + method. + + diff --git a/src/java/org/apache/hadoop/metrics/util/MBeanUtil.java b/src/java/org/apache/hadoop/metrics/util/MBeanUtil.java new file mode 100644 index 00000000000..ded1a5a1958 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MBeanUtil.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import java.lang.management.ManagementFactory; + +import javax.management.InstanceNotFoundException; +import javax.management.MBeanServer; +import javax.management.MalformedObjectNameException; +import javax.management.ObjectName; +import javax.management.InstanceAlreadyExistsException; + + +/** + * This util class provides a method to register an MBean using + * our standard naming convention as described in the doc + * for {link {@link #registerMBean(String, String, Object)} + * + */ +public class MBeanUtil { + + /** + * Register the MBean using our standard MBeanName format + * "hadoop:service=,name=" + * Where the and are the supplied parameters + * + * @param serviceName + * @param nameName + * @param theMbean - the MBean to register + * @return the named used to register the MBean + */ + static public ObjectName registerMBean(final String serviceName, + final String nameName, + final Object theMbean) { + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName name = getMBeanName(serviceName, nameName); + try { + mbs.registerMBean(theMbean, name); + return name; + } catch (InstanceAlreadyExistsException ie) { + // Ignore if instance already exists + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + static public void unregisterMBean(ObjectName mbeanName) { + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + if (mbeanName == null) + return; + try { + mbs.unregisterMBean(mbeanName); + } catch (InstanceNotFoundException e ) { + // ignore + } catch (Exception e) { + e.printStackTrace(); + } + } + + static private ObjectName getMBeanName(final String serviceName, + final String nameName) { + ObjectName name = null; + try { + name = new ObjectName("hadoop:" + + "service=" + serviceName + ",name=" + nameName); + } catch (MalformedObjectNameException e) { + e.printStackTrace(); + } + return name; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsBase.java b/src/java/org/apache/hadoop/metrics/util/MetricsBase.java new file mode 100644 index 00000000000..1cbcf3212a6 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsBase.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; + +/** + * + * This is base class for all metrics + * + */ +public abstract class MetricsBase { + public static final String NO_DESCRIPTION = "NoDescription"; + final private String name; + final private String description; + + protected MetricsBase(final String nam) { + name = nam; + description = NO_DESCRIPTION; + } + + protected MetricsBase(final String nam, final String desc) { + name = nam; + description = desc; + } + + public abstract void pushMetric(final MetricsRecord mr); + + public String getName() { return name; } + public String getDescription() { return description; }; + +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java b/src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java new file mode 100644 index 00000000000..d65cce0597e --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java @@ -0,0 +1,226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.management.Attribute; +import javax.management.AttributeList; +import javax.management.AttributeNotFoundException; +import javax.management.DynamicMBean; +import javax.management.InvalidAttributeValueException; +import javax.management.MBeanAttributeInfo; +import javax.management.MBeanException; +import javax.management.MBeanInfo; +import javax.management.MBeanOperationInfo; +import javax.management.ReflectionException; + +import org.apache.hadoop.metrics.MetricsUtil; + + + +/** + * This abstract base class facilitates creating dynamic mbeans automatically from + * metrics. + * The metrics constructors registers metrics in a registry. + * Different categories of metrics should be in differnt classes with their own + * registry (as in NameNodeMetrics and DataNodeMetrics). 
+ * Then the MBean can be created passing the registry to the constructor. + * The MBean should be then registered using a mbean name (example): + * MetricsHolder myMetrics = new MetricsHolder(); // has metrics and registry + * MetricsTestMBean theMBean = new MetricsTestMBean(myMetrics.mregistry); + * ObjectName mbeanName = MBeanUtil.registerMBean("ServiceFoo", + * "TestStatistics", theMBean); + * + * + */ +public abstract class MetricsDynamicMBeanBase implements DynamicMBean { + private final static String AVG_TIME = "AvgTime"; + private final static String MIN_TIME = "MinTime"; + private final static String MAX_TIME = "MaxTime"; + private final static String NUM_OPS = "NumOps"; + private final static String RESET_ALL_MIN_MAX_OP = "resetAllMinMax"; + private MetricsRegistry metricsRegistry; + private MBeanInfo mbeanInfo; + private Map metricsRateAttributeMod; + private int numEntriesInRegistry = 0; + private String mbeanDescription; + + protected MetricsDynamicMBeanBase(final MetricsRegistry mr, final String aMBeanDescription) { + metricsRegistry = mr; + mbeanDescription = aMBeanDescription; + createMBeanInfo(); + } + + private void updateMbeanInfoIfMetricsListChanged() { + if (numEntriesInRegistry != metricsRegistry.size()) + createMBeanInfo(); + } + + private void createMBeanInfo() { + metricsRateAttributeMod = new HashMap(); + boolean needsMinMaxResetOperation = false; + List attributesInfo = new ArrayList(); + MBeanOperationInfo[] operationsInfo = null; + numEntriesInRegistry = metricsRegistry.size(); + + for (MetricsBase o : metricsRegistry.getMetricsList()) { + + if (MetricsTimeVaryingRate.class.isInstance(o)) { + // For each of the metrics there are 3 different attributes + attributesInfo.add(new MBeanAttributeInfo(o.getName() + NUM_OPS, "java.lang.Integer", + o.getDescription(), true, false, false)); + attributesInfo.add(new MBeanAttributeInfo(o.getName() + AVG_TIME, "java.lang.Long", + o.getDescription(), true, false, false)); + attributesInfo.add(new MBeanAttributeInfo(o.getName() + MIN_TIME, "java.lang.Long", + o.getDescription(), true, false, false)); + attributesInfo.add(new MBeanAttributeInfo(o.getName() + MAX_TIME, "java.lang.Long", + o.getDescription(), true, false, false)); + needsMinMaxResetOperation = true; // the min and max can be reset. + + // Note the special attributes (AVG_TIME, MIN_TIME, ..) are derived from metrics + // Rather than check for the suffix we store them in a map. 
+ metricsRateAttributeMod.put(o.getName() + NUM_OPS, o); + metricsRateAttributeMod.put(o.getName() + AVG_TIME, o); + metricsRateAttributeMod.put(o.getName() + MIN_TIME, o); + metricsRateAttributeMod.put(o.getName() + MAX_TIME, o); + + } else if ( MetricsIntValue.class.isInstance(o) || MetricsTimeVaryingInt.class.isInstance(o) ) { + attributesInfo.add(new MBeanAttributeInfo(o.getName(), "java.lang.Integer", + o.getDescription(), true, false, false)); + } else if ( MetricsLongValue.class.isInstance(o) || MetricsTimeVaryingLong.class.isInstance(o) ) { + attributesInfo.add(new MBeanAttributeInfo(o.getName(), "java.lang.Long", + o.getDescription(), true, false, false)); + } else { + MetricsUtil.LOG.error("unknown metrics type: " + o.getClass().getName()); + } + + if (needsMinMaxResetOperation) { + operationsInfo = new MBeanOperationInfo[] { + new MBeanOperationInfo(RESET_ALL_MIN_MAX_OP, "Reset (zero) All Min Max", + null, "void", MBeanOperationInfo.ACTION) }; + } + } + MBeanAttributeInfo[] attrArray = new MBeanAttributeInfo[attributesInfo.size()]; + mbeanInfo = new MBeanInfo(this.getClass().getName(), mbeanDescription, + attributesInfo.toArray(attrArray), null, operationsInfo, null); + } + + @Override + public Object getAttribute(String attributeName) throws AttributeNotFoundException, + MBeanException, ReflectionException { + if (attributeName == null || attributeName.equals("")) + throw new IllegalArgumentException(); + + updateMbeanInfoIfMetricsListChanged(); + + Object o = metricsRateAttributeMod.get(attributeName); + if (o == null) { + o = metricsRegistry.get(attributeName); + } + if (o == null) + throw new AttributeNotFoundException(); + + if (o instanceof MetricsIntValue) + return ((MetricsIntValue) o).get(); + else if (o instanceof MetricsLongValue) + return ((MetricsLongValue) o).get(); + else if (o instanceof MetricsTimeVaryingInt) + return ((MetricsTimeVaryingInt) o).getPreviousIntervalValue(); + else if (o instanceof MetricsTimeVaryingLong) + return ((MetricsTimeVaryingLong) o).getPreviousIntervalValue(); + else if (o instanceof MetricsTimeVaryingRate) { + MetricsTimeVaryingRate or = (MetricsTimeVaryingRate) o; + if (attributeName.endsWith(NUM_OPS)) + return or.getPreviousIntervalNumOps(); + else if (attributeName.endsWith(AVG_TIME)) + return or.getPreviousIntervalAverageTime(); + else if (attributeName.endsWith(MIN_TIME)) + return or.getMinTime(); + else if (attributeName.endsWith(MAX_TIME)) + return or.getMaxTime(); + else { + MetricsUtil.LOG.error("Unexpected attrubute suffix"); + throw new AttributeNotFoundException(); + } + } else { + MetricsUtil.LOG.error("unknown metrics type: " + o.getClass().getName()); + throw new AttributeNotFoundException(); + } + } + + @Override + public AttributeList getAttributes(String[] attributeNames) { + if (attributeNames == null || attributeNames.length == 0) + throw new IllegalArgumentException(); + + updateMbeanInfoIfMetricsListChanged(); + + AttributeList result = new AttributeList(attributeNames.length); + for (String iAttributeName : attributeNames) { + try { + Object value = getAttribute(iAttributeName); + result.add(new Attribute(iAttributeName, value)); + } catch (Exception e) { + continue; + } + } + return result; + } + + @Override + public MBeanInfo getMBeanInfo() { + return mbeanInfo; + } + + @Override + public Object invoke(String actionName, Object[] parms, String[] signature) + throws MBeanException, ReflectionException { + + if (actionName == null || actionName.equals("")) + throw new IllegalArgumentException(); + + + // Right now 
we support only one fixed operation (if it applies) + if (!(actionName.equals(RESET_ALL_MIN_MAX_OP)) || + mbeanInfo.getOperations().length != 1) { + throw new ReflectionException(new NoSuchMethodException(actionName)); + } + for (MetricsBase m : metricsRegistry.getMetricsList()) { + if ( MetricsTimeVaryingRate.class.isInstance(m) ) { + MetricsTimeVaryingRate.class.cast(m).resetMinMax(); + } + } + return null; + } + + @Override + public void setAttribute(Attribute attribute) + throws AttributeNotFoundException, InvalidAttributeValueException, + MBeanException, ReflectionException { + throw new ReflectionException(new NoSuchMethodException("set" + attribute)); + } + + @Override + public AttributeList setAttributes(AttributeList attributes) { + return null; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java b/src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java new file mode 100644 index 00000000000..d467677aaa1 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsIntValue.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsIntValue class is for a metric that is not time varied + * but changes only when it is set. + * Each time its value is set, it is published only *once* at the next update + * call. + * + */ +public class MetricsIntValue extends MetricsBase { + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + private int value; + private boolean changed; + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsIntValue(final String nam, final MetricsRegistry registry, final String description) { + super(nam, description); + value = 0; + changed = false; + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsIntValue(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + + + /** + * Set the value + * @param newValue + */ + public synchronized void set(final int newValue) { + value = newValue; + changed = true; + } + + /** + * Get value + * @return the value last set + */ + public synchronized int get() { + return value; + } + + + /** + * Push the metric to the mr. 
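
Putting the util classes together mirrors the recipe in the MetricsDynamicMBeanBase class comment: a holder owns a MetricsRegistry and its metrics, a thin MetricsDynamicMBeanBase subclass exposes the registry, and MBeanUtil registers it under the standard naming convention. A rough sketch; the Foo names are hypothetical:

import javax.management.ObjectName;

import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.metrics.util.MetricsDynamicMBeanBase;
import org.apache.hadoop.metrics.util.MetricsIntValue;
import org.apache.hadoop.metrics.util.MetricsRegistry;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;

// Hypothetical holder: the metrics and their registry live together.
class FooMetrics {
  final MetricsRegistry registry = new MetricsRegistry();
  final MetricsIntValue queueSize = new MetricsIntValue("queueSize", registry);
  final MetricsTimeVaryingRate lookup = new MetricsTimeVaryingRate("lookup", registry);
}

// Thin DynamicMBean over the registry; the base class does all the work.
class FooStatisticsMBean extends MetricsDynamicMBeanBase {
  FooStatisticsMBean(MetricsRegistry mr) {
    super(mr, "Statistics for the hypothetical Foo service");
  }
}

public class FooMBeanSketch {
  public static void main(String[] args) {
    FooMetrics metrics = new FooMetrics();
    // Registered as "hadoop:service=FooService,name=FooStatistics".
    ObjectName name = MBeanUtil.registerMBean("FooService", "FooStatistics",
        new FooStatisticsMBean(metrics.registry));
    // ... later, on shutdown:
    MBeanUtil.unregisterMBean(name);
  }
}
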
+ * The metric is pushed only if it was updated since last push + * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #get()} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + if (changed) { + try { + mr.setMetric(getName(), value); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + changed = false; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java b/src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java new file mode 100644 index 00000000000..639b6a7bd54 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsLongValue.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; + + +/** + * The MetricsLongValue class is for a metric that is not time varied + * but changes only when it is set. + * Each time its value is set, it is published only *once* at the next update + * call. + * + */ +public class MetricsLongValue extends MetricsBase{ + private long value; + private boolean changed; + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsLongValue(final String nam, final MetricsRegistry registry, final String description) { + super(nam, description); + value = 0; + changed = false; + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsLongValue(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + /** + * Set the value + * @param newValue + */ + public synchronized void set(final long newValue) { + value = newValue; + changed = true; + } + + /** + * Get value + * @return the value last set + */ + public synchronized long get() { + return value; + } + + + /** + * Push the metric to the mr. 
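
MetricsIntValue above and MetricsLongValue below follow the same pattern: set() records a new absolute value and pushMetric() publishes it once per change from whatever periodic update hook the owning class uses (the doUpdates name below is only illustrative). A short sketch with made-up metric names:

import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.util.MetricsIntValue;
import org.apache.hadoop.metrics.util.MetricsLongValue;
import org.apache.hadoop.metrics.util.MetricsRegistry;

public class ValueMetricsSketch {
  private final MetricsRegistry registry = new MetricsRegistry();
  private final MetricsIntValue pendingRequests =
      new MetricsIntValue("pendingRequests", registry, "Requests waiting in the queue");
  private final MetricsLongValue capacityBytes =
      new MetricsLongValue("capacityBytes", registry);

  // Typically called from the owning class's periodic update callback.
  public void doUpdates(MetricsRecord record) {
    pendingRequests.set(17);             // published once at the next push
    capacityBytes.set(128L * 1024 * 1024);
    pendingRequests.pushMetric(record);  // skipped on later pushes until set() again
    capacityBytes.pushMetric(record);
    record.update();
  }
}
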
+ * The metric is pushed only if it was updated since last push + * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #get()} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + if (changed) + mr.setMetric(getName(), value); + changed = false; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java b/src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java new file mode 100644 index 00000000000..faf4b63524b --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsRegistry.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * + * This is the registry for metrics. + * Related set of metrics should be declared in a holding class and registered + * in a registry for those metrics which is also stored in the the holding class. + * + */ +public class MetricsRegistry { + private Map metricsList = new HashMap(); + + public MetricsRegistry() { + } + + /** + * + * @return number of metrics in the registry + */ + public int size() { + return metricsList.size(); + } + + /** + * Add a new metrics to the registry + * @param metricsName - the name + * @param theMetricsObj - the metrics + * @throws IllegalArgumentException if a name is already registered + */ + public synchronized void add(final String metricsName, final MetricsBase theMetricsObj) { + if (metricsList.containsKey(metricsName)) { + throw new IllegalArgumentException("Duplicate metricsName:" + metricsName); + } + metricsList.put(metricsName, theMetricsObj); + } + + + /** + * + * @param metricsName + * @return the metrics if there is one registered by the supplied name. + * Returns null if none is registered + */ + public synchronized MetricsBase get(final String metricsName) { + return metricsList.get(metricsName); + } + + + /** + * + * @return the list of metrics names + */ + public synchronized Collection getKeyList() { + return metricsList.keySet(); + } + + /** + * + * @return the list of metrics + */ + public synchronized Collection getMetricsList() { + return metricsList.values(); + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java new file mode 100644 index 00000000000..96b4fe14880 --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingInt.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsTimeVaryingInt class is for a metric that naturally + * varies over time (e.g. number of files created). The metrics is accumulated + * over an interval (set in the metrics config file); the metrics is + * published at the end of each interval and then + * reset to zero. Hence the counter has the value in the current interval. + * + * Note if one wants a time associated with the metric then use + * @see org.apache.hadoop.metrics.util.MetricsTimeVaryingRate + * + */ +public class MetricsTimeVaryingInt extends MetricsBase { + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + private int currentValue; + private int previousIntervalValue; + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * @param description - the description + */ + public MetricsTimeVaryingInt(final String nam, + final MetricsRegistry registry, + final String description) { + super(nam, description); + currentValue = 0; + previousIntervalValue = 0; + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsTimeVaryingInt(final String nam, final MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + + + /** + * Inc metrics for incr vlaue + * @param incr - number of operations + */ + public synchronized void inc(final int incr) { + currentValue += incr; + } + + /** + * Inc metrics by one + */ + public synchronized void inc() { + currentValue++; + } + + private synchronized void intervalHeartBeat() { + previousIntervalValue = currentValue; + currentValue = 0; + } + + /** + * Push the delta metrics to the mr. + * The delta is since the last push/interval. 
+ * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #previousIntervalValue} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + intervalHeartBeat(); + try { + mr.incrMetric(getName(), getPreviousIntervalValue()); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + + + /** + * The Value at the Previous interval + * @return prev interval value + */ + public synchronized int getPreviousIntervalValue() { + return previousIntervalValue; + } + + /** + * The Value at the current interval + * @return prev interval value + */ + public synchronized int getCurrentIntervalValue() { + return currentValue; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java new file mode 100644 index 00000000000..929303c832f --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingLong.java @@ -0,0 +1,124 @@ +package org.apache.hadoop.metrics.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsTimeVaryingLong class is for a metric that naturally + * varies over time (e.g. number of files created). The metrics is accumulated + * over an interval (set in the metrics config file); the metrics is + * published at the end of each interval and then + * reset to zero. Hence the counter has the value in the current interval. 
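
Both time-varying counters are used the same way: call inc() as events happen, and let the periodic push publish the previous interval's delta and zero the current count. A short sketch with made-up metric names (the doUpdates hook is illustrative):

import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.util.MetricsRegistry;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingInt;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingLong;

public class CounterMetricsSketch {
  private final MetricsRegistry registry = new MetricsRegistry();
  private final MetricsTimeVaryingInt filesCreated =
      new MetricsTimeVaryingInt("filesCreated", registry, "Files created this interval");
  private final MetricsTimeVaryingLong bytesWritten =
      new MetricsTimeVaryingLong("bytesWritten", registry);

  public void onCreate()        { filesCreated.inc(); }
  public void onWrite(long n)   { bytesWritten.inc(n); }

  // Once per metrics interval: publishes the previous interval's delta
  // and resets the current counters to zero.
  public void doUpdates(MetricsRecord record) {
    filesCreated.pushMetric(record);
    bytesWritten.pushMetric(record);
    record.update();
  }
}
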
+ * + * Note if one wants a time associated with the metric then use + * @see org.apache.hadoop.metrics.util.MetricsTimeVaryingRate + * + */ +public class MetricsTimeVaryingLong extends MetricsBase{ + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + private long currentValue; + private long previousIntervalValue; + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsTimeVaryingLong(final String nam, MetricsRegistry registry, final String description) { + super(nam, description); + currentValue = 0; + previousIntervalValue = 0; + registry.add(nam, this); + } + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsTimeVaryingLong(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + } + + /** + * Inc metrics for incr vlaue + * @param incr - number of operations + */ + public synchronized void inc(final long incr) { + currentValue += incr; + } + + /** + * Inc metrics by one + */ + public synchronized void inc() { + currentValue++; + } + + private synchronized void intervalHeartBeat() { + previousIntervalValue = currentValue; + currentValue = 0; + } + + /** + * Push the delta metrics to the mr. + * The delta is since the last push/interval. + * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #previousIntervalValue} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + intervalHeartBeat(); + try { + mr.incrMetric(getName(), getPreviousIntervalValue()); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + + + /** + * The Value at the Previous interval + * @return prev interval value + */ + public synchronized long getPreviousIntervalValue() { + return previousIntervalValue; + } + + /** + * The Value at the current interval + * @return prev interval value + */ + public synchronized long getCurrentIntervalValue() { + return currentValue; + } +} diff --git a/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java new file mode 100644 index 00000000000..7d05af325da --- /dev/null +++ b/src/java/org/apache/hadoop/metrics/util/MetricsTimeVaryingRate.java @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.metrics.util; + +import org.apache.hadoop.metrics.MetricsRecord; +import org.apache.hadoop.util.StringUtils; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * The MetricsTimeVaryingRate class is for a rate based metric that + * naturally varies over time (e.g. time taken to create a file). + * The rate is averaged at each interval heart beat (the interval + * is set in the metrics config file). + * This class also keeps track of the min and max rates along with + * a method to reset the min-max. + * + */ +public class MetricsTimeVaryingRate extends MetricsBase { + + private static final Log LOG = + LogFactory.getLog("org.apache.hadoop.metrics.util"); + + static class Metrics { + int numOperations = 0; + long time = 0; // total time or average time + + void set(final Metrics resetTo) { + numOperations = resetTo.numOperations; + time = resetTo.time; + } + + void reset() { + numOperations = 0; + time = 0; + } + } + + static class MinMax { + long minTime = -1; + long maxTime = 0; + + void set(final MinMax newVal) { + minTime = newVal.minTime; + maxTime = newVal.maxTime; + } + + void reset() { + minTime = -1; + maxTime = 0; + } + void update(final long time) { // update min max + minTime = (minTime == -1) ? time : Math.min(minTime, time); + minTime = Math.min(minTime, time); + maxTime = Math.max(maxTime, time); + } + } + private Metrics currentData; + private Metrics previousIntervalData; + private MinMax minMax; + + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + */ + public MetricsTimeVaryingRate(final String nam, final MetricsRegistry registry, final String description) { + super(nam, description); + currentData = new Metrics(); + previousIntervalData = new Metrics(); + minMax = new MinMax(); + registry.add(nam, this); + } + + /** + * Constructor - create a new metric + * @param nam the name of the metrics to be used to publish the metric + * @param registry - where the metrics object will be registered + * A description of {@link #NO_DESCRIPTION} is used + */ + public MetricsTimeVaryingRate(final String nam, MetricsRegistry registry) { + this(nam, registry, NO_DESCRIPTION); + + } + + + /** + * Increment the metrics for numOps operations + * @param numOps - number of operations + * @param time - time for numOps operations + */ + public synchronized void inc(final int numOps, final long time) { + currentData.numOperations += numOps; + currentData.time += time; + long timePerOps = time/numOps; + minMax.update(timePerOps); + } + + /** + * Increment the metrics for one operation + * @param time for one operation + */ + public synchronized void inc(final long time) { + currentData.numOperations++; + currentData.time += time; + minMax.update(time); + } + + + + private synchronized void intervalHeartBeat() { + previousIntervalData.numOperations = currentData.numOperations; + previousIntervalData.time = (currentData.numOperations == 0) ? + 0 : currentData.time / currentData.numOperations; + currentData.reset(); + } + + /** + * Push the delta metrics to the mr. + * The delta is since the last push/interval. 
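
A typical caller times an operation and feeds the elapsed time to inc(); the periodic push (pushMetric, below) then publishes the previous interval's operation count and average time under the "_num_ops" and "_avg_time" suffixes. A short sketch with a made-up metric name:

import org.apache.hadoop.metrics.MetricsRecord;
import org.apache.hadoop.metrics.util.MetricsRegistry;
import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;

public class RateMetricsSketch {
  private final MetricsRegistry registry = new MetricsRegistry();
  private final MetricsTimeVaryingRate createFile =
      new MetricsTimeVaryingRate("createFile", registry, "Time to create a file");

  public void create() {
    long start = System.currentTimeMillis();
    // ... the operation being timed goes here ...
    createFile.inc(System.currentTimeMillis() - start);  // one op and its elapsed time
  }

  // Per interval: publishes "createFile_num_ops" and "createFile_avg_time",
  // then resets the interval data. Min/max survive until resetMinMax().
  public void doUpdates(MetricsRecord record) {
    createFile.pushMetric(record);
    record.update();
  }
}
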
+ * + * Note this does NOT push to JMX + * (JMX gets the info via {@link #getPreviousIntervalAverageTime()} and + * {@link #getPreviousIntervalNumOps()} + * + * @param mr + */ + public synchronized void pushMetric(final MetricsRecord mr) { + intervalHeartBeat(); + try { + mr.incrMetric(getName() + "_num_ops", getPreviousIntervalNumOps()); + mr.setMetric(getName() + "_avg_time", getPreviousIntervalAverageTime()); + } catch (Exception e) { + LOG.info("pushMetric failed for " + getName() + "\n" + + StringUtils.stringifyException(e)); + } + } + + /** + * The number of operations in the previous interval + * @return - ops in prev interval + */ + public synchronized int getPreviousIntervalNumOps() { + return previousIntervalData.numOperations; + } + + /** + * The average time per operation in the previous interval + * @return - the average time. + */ + public synchronized long getPreviousIntervalAverageTime() { + return previousIntervalData.time; + } + + /** + * The min time for a single operation since the last reset + * {@link #resetMinMax()} + * @return min time for an operation + */ + public synchronized long getMinTime() { + return minMax.minTime; + } + + /** + * The max time for a single operation since the last reset + * {@link #resetMinMax()} + * @return max time for an operation + */ + public synchronized long getMaxTime() { + return minMax.maxTime; + } + + /** + * Reset the min max values + */ + public synchronized void resetMinMax() { + minMax.reset(); + } +} diff --git a/src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java b/src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java new file mode 100644 index 00000000000..0490e3cabf4 --- /dev/null +++ b/src/java/org/apache/hadoop/net/CachedDNSToSwitchMapping.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * A cached implementation of DNSToSwitchMapping that takes a + * raw DNSToSwitchMapping and stores the resolved network location in + * a cache. Subsequent calls for an already-resolved name + * get its location from the cache.
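
A resolver is usually written once against the raw DNSToSwitchMapping interface (defined later in this patch) and then wrapped in this cache. A minimal sketch, using a made-up single-rack resolver:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.net.CachedDNSToSwitchMapping;
import org.apache.hadoop.net.DNSToSwitchMapping;

// Trivial resolver that puts every host on one rack; a real implementation
// would consult a topology script or an inventory service.
class SingleRackMapping implements DNSToSwitchMapping {
  public List<String> resolve(List<String> names) {
    List<String> paths = new ArrayList<String>(names.size());
    for (int i = 0; i < names.size(); i++) {
      paths.add("/default-rack");
    }
    return paths;
  }
}

public class TopologySketch {
  public static void main(String[] args) {
    DNSToSwitchMapping mapping =
        new CachedDNSToSwitchMapping(new SingleRackMapping());
    // A second call for the same names is served from the cache.
    System.out.println(mapping.resolve(Arrays.asList("10.0.0.1", "10.0.0.2")));
  }
}
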
+ * + */ +public class CachedDNSToSwitchMapping implements DNSToSwitchMapping { + private Map cache = new ConcurrentHashMap(); + protected DNSToSwitchMapping rawMapping; + + public CachedDNSToSwitchMapping(DNSToSwitchMapping rawMapping) { + this.rawMapping = rawMapping; + } + + public List resolve(List names) { + // normalize all input names to be in the form of IP addresses + names = NetUtils.normalizeHostNames(names); + + List result = new ArrayList(names.size()); + if (names.isEmpty()) { + return result; + } + + + // find out all names without cached resolved location + List unCachedHosts = new ArrayList(names.size()); + for (String name : names) { + if (cache.get(name) == null) { + unCachedHosts.add(name); + } + } + + // Resolve those names + List rNames = rawMapping.resolve(unCachedHosts); + + // Cache the result + if (rNames != null) { + for (int i=0; i ips = new Vector(); + Enumeration e = netIF.getInetAddresses(); + while (e.hasMoreElements()) { + ips.add(((InetAddress) e.nextElement()).getHostAddress()); + } + return ips.toArray(new String[] {}); + } + } catch (SocketException e) { + return new String[] { cachedHostAddress }; + } + } + + + /** + * Returns the first available IP address associated with the provided + * network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @return The IP address in text form + * @throws UnknownHostException + * If one is encountered in querying the default interface + */ + public static String getDefaultIP(String strInterface) + throws UnknownHostException { + String[] ips = getIPs(strInterface); + return ips[0]; + } + + /** + * Returns all the host names associated by the provided nameserver with the + * address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @param nameserver + * The DNS host name + * @return A string vector of all host names associated with the IPs tied to + * the specified interface + * @throws UnknownHostException if the hostname cannot be determined + */ + public static String[] getHosts(String strInterface, String nameserver) + throws UnknownHostException { + String[] ips = getIPs(strInterface); + Vector hosts = new Vector(); + for (int ctr = 0; ctr < ips.length; ctr++) + try { + hosts.add(reverseDns(InetAddress.getByName(ips[ctr]), + nameserver)); + } catch (UnknownHostException ignored) { + } catch (NamingException ignored) { + } + + if (hosts.isEmpty()) { + return new String[] { cachedHostname }; + } else { + return hosts.toArray(new String[hosts.size()]); + } + } + + + /** + * Determine the local hostname; retrieving it from cache if it is known + * If we cannot determine our host name, return "localhost" + * @return the local hostname or "localhost" + */ + private static String resolveLocalHostname() { + String localhost; + try { + localhost = InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + LOG.info("Unable to determine local hostname " + + "-falling back to \"" + LOCALHOST + "\"", e); + localhost = LOCALHOST; + } + return localhost; + } + + + /** + * Get the IPAddress of the local host as a string. + * This will be a loop back value if the local host address cannot be + * determined. + * If the loopback address of "localhost" does not resolve, then the system's + * network is in such a state that nothing is going to work. 
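
These static lookup helpers are usually called with either a concrete interface name or "default"; the sketch below assumes they live on org.apache.hadoop.net.DNS and that an eth0 interface exists:

import java.net.UnknownHostException;

import org.apache.hadoop.net.DNS;

public class DnsSketch {
  public static void main(String[] args) throws UnknownHostException {
    // "default" falls back to the cached local hostname instead of probing an interface.
    System.out.println(DNS.getDefaultHost("default"));
    // Query a concrete interface; it may carry several addresses.
    for (String ip : DNS.getIPs("eth0")) {
      System.out.println(ip);
    }
  }
}
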
A message is + * logged at the error level and a null pointer returned, a pointer + * which will trigger failures later on the application + * @return the IPAddress of the local host or null for a serious problem. + */ + private static String resolveLocalHostIPAddress() { + String address; + try { + address = InetAddress.getLocalHost().getHostAddress(); + } catch (UnknownHostException e) { + LOG.info("Unable to determine address of the host" + + "-falling back to \"" + LOCALHOST + "\" address", e); + try { + address = InetAddress.getByName(LOCALHOST).getHostAddress(); + } catch (UnknownHostException noLocalHostAddressException) { + //at this point, deep trouble + LOG.error("Unable to determine local loopback address " + + "of \"" + LOCALHOST + "\" " + + "-this system's network configuration is unsupported", e); + address = null; + } + } + return address; + } + + /** + * Returns all the host names associated by the default nameserver with the + * address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @return The list of host names associated with IPs bound to the network + * interface + * @throws UnknownHostException + * If one is encountered while querying the default interface + * + */ + public static String[] getHosts(String strInterface) + throws UnknownHostException { + return getHosts(strInterface, null); + } + + /** + * Returns the default (first) host name associated by the provided + * nameserver with the address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0) + * @param nameserver + * The DNS host name + * @return The default host names associated with IPs bound to the network + * interface + * @throws UnknownHostException + * If one is encountered while querying the default interface + */ + public static String getDefaultHost(String strInterface, String nameserver) + throws UnknownHostException { + if ("default".equals(strInterface)) { + return cachedHostname; + } + + if ("default".equals(nameserver)) { + return getDefaultHost(strInterface); + } + + String[] hosts = getHosts(strInterface, nameserver); + return hosts[0]; + } + + /** + * Returns the default (first) host name associated by the default + * nameserver with the address bound to the specified network interface + * + * @param strInterface + * The name of the network interface to query (e.g. eth0). + * Must not be null. + * @return The default host name associated with IPs bound to the network + * interface + * @throws UnknownHostException + * If one is encountered while querying the default interface + */ + public static String getDefaultHost(String strInterface) + throws UnknownHostException { + return getDefaultHost(strInterface, null); + } + +} diff --git a/src/java/org/apache/hadoop/net/DNSToSwitchMapping.java b/src/java/org/apache/hadoop/net/DNSToSwitchMapping.java new file mode 100644 index 00000000000..f71b95025d1 --- /dev/null +++ b/src/java/org/apache/hadoop/net/DNSToSwitchMapping.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.util.List; + +/** + * An interface that should be implemented to allow pluggable + * DNS-name/IP-address to RackID resolvers. + * + */ +public interface DNSToSwitchMapping { + /** + * Resolves a list of DNS-names/IP-addresses and returns back a list of + * switch information (network paths). One-to-one correspondence must be + * maintained between the elements in the lists. + * Consider an element in the argument list - x.y.com. The switch information + * that is returned must be a network path of the form /foo/rack, + * where / is the root, and 'foo' is the switch where 'rack' is connected. + * Note the hostname/ip-address is not part of the returned path. + * The network topology of the cluster would determine the number of + * components in the network path. + * @param names + * @return list of resolved network paths + */ + public List resolve(List names); +} diff --git a/src/java/org/apache/hadoop/net/NetUtils.java b/src/java/org/apache/hadoop/net/NetUtils.java new file mode 100644 index 00000000000..ce07fab858e --- /dev/null +++ b/src/java/org/apache/hadoop/net/NetUtils.java @@ -0,0 +1,440 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.SocketAddress; +import java.net.URI; +import java.net.UnknownHostException; +import java.nio.channels.SocketChannel; +import java.util.Map.Entry; +import java.util.regex.Pattern; +import java.util.*; + +import javax.net.SocketFactory; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.VersionedProtocol; +import org.apache.hadoop.util.ReflectionUtils; + +public class NetUtils { + private static final Log LOG = LogFactory.getLog(NetUtils.class); + + private static Map hostToResolved = + new HashMap(); + + /** + * Get the socket factory for the given class according to its + * configuration parameter + * hadoop.rpc.socket.factory.class.<ClassName>. 
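
Concretely, the lookup keys off the class's simple name and, as the rest of this comment explains, falls back to hadoop.rpc.socket.factory.class.default and finally the JVM default. A small sketch; FooProtocol is a made-up protocol interface:

import javax.net.SocketFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;

public class SocketFactorySketch {
  // Hypothetical protocol interface, used only to illustrate the per-class key
  // "hadoop.rpc.socket.factory.class.FooProtocol".
  interface FooProtocol {}

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    SocketFactory factory = NetUtils.getSocketFactory(conf, FooProtocol.class);
    System.out.println(factory.getClass().getName());
  }
}
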
When no + * such parameter exists then fall back on the default socket factory as + * configured by hadoop.rpc.socket.factory.class.default. If + * this default socket factory is not configured, then fall back on the JVM + * default socket factory. + * + * @param conf the configuration + * @param clazz the class (usually a {@link VersionedProtocol}) + * @return a socket factory + */ + public static SocketFactory getSocketFactory(Configuration conf, + Class clazz) { + + SocketFactory factory = null; + + String propValue = + conf.get("hadoop.rpc.socket.factory.class." + clazz.getSimpleName()); + if ((propValue != null) && (propValue.length() > 0)) + factory = getSocketFactoryFromProperty(conf, propValue); + + if (factory == null) + factory = getDefaultSocketFactory(conf); + + return factory; + } + + /** + * Get the default socket factory as specified by the configuration + * parameter hadoop.rpc.socket.factory.default + * + * @param conf the configuration + * @return the default socket factory as specified in the configuration or + * the JVM default socket factory if the configuration does not + * contain a default socket factory property. + */ + public static SocketFactory getDefaultSocketFactory(Configuration conf) { + + String propValue = conf.get("hadoop.rpc.socket.factory.class.default"); + if ((propValue == null) || (propValue.length() == 0)) + return SocketFactory.getDefault(); + + return getSocketFactoryFromProperty(conf, propValue); + } + + /** + * Get the socket factory corresponding to the given proxy URI. If the + * given proxy URI corresponds to an absence of configuration parameter, + * returns null. If the URI is malformed raises an exception. + * + * @param propValue the property which is the class name of the + * SocketFactory to instantiate; assumed non null and non empty. + * @return a socket factory as defined in the property value. + */ + public static SocketFactory getSocketFactoryFromProperty( + Configuration conf, String propValue) { + + try { + Class theClass = conf.getClassByName(propValue); + return (SocketFactory) ReflectionUtils.newInstance(theClass, conf); + + } catch (ClassNotFoundException cnfe) { + throw new RuntimeException("Socket Factory class not found: " + cnfe); + } + } + + /** + * Util method to build socket addr from either: + * : + * ://:/ + */ + public static InetSocketAddress createSocketAddr(String target) { + return createSocketAddr(target, -1); + } + + /** + * Util method to build socket addr from either: + * + * : + * ://:/ + */ + public static InetSocketAddress createSocketAddr(String target, + int defaultPort) { + int colonIndex = target.indexOf(':'); + if (colonIndex < 0 && defaultPort == -1) { + throw new RuntimeException("Not a host:port pair: " + target); + } + String hostname; + int port = -1; + if (!target.contains("/")) { + if (colonIndex == -1) { + hostname = target; + } else { + // must be the old style : + hostname = target.substring(0, colonIndex); + port = Integer.parseInt(target.substring(colonIndex + 1)); + } + } else { + // a new uri + URI addr = new Path(target).toUri(); + hostname = addr.getHost(); + port = addr.getPort(); + } + + if (port == -1) { + port = defaultPort; + } + + if (getStaticResolution(hostname) != null) { + hostname = getStaticResolution(hostname); + } + return new InetSocketAddress(hostname, port); + } + + /** + * Adds a static resolution for host. This can be used for setting up + * hostnames with names that are fake to point to a well known host. For e.g. 
+ * in some testcases we require to have daemons with different hostnames + * running on the same machine. In order to create connections to these + * daemons, one can set up mappings from those hostnames to "localhost". + * {@link NetUtils#getStaticResolution(String)} can be used to query for + * the actual hostname. + * @param host + * @param resolvedName + */ + public static void addStaticResolution(String host, String resolvedName) { + synchronized (hostToResolved) { + hostToResolved.put(host, resolvedName); + } + } + + /** + * Retrieves the resolved name for the passed host. The resolved name must + * have been set earlier using + * {@link NetUtils#addStaticResolution(String, String)} + * @param host + * @return the resolution + */ + public static String getStaticResolution(String host) { + synchronized (hostToResolved) { + return hostToResolved.get(host); + } + } + + /** + * This is used to get all the resolutions that were added using + * {@link NetUtils#addStaticResolution(String, String)}. The return + * value is a List each element of which contains an array of String + * of the form String[0]=hostname, String[1]=resolved-hostname + * @return the list of resolutions + */ + public static List getAllStaticResolutions() { + synchronized (hostToResolved) { + Set >entries = hostToResolved.entrySet(); + if (entries.size() == 0) { + return null; + } + List l = new ArrayList(entries.size()); + for (Entry e : entries) { + l.add(new String[] {e.getKey(), e.getValue()}); + } + return l; + } + } + + /** + * Returns InetSocketAddress that a client can use to + * connect to the server. Server.getListenerAddress() is not correct when + * the server binds to "0.0.0.0". This returns "127.0.0.1:port" when + * the getListenerAddress() returns "0.0.0.0:port". + * + * @param server + * @return socket address that a client can use to connect to the server. + */ + public static InetSocketAddress getConnectAddress(Server server) { + InetSocketAddress addr = server.getListenerAddress(); + if (addr.getAddress().getHostAddress().equals("0.0.0.0")) { + addr = new InetSocketAddress("127.0.0.1", addr.getPort()); + } + return addr; + } + + /** + * Same as getInputStream(socket, socket.getSoTimeout()).
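// Illustrative sketch of the address helpers above; host names and ports are
// made up, and the static resolution is the test-only trick described in the
// addStaticResolution javadoc.
//
// import java.net.InetSocketAddress;
// import org.apache.hadoop.net.NetUtils;
//
// class AddrExample {
//   public static void main(String[] args) {
//     // Old-style "host:port" target.
//     InetSocketAddress a = NetUtils.createSocketAddr("nn.example.com:8020");
//     // Bare host name; the supplied default port (8020) fills the gap.
//     InetSocketAddress b = NetUtils.createSocketAddr("nn.example.com", 8020);
//     // Map a fake daemon name onto localhost for a test.
//     NetUtils.addStaticResolution("fake-datanode", "localhost");
//     InetSocketAddress c = NetUtils.createSocketAddr("fake-datanode:50010");
//     System.out.println(a + " " + b + " " + c);
//   }
// }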

+ * + * From documentation for {@link #getInputStream(Socket, long)}:
+ * Returns InputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketInputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getInputStream()} is returned. In the latter + * case, the timeout argument is ignored and the timeout set with + * {@link Socket#setSoTimeout(int)} applies for reads.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getInputStream()}. + * + * @see #getInputStream(Socket, long) + * + * @param socket + * @return InputStream for reading from the socket. + * @throws IOException + */ + public static InputStream getInputStream(Socket socket) + throws IOException { + return getInputStream(socket, socket.getSoTimeout()); + } + + /** + * Returns InputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketInputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getInputStream()} is returned. In the later + * case, the timeout argument is ignored and the timeout set with + * {@link Socket#setSoTimeout(int)} applies for reads.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getInputStream()}. + * + * @see Socket#getChannel() + * + * @param socket + * @param timeout timeout in milliseconds. This may not always apply. zero + * for waiting as long as necessary. + * @return InputStream for reading from the socket. + * @throws IOException + */ + public static InputStream getInputStream(Socket socket, long timeout) + throws IOException { + return (socket.getChannel() == null) ? + socket.getInputStream() : new SocketInputStream(socket, timeout); + } + + /** + * Same as getOutputStream(socket, 0). Timeout of zero implies write will + * wait until data is available.

+ * + * From documentation for {@link #getOutputStream(Socket, long)} :
+ * Returns OutputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketOutputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getOutputStream()} is returned. In the latter + * case, the timeout argument is ignored and the write will wait until + * data is available.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getOutputStream()}. + * + * @see #getOutputStream(Socket, long) + * + * @param socket + * @return OutputStream for writing to the socket. + * @throws IOException + */ + public static OutputStream getOutputStream(Socket socket) + throws IOException { + return getOutputStream(socket, 0); + } + + /** + * Returns OutputStream for the socket. If the socket has an associated + * SocketChannel then it returns a + * {@link SocketOutputStream} with the given timeout. If the socket does not + * have a channel, {@link Socket#getOutputStream()} is returned. In the later + * case, the timeout argument is ignored and the write will wait until + * data is available.

+ * + * Any socket created using socket factories returned by {@link #NetUtils}, + * must use this interface instead of {@link Socket#getOutputStream()}. + * + * @see Socket#getChannel() + * + * @param socket + * @param timeout timeout in milliseconds. This may not always apply. zero + * for waiting as long as necessary. + * @return OutputStream for writing to the socket. + * @throws IOException + */ + public static OutputStream getOutputStream(Socket socket, long timeout) + throws IOException { + return (socket.getChannel() == null) ? + socket.getOutputStream() : new SocketOutputStream(socket, timeout); + } + + /** + * This is a drop-in replacement for + * {@link Socket#connect(SocketAddress, int)}. + * In the case of normal sockets that don't have associated channels, this + * just invokes socket.connect(endpoint, timeout). If + * socket.getChannel() returns a non-null channel, + * connect is implemented using Hadoop's selectors. This is done mainly + * to avoid Sun's connect implementation from creating thread-local + * selectors, since Hadoop does not have control on when these are closed + * and could end up taking all the available file descriptors. + * + * @see java.net.Socket#connect(java.net.SocketAddress, int) + * + * @param socket + * @param endpoint + * @param timeout - timeout in milliseconds + */ + public static void connect(Socket socket, + SocketAddress endpoint, + int timeout) throws IOException { + if (socket == null || endpoint == null || timeout < 0) { + throw new IllegalArgumentException("Illegal argument for connect()"); + } + + SocketChannel ch = socket.getChannel(); + + if (ch == null) { + // let the default implementation handle it. + socket.connect(endpoint, timeout); + } else { + SocketIOWithTimeout.connect(ch, endpoint, timeout); + } + } + + /** + * Given a string representation of a host, return its ip address + * in textual presentation. + * + * @param name a string representation of a host: + * either a textual representation its IP address or its host name + * @return its IP address in the string format + */ + public static String normalizeHostName(String name) { + if (Character.digit(name.charAt(0), 16) != -1) { // it is an IP + return name; + } else { + try { + InetAddress ipAddress = InetAddress.getByName(name); + return ipAddress.getHostAddress(); + } catch (UnknownHostException e) { + return name; + } + } + } + + /** + * Given a collection of string representation of hosts, return a list of + * corresponding IP addresses in the textual representation. + * + * @param names a collection of string representations of hosts + * @return a list of corresponding IP addresses in the string format + * @see #normalizeHostName(String) + */ + public static List normalizeHostNames(Collection names) { + List hostNames = new ArrayList(names.size()); + for (String name : names) { + hostNames.add(normalizeHostName(name)); + } + return hostNames; + } + + /** + * Attempt to obtain the host name of a name specified by ip address. + * Check that the node name is an ip addr and if so, attempt to determine + * its host name. If the name is not an IP addr, or the actual name cannot + * be determined, return null. 
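// Illustrative sketch tying together the connect() and stream helpers above:
// sockets from the Hadoop socket factories carry an NIO channel, so reads and
// writes should go through NetUtils rather than Socket.get*Stream(). Host,
// port and timeouts are made up for the example.

import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;

class StreamExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Socket s = NetUtils.getDefaultSocketFactory(conf).createSocket();
    NetUtils.connect(s, new InetSocketAddress("dn.example.com", 50010), 10000);
    OutputStream out = NetUtils.getOutputStream(s, 10000); // 10s write timeout
    InputStream in = NetUtils.getInputStream(s);           // uses the socket's SO_TIMEOUT
    out.write(0);
    System.out.println(in.read());
    s.close();
  }
}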
+ * + * @return Host name or null + */ + private static final Pattern ipPattern = // Pattern for matching hostname to ip:port + Pattern.compile("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}:?\\d*"); + public static String getHostNameOfIP(String ip) { + // If name is not an ip addr, don't bother looking it up + if(!ipPattern.matcher(ip).matches()) + return null; + + String hostname = ""; + try { + String n = ip.substring(0, ip.indexOf(':')); + hostname = InetAddress.getByName(n).getHostName(); + } catch (UnknownHostException e) { + return null; + } + + return hostname; + } + + /** + * Return hostname without throwing exception. + * @return hostname + */ + public static String getHostname() { + try {return "" + InetAddress.getLocalHost();} + catch(UnknownHostException uhe) {return "" + uhe;} + } +} diff --git a/src/java/org/apache/hadoop/net/NetworkTopology.java b/src/java/org/apache/hadoop/net/NetworkTopology.java new file mode 100644 index 00000000000..1de588bd43f --- /dev/null +++ b/src/java/org/apache/hadoop/net/NetworkTopology.java @@ -0,0 +1,655 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Random; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** The class represents a cluster of computer with a tree hierarchical + * network topology. + * For example, a cluster may be consists of many data centers filled + * with racks of computers. + * In a network topology, leaves represent data nodes (computers) and inner + * nodes represent switches/routers that manage traffic in/out of data centers + * or racks. + * + */ +public class NetworkTopology { + public final static String DEFAULT_RACK = "/default-rack"; + public final static int DEFAULT_HOST_LEVEL = 2; + public static final Log LOG = + LogFactory.getLog(NetworkTopology.class); + + /* Inner Node represent a switch/router of a data center or rack. + * Different from a leave node, it has non-null children. 
+ */ + private class InnerNode extends NodeBase { + private ArrayList children=new ArrayList(); + private int numOfLeaves; + + /** Construct an InnerNode from a path-like string */ + InnerNode(String path) { + super(path); + } + + /** Construct an InnerNode from its name and its network location */ + InnerNode(String name, String location) { + super(name, location); + } + + /** Construct an InnerNode + * from its name, its network location, its parent, and its level */ + InnerNode(String name, String location, InnerNode parent, int level) { + super(name, location, parent, level); + } + + /** Get its children */ + Collection getChildren() {return children;} + + /** Return the number of children this node has */ + int getNumOfChildren() { + return children.size(); + } + + /** Judge if this node represents a rack + * Return true if it has no child or its children are not InnerNodes + */ + boolean isRack() { + if (children.isEmpty()) { + return true; + } + + Node firstChild = children.get(0); + if (firstChild instanceof InnerNode) { + return false; + } + + return true; + } + + /** Judge if this node is an ancestor of node n + * + * @param n a node + * @return true if this node is an ancestor of n + */ + boolean isAncestor(Node n) { + return getPath(this).equals(NodeBase.PATH_SEPARATOR_STR) || + (n.getNetworkLocation()+NodeBase.PATH_SEPARATOR_STR). + startsWith(getPath(this)+NodeBase.PATH_SEPARATOR_STR); + } + + /** Judge if this node is the parent of node n + * + * @param n a node + * @return true if this node is the parent of n + */ + boolean isParent(Node n) { + return n.getNetworkLocation().equals(getPath(this)); + } + + /* Return a child name of this node who is an ancestor of node n */ + private String getNextAncestorName(Node n) { + if (!isAncestor(n)) { + throw new IllegalArgumentException( + this + "is not an ancestor of " + n); + } + String name = n.getNetworkLocation().substring(getPath(this).length()); + if (name.charAt(0) == PATH_SEPARATOR) { + name = name.substring(1); + } + int index=name.indexOf(PATH_SEPARATOR); + if (index !=-1) + name = name.substring(0, index); + return name; + } + + /** Add node n to the subtree of this node + * @param n node to be added + * @return true if the node is added; false otherwise + */ + boolean add(Node n) { + if (!isAncestor(n)) + throw new IllegalArgumentException(n.getName()+", which is located at " + +n.getNetworkLocation()+", is not a decendent of " + +getPath(this)); + if (isParent(n)) { + // this node is the parent of n; add n directly + n.setParent(this); + n.setLevel(this.level+1); + for(int i=0; in from the subtree of this node + * @param n node to be deleted + * @return true if the node is deleted; false otherwise + */ + boolean remove(Node n) { + String parent = n.getNetworkLocation(); + String currentPath = getPath(this); + if (!isAncestor(n)) + throw new IllegalArgumentException(n.getName() + +", which is located at " + +parent+", is not a descendent of "+currentPath); + if (isParent(n)) { + // this node is the parent of n; remove n directly + for(int i=0; ileafIndex leaf of this subtree + * if it is not in the excludedNode*/ + private Node getLeaf(int leafIndex, Node excludedNode) { + int count=0; + // check if the excluded node a leaf + boolean isLeaf = + excludedNode == null || !(excludedNode instanceof InnerNode); + // calculate the total number of excluded leaf nodes + int numOfExcludedLeaves = + isLeaf ? 
1 : ((InnerNode)excludedNode).getNumOfLeaves(); + if (isRack()) { // children are leaves + if (isLeaf) { // excluded node is a leaf node + int excludedIndex = children.indexOf(excludedNode); + if (excludedIndex != -1 && leafIndex >= 0) { + // excluded node is one of the children so adjust the leaf index + leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex; + } + } + // range check + if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) { + return null; + } + return children.get(leafIndex); + } else { + for(int i=0; i leafIndex) { + // the leaf is in the child subtree + return child.getLeaf(leafIndex-count, excludedNode); + } else { + // go to the next child + count = count+numOfLeaves; + } + } else { // it is the excluededNode + // skip it and set the excludedNode to be null + excludedNode = null; + } + } + return null; + } + } + + int getNumOfLeaves() { + return numOfLeaves; + } + } // end of InnerNode + + InnerNode clusterMap = new InnerNode(InnerNode.ROOT); // the root + private int numOfRacks = 0; // rack counter + private ReadWriteLock netlock; + + public NetworkTopology() { + netlock = new ReentrantReadWriteLock(); + } + + /** Add a leaf node + * Update node counter & rack counter if necessary + * @param node + * node to be added + * @exception IllegalArgumentException if add a node to a leave + or node to be added is not a leaf + */ + public void add(Node node) { + if (node==null) return; + if( node instanceof InnerNode ) { + throw new IllegalArgumentException( + "Not allow to add an inner node: "+NodeBase.getPath(node)); + } + netlock.writeLock().lock(); + try { + Node rack = getNode(node.getNetworkLocation()); + if (rack != null && !(rack instanceof InnerNode)) { + throw new IllegalArgumentException("Unexpected data node " + + node.toString() + + " at an illegal network location"); + } + if (clusterMap.add(node)) { + LOG.info("Adding a new node: "+NodeBase.getPath(node)); + if (rack == null) { + numOfRacks++; + } + } + LOG.debug("NetworkTopology became:\n" + this.toString()); + } finally { + netlock.writeLock().unlock(); + } + } + + /** Remove a node + * Update node counter & rack counter if necessary + * @param node + * node to be removed + */ + public void remove(Node node) { + if (node==null) return; + if( node instanceof InnerNode ) { + throw new IllegalArgumentException( + "Not allow to remove an inner node: "+NodeBase.getPath(node)); + } + LOG.info("Removing a node: "+NodeBase.getPath(node)); + netlock.writeLock().lock(); + try { + if (clusterMap.remove(node)) { + InnerNode rack = (InnerNode)getNode(node.getNetworkLocation()); + if (rack == null) { + numOfRacks--; + } + } + LOG.debug("NetworkTopology became:\n" + this.toString()); + } finally { + netlock.writeLock().unlock(); + } + } + + /** Check if the tree contains node node + * + * @param node + * a node + * @return true if node is already in the tree; false otherwise + */ + public boolean contains(Node node) { + if (node == null) return false; + netlock.readLock().lock(); + try { + Node parent = node.getParent(); + for(int level=node.getLevel(); parent!=null&&level>0; + parent=parent.getParent(), level--) { + if (parent == clusterMap) + return true; + } + } finally { + netlock.readLock().unlock(); + } + return false; + } + + /** Given a string representation of a node, return its reference + * + * @param loc + * a path-like string representation of a node + * @return a reference to the node; null if the node is not in the tree + */ + public Node getNode(String loc) { + netlock.readLock().lock(); + try { + 
loc = NodeBase.normalize(loc); + if (!NodeBase.ROOT.equals(loc)) + loc = loc.substring(1); + return clusterMap.getLoc(loc); + } finally { + netlock.readLock().unlock(); + } + } + + /** Return the total number of racks */ + public int getNumOfRacks() { + netlock.readLock().lock(); + try { + return numOfRacks; + } finally { + netlock.readLock().unlock(); + } + } + + /** Return the total number of nodes */ + public int getNumOfLeaves() { + netlock.readLock().lock(); + try { + return clusterMap.getNumOfLeaves(); + } finally { + netlock.readLock().unlock(); + } + } + + /** Return the distance between two nodes + * It is assumed that the distance from one node to its parent is 1 + * The distance between two nodes is calculated by summing up their distances + * to their closest common ancestor. + * @param node1 one node + * @param node2 another node + * @return the distance between node1 and node2 + * node1 or node2 do not belong to the cluster + */ + public int getDistance(Node node1, Node node2) { + if (node1 == node2) { + return 0; + } + Node n1=node1, n2=node2; + int dis = 0; + netlock.readLock().lock(); + try { + int level1=node1.getLevel(), level2=node2.getLevel(); + while(n1!=null && level1>level2) { + n1 = n1.getParent(); + level1--; + dis++; + } + while(n2!=null && level2>level1) { + n2 = n2.getParent(); + level2--; + dis++; + } + while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) { + n1=n1.getParent(); + n2=n2.getParent(); + dis+=2; + } + } finally { + netlock.readLock().unlock(); + } + if (n1==null) { + LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1)); + return Integer.MAX_VALUE; + } + if (n2==null) { + LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2)); + return Integer.MAX_VALUE; + } + return dis+2; + } + + /** Check if two nodes are on the same rack + * @param node1 one node + * @param node2 another node + * @return true if node1 and node2 are on the same rack; false otherwise + * @exception IllegalArgumentException when either node1 or node2 is null, or + * node1 or node2 do not belong to the cluster + */ + public boolean isOnSameRack( Node node1, Node node2) { + if (node1 == null || node2 == null) { + return false; + } + + netlock.readLock().lock(); + try { + return node1.getParent()==node2.getParent(); + } finally { + netlock.readLock().unlock(); + } + } + + final private static Random r = new Random(); + /** randomly choose one node from scope + * if scope starts with ~, choose one from the all nodes except for the + * ones in scope; otherwise, choose one from scope + * @param scope range of nodes from which a node will be chosen + * @return the chosen node + */ + public Node chooseRandom(String scope) { + netlock.readLock().lock(); + try { + if (scope.startsWith("~")) { + return chooseRandom(NodeBase.ROOT, scope.substring(1)); + } else { + return chooseRandom(scope, null); + } + } finally { + netlock.readLock().unlock(); + } + } + + private Node chooseRandom(String scope, String excludedScope){ + if (excludedScope != null) { + if (scope.startsWith(excludedScope)) { + return null; + } + if (!excludedScope.startsWith(scope)) { + excludedScope = null; + } + } + Node node = getNode(scope); + if (!(node instanceof InnerNode)) { + return node; + } + InnerNode innerNode = (InnerNode)node; + int numOfDatanodes = innerNode.getNumOfLeaves(); + if (excludedScope == null) { + node = null; + } else { + node = getNode(excludedScope); + if (!(node instanceof InnerNode)) { + numOfDatanodes -= 1; + } else { + numOfDatanodes -= 
((InnerNode)node).getNumOfLeaves(); + } + } + int leaveIndex = r.nextInt(numOfDatanodes); + return innerNode.getLeaf(leaveIndex, node); + } + + /** return the number of leaves in scope but not in excludedNodes + * if scope starts with ~, return the number of nodes that are not + * in scope and excludedNodes; + * @param scope a path string that may start with ~ + * @param excludedNodes a list of nodes + * @return number of available nodes + */ + public int countNumOfAvailableNodes(String scope, + Collection excludedNodes) { + boolean isExcluded=false; + if (scope.startsWith("~")) { + isExcluded=true; + scope=scope.substring(1); + } + scope = NodeBase.normalize(scope); + int count=0; // the number of nodes in both scope & excludedNodes + netlock.readLock().lock(); + try { + for(Node node:excludedNodes) { + if ((NodeBase.getPath(node)+NodeBase.PATH_SEPARATOR_STR). + startsWith(scope+NodeBase.PATH_SEPARATOR_STR)) { + count++; + } + } + Node n=getNode(scope); + int scopeNodeCount=1; + if (n instanceof InnerNode) { + scopeNodeCount=((InnerNode)n).getNumOfLeaves(); + } + if (isExcluded) { + return clusterMap.getNumOfLeaves()- + scopeNodeCount-excludedNodes.size()+count; + } else { + return scopeNodeCount-count; + } + } finally { + netlock.readLock().unlock(); + } + } + + /** convert a network tree to a string */ + public String toString() { + // print the number of racks + StringBuffer tree = new StringBuffer(); + tree.append("Number of racks: "); + tree.append(numOfRacks); + tree.append("\n"); + // print the number of leaves + int numOfLeaves = getNumOfLeaves(); + tree.append("Expected number of leaves:"); + tree.append(numOfLeaves); + tree.append("\n"); + // print nodes + for(int i=0; ireader + * It linearly scans the array, if a local node is found, swap it with + * the first element of the array. + * If a local rack node is found, swap it with the first element following + * the local node. + * If neither local node or local rack node is found, put a random replica + * location at position 0. + * It leaves the rest nodes untouched. + */ + public void pseudoSortByDistance( Node reader, Node[] nodes ) { + int tempIndex = 0; + if (reader != null ) { + int localRackNode = -1; + //scan the array to find the local node & local rack node + for(int i=0; i resolve(List names) { + List m = new ArrayList(names.size()); + + if (names.isEmpty()) { + return m; + } + + if (scriptName == null) { + for (int i = 0; i < names.size(); i++) { + m.add(NetworkTopology.DEFAULT_RACK); + } + return m; + } + + String output = runResolveCommand(names); + if (output != null) { + StringTokenizer allSwitchInfo = new StringTokenizer(output); + while (allSwitchInfo.hasMoreTokens()) { + String switchInfo = allSwitchInfo.nextToken(); + m.add(switchInfo); + } + + if (m.size() != names.size()) { + // invalid number of entries returned by the script + LOG.warn("Script " + scriptName + " returned " + + Integer.toString(m.size()) + " values when " + + Integer.toString(names.size()) + " were expected."); + return null; + } + } else { + // an error occurred. return null to signify this. 
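// Illustrative sketch of the NetworkTopology operations above, assuming the
// NodeBase class that InnerNode extends (it is not part of this hunk): two
// racks under one data center, then a few queries.
//
// import org.apache.hadoop.net.*;
//
// class TopologyExample {
//   public static void main(String[] args) {
//     NetworkTopology topo = new NetworkTopology();
//     Node h1 = new NodeBase("h1", "/dc1/rack1");
//     Node h2 = new NodeBase("h2", "/dc1/rack1");
//     Node h3 = new NodeBase("h3", "/dc1/rack2");
//     topo.add(h1);
//     topo.add(h2);
//     topo.add(h3);
//     System.out.println(topo.isOnSameRack(h1, h2)); // true
//     System.out.println(topo.getDistance(h1, h3));  // 4: rack1 -> dc1 -> rack2
//     System.out.println(topo.getNumOfRacks());      // 2
//     System.out.println(topo.chooseRandom("/dc1/rack1").getName()); // h1 or h2
//   }
// }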
+ // (exn was already logged in runResolveCommand) + return null; + } + + return m; + } + + private String runResolveCommand(List args) { + int loopCount = 0; + if (args.size() == 0) { + return null; + } + StringBuffer allOutput = new StringBuffer(); + int numProcessed = 0; + if (maxArgs < MIN_ALLOWABLE_ARGS) { + LOG.warn("Invalid value " + Integer.toString(maxArgs) + + " for " + SCRIPT_ARG_COUNT_KEY + "; must be >= " + + Integer.toString(MIN_ALLOWABLE_ARGS)); + return null; + } + + while (numProcessed != args.size()) { + int start = maxArgs * loopCount; + List cmdList = new ArrayList(); + cmdList.add(scriptName); + for (numProcessed = start; numProcessed < (start + maxArgs) && + numProcessed < args.size(); numProcessed++) { + cmdList.add(args.get(numProcessed)); + } + File dir = null; + String userDir; + if ((userDir = System.getProperty("user.dir")) != null) { + dir = new File(userDir); + } + ShellCommandExecutor s = new ShellCommandExecutor( + cmdList.toArray(new String[0]), dir); + try { + s.execute(); + allOutput.append(s.getOutput() + " "); + } catch (Exception e) { + LOG.warn(StringUtils.stringifyException(e)); + return null; + } + loopCount++; + } + return allOutput.toString(); + } + } +} diff --git a/src/java/org/apache/hadoop/net/SocketIOWithTimeout.java b/src/java/org/apache/hadoop/net/SocketIOWithTimeout.java new file mode 100644 index 00000000000..f48d2d2db39 --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocketIOWithTimeout.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.net; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.SocketAddress; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.nio.channels.SelectableChannel; +import java.nio.channels.SelectionKey; +import java.nio.channels.Selector; +import java.nio.channels.SocketChannel; +import java.nio.channels.spi.SelectorProvider; +import java.util.Iterator; +import java.util.LinkedList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +/** + * This supports input and output streams for a socket channels. + * These streams can have a timeout. + */ +abstract class SocketIOWithTimeout { + // This is intentionally package private. + + static final Log LOG = LogFactory.getLog(SocketIOWithTimeout.class); + + private SelectableChannel channel; + private long timeout; + private boolean closed = false; + + private static SelectorPool selector = new SelectorPool(); + + /* A timeout value of 0 implies wait for ever. + * We should have a value of timeout that implies zero wait.. i.e. + * read or write returns immediately. + * + * This will set channel to non-blocking. 
+ */ + SocketIOWithTimeout(SelectableChannel channel, long timeout) + throws IOException { + checkChannelValidity(channel); + + this.channel = channel; + this.timeout = timeout; + // Set non-blocking + channel.configureBlocking(false); + } + + void close() { + closed = true; + } + + boolean isOpen() { + return !closed && channel.isOpen(); + } + + SelectableChannel getChannel() { + return channel; + } + + /** + * Utility function to check if channel is ok. + * Mainly to throw IOException instead of runtime exception + * in case of mismatch. This mismatch can occur for many runtime + * reasons. + */ + static void checkChannelValidity(Object channel) throws IOException { + if (channel == null) { + /* Most common reason is that original socket does not have a channel. + * So making this an IOException rather than a RuntimeException. + */ + throw new IOException("Channel is null. Check " + + "how the channel or socket is created."); + } + + if (!(channel instanceof SelectableChannel)) { + throw new IOException("Channel should be a SelectableChannel"); + } + } + + /** + * Performs actual IO operations. This is not expected to block. + * + * @param buf + * @return number of bytes (or some equivalent). 0 implies underlying + * channel is drained completely. We will wait if more IO is + * required. + * @throws IOException + */ + abstract int performIO(ByteBuffer buf) throws IOException; + + /** + * Performs one IO and returns number of bytes read or written. + * It waits up to the specified timeout. If the channel is + * not read before the timeout, SocketTimeoutException is thrown. + * + * @param buf buffer for IO + * @param ops Selection Ops used for waiting. Suggested values: + * SelectionKey.OP_READ while reading and SelectionKey.OP_WRITE while + * writing. + * + * @return number of bytes read or written. negative implies end of stream. + * @throws IOException + */ + int doIO(ByteBuffer buf, int ops) throws IOException { + + /* For now only one thread is allowed. If user want to read or write + * from multiple threads, multiple streams could be created. In that + * case multiple threads work as well as underlying channel supports it. + */ + if (!buf.hasRemaining()) { + throw new IllegalArgumentException("Buffer has no data left."); + //or should we just return 0? + } + + while (buf.hasRemaining()) { + if (closed) { + return -1; + } + + try { + int n = performIO(buf); + if (n != 0) { + // successful io or an error. + return n; + } + } catch (IOException e) { + if (!channel.isOpen()) { + closed = true; + } + throw e; + } + + //now wait for socket to be ready. + int count = 0; + try { + count = selector.select(channel, ops, timeout); + } catch (IOException e) { //unexpected IOException. + closed = true; + throw e; + } + + if (count == 0) { + throw new SocketTimeoutException(timeoutExceptionString(channel, + timeout, ops)); + } + // otherwise the socket should be ready for io. + } + + return 0; // does not reach here. + } + + /** + * The contract is similar to {@link SocketChannel#connect(SocketAddress)} + * with a timeout. 
+ * + * @see SocketChannel#connect(SocketAddress) + * + * @param channel - this should be a {@link SelectableChannel} + * @param endpoint + * @throws IOException + */ + static void connect(SocketChannel channel, + SocketAddress endpoint, int timeout) throws IOException { + + boolean blockingOn = channel.isBlocking(); + if (blockingOn) { + channel.configureBlocking(false); + } + + try { + if (channel.connect(endpoint)) { + return; + } + + long timeoutLeft = timeout; + long endTime = (timeout > 0) ? (System.currentTimeMillis() + timeout): 0; + + while (true) { + // we might have to call finishConnect() more than once + // for some channels (with user level protocols) + + int ret = selector.select((SelectableChannel)channel, + SelectionKey.OP_CONNECT, timeoutLeft); + + if (ret > 0 && channel.finishConnect()) { + return; + } + + if (ret == 0 || + (timeout > 0 && + (timeoutLeft = (endTime - System.currentTimeMillis())) <= 0)) { + throw new SocketTimeoutException( + timeoutExceptionString(channel, timeout, + SelectionKey.OP_CONNECT)); + } + } + } catch (IOException e) { + // javadoc for SocketChannel.connect() says channel should be closed. + try { + channel.close(); + } catch (IOException ignored) {} + throw e; + } finally { + if (blockingOn && channel.isOpen()) { + channel.configureBlocking(true); + } + } + } + + /** + * This is similar to {@link #doIO(ByteBuffer, int)} except that it + * does not perform any I/O. It just waits for the channel to be ready + * for I/O as specified in ops. + * + * @param ops Selection Ops used for waiting + * + * @throws SocketTimeoutException + * if select on the channel times out. + * @throws IOException + * if any other I/O error occurs. + */ + void waitForIO(int ops) throws IOException { + + if (selector.select(channel, ops, timeout) == 0) { + throw new SocketTimeoutException(timeoutExceptionString(channel, timeout, + ops)); + } + } + + private static String timeoutExceptionString(SelectableChannel channel, + long timeout, int ops) { + + String waitingFor; + switch(ops) { + + case SelectionKey.OP_READ : + waitingFor = "read"; break; + + case SelectionKey.OP_WRITE : + waitingFor = "write"; break; + + case SelectionKey.OP_CONNECT : + waitingFor = "connect"; break; + + default : + waitingFor = "" + ops; + } + + return timeout + " millis timeout while " + + "waiting for channel to be ready for " + + waitingFor + ". ch : " + channel; + } + + /** + * This maintains a pool of selectors. These selectors are closed + * once they are idle (unused) for a few seconds. + */ + private static class SelectorPool { + + private static class SelectorInfo { + Selector selector; + long lastActivityTime; + LinkedList queue; + + void close() { + if (selector != null) { + try { + selector.close(); + } catch (IOException e) { + LOG.warn("Unexpected exception while closing selector : " + + StringUtils.stringifyException(e)); + } + } + } + } + + private static class ProviderInfo { + SelectorProvider provider; + LinkedList queue; // lifo + ProviderInfo next; + } + + private static final long IDLE_TIMEOUT = 10 * 1000; // 10 seconds. + + private ProviderInfo providerList = null; + + /** + * Waits on the channel with the given timeout using one of the + * cached selectors. It also removes any cached selectors that are + * idle for a few seconds. 
+ * + * @param channel + * @param ops + * @param timeout + * @return + * @throws IOException + */ + int select(SelectableChannel channel, int ops, long timeout) + throws IOException { + + SelectorInfo info = get(channel); + + SelectionKey key = null; + int ret = 0; + + try { + while (true) { + long start = (timeout == 0) ? 0 : System.currentTimeMillis(); + + key = channel.register(info.selector, ops); + ret = info.selector.select(timeout); + + if (ret != 0) { + return ret; + } + + /* Sometimes select() returns 0 much before timeout for + * unknown reasons. So select again if required. + */ + if (timeout > 0) { + timeout -= System.currentTimeMillis() - start; + if (timeout <= 0) { + return 0; + } + } + + if (Thread.currentThread().isInterrupted()) { + throw new InterruptedIOException("Interruped while waiting for " + + "IO on channel " + channel + + ". " + timeout + + " millis timeout left."); + } + } + } finally { + if (key != null) { + key.cancel(); + } + + //clear the canceled key. + try { + info.selector.selectNow(); + } catch (IOException e) { + LOG.info("Unexpected Exception while clearing selector : " + + StringUtils.stringifyException(e)); + // don't put the selector back. + info.close(); + return ret; + } + + release(info); + } + } + + /** + * Takes one selector from end of LRU list of free selectors. + * If there are no selectors awailable, it creates a new selector. + * Also invokes trimIdleSelectors(). + * + * @param channel + * @return + * @throws IOException + */ + private synchronized SelectorInfo get(SelectableChannel channel) + throws IOException { + SelectorInfo selInfo = null; + + SelectorProvider provider = channel.provider(); + + // pick the list : rarely there is more than one provider in use. + ProviderInfo pList = providerList; + while (pList != null && pList.provider != provider) { + pList = pList.next; + } + if (pList == null) { + //LOG.info("Creating new ProviderInfo : " + provider.toString()); + pList = new ProviderInfo(); + pList.provider = provider; + pList.queue = new LinkedList(); + pList.next = providerList; + providerList = pList; + } + + LinkedList queue = pList.queue; + + if (queue.isEmpty()) { + Selector selector = provider.openSelector(); + selInfo = new SelectorInfo(); + selInfo.selector = selector; + selInfo.queue = queue; + } else { + selInfo = queue.removeLast(); + } + + trimIdleSelectors(System.currentTimeMillis()); + return selInfo; + } + + /** + * puts selector back at the end of LRU list of free selectos. + * Also invokes trimIdleSelectors(). + * + * @param info + */ + private synchronized void release(SelectorInfo info) { + long now = System.currentTimeMillis(); + trimIdleSelectors(now); + info.lastActivityTime = now; + info.queue.addLast(info); + } + + /** + * Closes selectors that are idle for IDLE_TIMEOUT (10 sec). It does not + * traverse the whole list, just over the one that have crossed + * the timeout. 
+ */ + private void trimIdleSelectors(long now) { + long cutoff = now - IDLE_TIMEOUT; + + for(ProviderInfo pList=providerList; pList != null; pList=pList.next) { + if (pList.queue.isEmpty()) { + continue; + } + for(Iterator it = pList.queue.iterator(); it.hasNext();) { + SelectorInfo info = it.next(); + if (info.lastActivityTime > cutoff) { + break; + } + it.remove(); + info.close(); + } + } + } + } +} diff --git a/src/java/org/apache/hadoop/net/SocketInputStream.java b/src/java/org/apache/hadoop/net/SocketInputStream.java new file mode 100644 index 00000000000..2568ba9c2bc --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocketInputStream.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.net; + +import java.io.IOException; +import java.io.InputStream; +import java.net.Socket; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.SelectableChannel; +import java.nio.channels.SelectionKey; + + +/** + * This implements an input stream that can have a timeout while reading. + * This sets non-blocking flag on the socket channel. + * So after create this object, read() on + * {@link Socket#getInputStream()} and write() on + * {@link Socket#getOutputStream()} for the associated socket will throw + * IllegalBlockingModeException. + * Please use {@link SocketOutputStream} for writing. + */ +public class SocketInputStream extends InputStream + implements ReadableByteChannel { + + private Reader reader; + + private static class Reader extends SocketIOWithTimeout { + ReadableByteChannel channel; + + Reader(ReadableByteChannel channel, long timeout) throws IOException { + super((SelectableChannel)channel, timeout); + this.channel = channel; + } + + int performIO(ByteBuffer buf) throws IOException { + return channel.read(buf); + } + } + + /** + * Create a new input stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @param channel + * Channel for reading, should also be a {@link SelectableChannel}. + * The channel will be configured to be non-blocking. + * @param timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketInputStream(ReadableByteChannel channel, long timeout) + throws IOException { + SocketIOWithTimeout.checkChannelValidity(channel); + reader = new Reader(channel, timeout); + } + + /** + * Same as SocketInputStream(socket.getChannel(), timeout):

+ * + * Create a new input stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + * + * @param socket should have a channel associated with it. + * @param timeout timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketInputStream(Socket socket, long timeout) + throws IOException { + this(socket.getChannel(), timeout); + } + + /** + * Same as SocketInputStream(socket.getChannel(), socket.getSoTimeout()) + * :

+ * + * Create a new input stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * @see SocketInputStream#SocketInputStream(ReadableByteChannel, long) + * + * @param socket should have a channel associated with it. + * @throws IOException + */ + public SocketInputStream(Socket socket) throws IOException { + this(socket.getChannel(), socket.getSoTimeout()); + } + + @Override + public int read() throws IOException { + /* Allocation can be removed if required. + * probably no need to optimize or encourage single byte read. + */ + byte[] buf = new byte[1]; + int ret = read(buf, 0, 1); + if (ret > 0) { + return (byte)buf[0]; + } + if (ret != -1) { + // unexpected + throw new IOException("Could not read from stream"); + } + return ret; + } + + public int read(byte[] b, int off, int len) throws IOException { + return read(ByteBuffer.wrap(b, off, len)); + } + + public synchronized void close() throws IOException { + /* close the channel since Socket.getInputStream().close() + * closes the socket. + */ + reader.channel.close(); + reader.close(); + } + + /** + * Returns underlying channel used by inputstream. + * This is useful in certain cases like channel for + * {@link FileChannel#transferFrom(ReadableByteChannel, long, long)}. + */ + public ReadableByteChannel getChannel() { + return reader.channel; + } + + //ReadableByteChannel interface + + public boolean isOpen() { + return reader.isOpen(); + } + + public int read(ByteBuffer dst) throws IOException { + return reader.doIO(dst, SelectionKey.OP_READ); + } + + /** + * waits for the underlying channel to be ready for reading. + * The timeout specified for this stream applies to this wait. + * + * @throws SocketTimeoutException + * if select on the channel times out. + * @throws IOException + * if any other I/O error occurs. + */ + public void waitForReadable() throws IOException { + reader.waitForIO(SelectionKey.OP_READ); + } +} diff --git a/src/java/org/apache/hadoop/net/SocketOutputStream.java b/src/java/org/apache/hadoop/net/SocketOutputStream.java new file mode 100644 index 00000000000..fa4e8500b5a --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocketOutputStream.java @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
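// Illustrative sketch of a timed read with the stream above; the 5 second
// timeout and method name are made up. The socket must have an associated NIO
// channel (e.g. one created by a Hadoop socket factory).
//
// import java.net.Socket;
// import org.apache.hadoop.net.SocketInputStream;
//
// class TimedReadExample {
//   static int readOneByte(Socket connectedNioSocket) throws Exception {
//     SocketInputStream in = new SocketInputStream(connectedNioSocket, 5000);
//     in.waitForReadable();   // optional: block until readable or SocketTimeoutException
//     return in.read();       // -1 on end of stream
//   }
// }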
+ */ + +package org.apache.hadoop.net; + +import java.io.EOFException; +import java.io.IOException; +import java.io.OutputStream; +import java.net.Socket; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.SelectableChannel; +import java.nio.channels.SelectionKey; +import java.nio.channels.WritableByteChannel; + +/** + * This implements an output stream that can have a timeout while writing. + * This sets non-blocking flag on the socket channel. + * So after creating this object , read() on + * {@link Socket#getInputStream()} and write() on + * {@link Socket#getOutputStream()} on the associated socket will throw + * llegalBlockingModeException. + * Please use {@link SocketInputStream} for reading. + */ +public class SocketOutputStream extends OutputStream + implements WritableByteChannel { + + private Writer writer; + + private static class Writer extends SocketIOWithTimeout { + WritableByteChannel channel; + + Writer(WritableByteChannel channel, long timeout) throws IOException { + super((SelectableChannel)channel, timeout); + this.channel = channel; + } + + int performIO(ByteBuffer buf) throws IOException { + return channel.write(buf); + } + } + + /** + * Create a new ouput stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @param channel + * Channel for writing, should also be a {@link SelectableChannel}. + * The channel will be configured to be non-blocking. + * @param timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketOutputStream(WritableByteChannel channel, long timeout) + throws IOException { + SocketIOWithTimeout.checkChannelValidity(channel); + writer = new Writer(channel, timeout); + } + + /** + * Same as SocketOutputStream(socket.getChannel(), timeout):

+ * + * Create a new ouput stream with the given timeout. If the timeout + * is zero, it will be treated as infinite timeout. The socket's + * channel will be configured to be non-blocking. + * + * @see SocketOutputStream#SocketOutputStream(WritableByteChannel, long) + * + * @param socket should have a channel associated with it. + * @param timeout timeout timeout in milliseconds. must not be negative. + * @throws IOException + */ + public SocketOutputStream(Socket socket, long timeout) + throws IOException { + this(socket.getChannel(), timeout); + } + + public void write(int b) throws IOException { + /* If we need to, we can optimize this allocation. + * probably no need to optimize or encourage single byte writes. + */ + byte[] buf = new byte[1]; + buf[0] = (byte)b; + write(buf, 0, 1); + } + + public void write(byte[] b, int off, int len) throws IOException { + ByteBuffer buf = ByteBuffer.wrap(b, off, len); + while (buf.hasRemaining()) { + try { + if (write(buf) < 0) { + throw new IOException("The stream is closed"); + } + } catch (IOException e) { + /* Unlike read, write can not inform user of partial writes. + * So will close this if there was a partial write. + */ + if (buf.capacity() > buf.remaining()) { + writer.close(); + } + throw e; + } + } + } + + public synchronized void close() throws IOException { + /* close the channel since Socket.getOuputStream().close() + * closes the socket. + */ + writer.channel.close(); + writer.close(); + } + + /** + * Returns underlying channel used by this stream. + * This is useful in certain cases like channel for + * {@link FileChannel#transferTo(long, long, WritableByteChannel)} + */ + public WritableByteChannel getChannel() { + return writer.channel; + } + + //WritableByteChannle interface + + public boolean isOpen() { + return writer.isOpen(); + } + + public int write(ByteBuffer src) throws IOException { + return writer.doIO(src, SelectionKey.OP_WRITE); + } + + /** + * waits for the underlying channel to be ready for writing. + * The timeout specified for this stream applies to this wait. + * + * @throws SocketTimeoutException + * if select on the channel times out. + * @throws IOException + * if any other I/O error occurs. + */ + public void waitForWritable() throws IOException { + writer.waitForIO(SelectionKey.OP_WRITE); + } + + /** + * Transfers data from FileChannel using + * {@link FileChannel#transferTo(long, long, WritableByteChannel)}. + * + * Similar to readFully(), this waits till requested amount of + * data is transfered. + * + * @param fileCh FileChannel to transfer data from. + * @param position position within the channel where the transfer begins + * @param count number of bytes to transfer. + * + * @throws EOFException + * If end of input file is reached before requested number of + * bytes are transfered. + * + * @throws SocketTimeoutException + * If this channel blocks transfer longer than timeout for + * this stream. + * + * @throws IOException Includes any exception thrown by + * {@link FileChannel#transferTo(long, long, WritableByteChannel)}. + */ + public void transferToFully(FileChannel fileCh, long position, int count) + throws IOException { + + while (count > 0) { + /* + * Ideally we should wait after transferTo returns 0. But because of + * a bug in JRE on Linux (http://bugs.sun.com/view_bug.do?bug_id=5103988), + * which throws an exception instead of returning 0, we wait for the + * channel to be writable before writing to it. 
If you ever see + * IOException with message "Resource temporarily unavailable" + * thrown here, please let us know. + * + * Once we move to JAVA SE 7, wait should be moved to correct place. + */ + waitForWritable(); + int nTransfered = (int) fileCh.transferTo(position, count, getChannel()); + + if (nTransfered == 0) { + //check if end of file is reached. + if (position >= fileCh.size()) { + throw new EOFException("EOF Reached. file size is " + fileCh.size() + + " and " + count + " more bytes left to be " + + "transfered."); + } + //otherwise assume the socket is full. + //waitForWritable(); // see comment above. + } else if (nTransfered < 0) { + throw new IOException("Unexpected return of " + nTransfered + + " from transferTo()"); + } else { + position += nTransfered; + count -= nTransfered; + } + } + } +} diff --git a/src/java/org/apache/hadoop/net/SocksSocketFactory.java b/src/java/org/apache/hadoop/net/SocksSocketFactory.java new file mode 100644 index 00000000000..19c89210da9 --- /dev/null +++ b/src/java/org/apache/hadoop/net/SocksSocketFactory.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Proxy; +import java.net.Socket; +import java.net.UnknownHostException; + +import javax.net.SocketFactory; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** + * Specialized SocketFactory to create sockets with a SOCKS proxy + */ +public class SocksSocketFactory extends SocketFactory implements + Configurable { + + private Configuration conf; + + private Proxy proxy; + + /** + * Default empty constructor (for use with the reflection API). 
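// Illustrative sketch of transferToFully() above: push a whole local file down
// an already connected NIO socket with a 10 second write timeout (the path
// argument, timeout and class name are made up).

import java.io.FileInputStream;
import java.net.Socket;
import java.nio.channels.FileChannel;
import org.apache.hadoop.net.SocketOutputStream;

class TransferExample {
  static void sendFile(Socket connectedNioSocket, String path) throws Exception {
    SocketOutputStream out = new SocketOutputStream(connectedNioSocket, 10000);
    FileChannel file = new FileInputStream(path).getChannel();
    try {
      out.transferToFully(file, 0, (int) file.size());
    } finally {
      file.close();
      out.close();
    }
  }
}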
+ */ + public SocksSocketFactory() { + this.proxy = Proxy.NO_PROXY; + } + + /** + * Constructor with a supplied Proxy + * + * @param proxy the proxy to use to create sockets + */ + public SocksSocketFactory(Proxy proxy) { + this.proxy = proxy; + } + + /* @inheritDoc */ + @Override + public Socket createSocket() throws IOException { + + return new Socket(proxy); + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port) throws IOException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port, + InetAddress localHostAddr, int localPort) throws IOException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port, + InetAddress localHostAddr, int localPort) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public int hashCode() { + return proxy.hashCode(); + } + + /* @inheritDoc */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof SocksSocketFactory)) + return false; + final SocksSocketFactory other = (SocksSocketFactory) obj; + if (proxy == null) { + if (other.proxy != null) + return false; + } else if (!proxy.equals(other.proxy)) + return false; + return true; + } + + /* @inheritDoc */ + public Configuration getConf() { + return this.conf; + } + + /* @inheritDoc */ + public void setConf(Configuration conf) { + this.conf = conf; + String proxyStr = conf.get("hadoop.socks.server"); + if ((proxyStr != null) && (proxyStr.length() > 0)) { + setProxy(proxyStr); + } + } + + /** + * Set the proxy of this socket factory as described in the string + * parameter + * + * @param proxyStr the proxy address using the format "host:port" + */ + private void setProxy(String proxyStr) { + String[] strs = proxyStr.split(":", 2); + if (strs.length != 2) + throw new RuntimeException("Bad SOCKS proxy parameter: " + proxyStr); + String host = strs[0]; + int port = Integer.parseInt(strs[1]); + this.proxy = + new Proxy(Proxy.Type.SOCKS, InetSocketAddress.createUnresolved(host, + port)); + } +} diff --git a/src/java/org/apache/hadoop/net/StandardSocketFactory.java b/src/java/org/apache/hadoop/net/StandardSocketFactory.java new file mode 100644 index 00000000000..b95258557e9 --- /dev/null +++ b/src/java/org/apache/hadoop/net/StandardSocketFactory.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
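// Illustrative sketch of wiring the factory above into the configuration keys
// it reads ("hadoop.rpc.socket.factory.class.default" in NetUtils and
// "hadoop.socks.server" in setConf); the proxy host is made up.
//
// import javax.net.SocketFactory;
// import org.apache.hadoop.conf.Configuration;
// import org.apache.hadoop.net.NetUtils;
//
// class SocksExample {
//   public static void main(String[] args) {
//     Configuration conf = new Configuration();
//     conf.set("hadoop.rpc.socket.factory.class.default",
//              "org.apache.hadoop.net.SocksSocketFactory");
//     conf.set("hadoop.socks.server", "socks-gw.example.com:1080");
//     // ReflectionUtils.newInstance() calls setConf(), which installs the proxy.
//     SocketFactory factory = NetUtils.getDefaultSocketFactory(conf);
//     System.out.println(factory.getClass().getName());
//   }
// }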
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.UnknownHostException; +import java.nio.channels.SocketChannel; + +import javax.net.SocketFactory; + +/** + * Specialized SocketFactory to create sockets with a SOCKS proxy + */ +public class StandardSocketFactory extends SocketFactory { + + /** + * Default empty constructor (for use with the reflection API). + */ + public StandardSocketFactory() { + } + + /* @inheritDoc */ + @Override + public Socket createSocket() throws IOException { + /* + * NOTE: This returns an NIO socket so that it has an associated + * SocketChannel. As of now, this unfortunately makes streams returned + * by Socket.getInputStream() and Socket.getOutputStream() unusable + * (because a blocking read on input stream blocks write on output stream + * and vice versa). + * + * So users of these socket factories should use + * NetUtils.getInputStream(socket) and + * NetUtils.getOutputStream(socket) instead. + * + * A solution for hiding from this from user is to write a + * 'FilterSocket' on the lines of FilterInputStream and extend it by + * overriding getInputStream() and getOutputStream(). + */ + return SocketChannel.open().socket(); + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port) throws IOException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(InetAddress addr, int port, + InetAddress localHostAddr, int localPort) throws IOException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(addr, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public Socket createSocket(String host, int port, + InetAddress localHostAddr, int localPort) throws IOException, + UnknownHostException { + + Socket socket = createSocket(); + socket.bind(new InetSocketAddress(localHostAddr, localPort)); + socket.connect(new InetSocketAddress(host, port)); + return socket; + } + + /* @inheritDoc */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof StandardSocketFactory)) + return false; + return true; + } + + /* @inheritDoc */ + @Override + public int hashCode() { + // Dummy hash code (to make find bugs happy) + return 47; + } + +} diff --git a/src/java/org/apache/hadoop/net/package.html b/src/java/org/apache/hadoop/net/package.html new file mode 100644 index 00000000000..b4e5b5dbdc9 --- /dev/null +++ b/src/java/org/apache/hadoop/net/package.html @@ -0,0 +1,23 @@ + + + + + +Network-related classes. 
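StandardSocketFactory, despite the class comment copied from the SOCKS variant, creates plain channel-backed sockets with no proxy. Because createSocket() returns an NIO socket, its own getInputStream()/getOutputStream() are effectively unusable, as the comment in the code explains; the sketch below therefore wraps the socket with the NetUtils helpers that the comment points to. The endpoint and the StandardFactoryExample class are placeholders, and the NetUtils signatures are assumed from that comment rather than shown in this patch.

    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.InetSocketAddress;
    import java.net.Socket;
    import javax.net.SocketFactory;
    import org.apache.hadoop.net.NetUtils;
    import org.apache.hadoop.net.StandardSocketFactory;

    public class StandardFactoryExample {
      public static void main(String[] args) throws Exception {
        SocketFactory factory = new StandardSocketFactory();
        Socket socket = factory.createSocket();   // NIO socket with an associated SocketChannel
        socket.connect(new InetSocketAddress("example.com", 8020), 10000);

        // Wrap with NetUtils instead of calling socket.getInputStream()/getOutputStream()
        // directly, as the createSocket() comment advises.
        InputStream in = NetUtils.getInputStream(socket);
        OutputStream out = NetUtils.getOutputStream(socket);
        socket.close();
      }
    }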
+ + diff --git a/src/java/org/apache/hadoop/record/BinaryRecordInput.java b/src/java/org/apache/hadoop/record/BinaryRecordInput.java new file mode 100644 index 00000000000..b750df9aed5 --- /dev/null +++ b/src/java/org/apache/hadoop/record/BinaryRecordInput.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.DataInput; +import java.io.IOException; +import java.io.DataInputStream; +import java.io.InputStream; + +/** + */ +public class BinaryRecordInput implements RecordInput { + + private DataInput in; + + static private class BinaryIndex implements Index { + private int nelems; + private BinaryIndex(int nelems) { + this.nelems = nelems; + } + public boolean done() { + return (nelems <= 0); + } + public void incr() { + nelems--; + } + } + + private BinaryRecordInput() {} + + private void setDataInput(DataInput inp) { + this.in = inp; + } + + private static ThreadLocal bIn = new ThreadLocal() { + protected synchronized Object initialValue() { + return new BinaryRecordInput(); + } + }; + + /** + * Get a thread-local record input for the supplied DataInput. + * @param inp data input stream + * @return binary record input corresponding to the supplied DataInput. 
+ */ + public static BinaryRecordInput get(DataInput inp) { + BinaryRecordInput bin = (BinaryRecordInput) bIn.get(); + bin.setDataInput(inp); + return bin; + } + + /** Creates a new instance of BinaryRecordInput */ + public BinaryRecordInput(InputStream strm) { + this.in = new DataInputStream(strm); + } + + /** Creates a new instance of BinaryRecordInput */ + public BinaryRecordInput(DataInput din) { + this.in = din; + } + + public byte readByte(final String tag) throws IOException { + return in.readByte(); + } + + public boolean readBool(final String tag) throws IOException { + return in.readBoolean(); + } + + public int readInt(final String tag) throws IOException { + return Utils.readVInt(in); + } + + public long readLong(final String tag) throws IOException { + return Utils.readVLong(in); + } + + public float readFloat(final String tag) throws IOException { + return in.readFloat(); + } + + public double readDouble(final String tag) throws IOException { + return in.readDouble(); + } + + public String readString(final String tag) throws IOException { + return Utils.fromBinaryString(in); + } + + public Buffer readBuffer(final String tag) throws IOException { + final int len = Utils.readVInt(in); + final byte[] barr = new byte[len]; + in.readFully(barr); + return new Buffer(barr); + } + + public void startRecord(final String tag) throws IOException { + // no-op + } + + public void endRecord(final String tag) throws IOException { + // no-op + } + + public Index startVector(final String tag) throws IOException { + return new BinaryIndex(readInt(tag)); + } + + public void endVector(final String tag) throws IOException { + // no-op + } + + public Index startMap(final String tag) throws IOException { + return new BinaryIndex(readInt(tag)); + } + + public void endMap(final String tag) throws IOException { + // no-op + } +} diff --git a/src/java/org/apache/hadoop/record/BinaryRecordOutput.java b/src/java/org/apache/hadoop/record/BinaryRecordOutput.java new file mode 100644 index 00000000000..a0586534590 --- /dev/null +++ b/src/java/org/apache/hadoop/record/BinaryRecordOutput.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
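BinaryRecordInput hands back a BinaryIndex from startVector()/startMap(): done() turns true once the element count read from the stream has been consumed, and incr() decrements that count. A small sketch of the consumption loop, assuming a RecordInput positioned at a serialized vector of longs (the VectorReadExample class and the tag names are illustrative only):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.record.Index;
    import org.apache.hadoop.record.RecordInput;

    public class VectorReadExample {
      // Reads a serialized vector of longs using the Index iteration pattern.
      static List<Long> readLongVector(RecordInput rin) throws IOException {
        Index idx = rin.startVector("values");
        List<Long> values = new ArrayList<Long>();
        while (!idx.done()) {          // for BinaryRecordInput: elements remain to be read
          values.add(rin.readLong("value"));
          idx.incr();                  // consume one element from the count
        }
        rin.endVector("values");
        return values;
      }
    }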
+ */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.OutputStream; + +/** + */ +public class BinaryRecordOutput implements RecordOutput { + + private DataOutput out; + + private BinaryRecordOutput() {} + + private void setDataOutput(DataOutput out) { + this.out = out; + } + + private static ThreadLocal bOut = new ThreadLocal() { + protected synchronized Object initialValue() { + return new BinaryRecordOutput(); + } + }; + + /** + * Get a thread-local record output for the supplied DataOutput. + * @param out data output stream + * @return binary record output corresponding to the supplied DataOutput. + */ + public static BinaryRecordOutput get(DataOutput out) { + BinaryRecordOutput bout = (BinaryRecordOutput) bOut.get(); + bout.setDataOutput(out); + return bout; + } + + /** Creates a new instance of BinaryRecordOutput */ + public BinaryRecordOutput(OutputStream out) { + this.out = new DataOutputStream(out); + } + + /** Creates a new instance of BinaryRecordOutput */ + public BinaryRecordOutput(DataOutput out) { + this.out = out; + } + + + public void writeByte(byte b, String tag) throws IOException { + out.writeByte(b); + } + + public void writeBool(boolean b, String tag) throws IOException { + out.writeBoolean(b); + } + + public void writeInt(int i, String tag) throws IOException { + Utils.writeVInt(out, i); + } + + public void writeLong(long l, String tag) throws IOException { + Utils.writeVLong(out, l); + } + + public void writeFloat(float f, String tag) throws IOException { + out.writeFloat(f); + } + + public void writeDouble(double d, String tag) throws IOException { + out.writeDouble(d); + } + + public void writeString(String s, String tag) throws IOException { + Utils.toBinaryString(out, s); + } + + public void writeBuffer(Buffer buf, String tag) + throws IOException { + byte[] barr = buf.get(); + int len = buf.getCount(); + Utils.writeVInt(out, len); + out.write(barr, 0, len); + } + + public void startRecord(Record r, String tag) throws IOException {} + + public void endRecord(Record r, String tag) throws IOException {} + + public void startVector(ArrayList v, String tag) throws IOException { + writeInt(v.size(), tag); + } + + public void endVector(ArrayList v, String tag) throws IOException {} + + public void startMap(TreeMap v, String tag) throws IOException { + writeInt(v.size(), tag); + } + + public void endMap(TreeMap v, String tag) throws IOException {} + +} diff --git a/src/java/org/apache/hadoop/record/Buffer.java b/src/java/org/apache/hadoop/record/Buffer.java new file mode 100644 index 00000000000..d0fa95d0b48 --- /dev/null +++ b/src/java/org/apache/hadoop/record/Buffer.java @@ -0,0 +1,246 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
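BinaryRecordOutput and BinaryRecordInput pair up for an untagged, length-prefixed binary format: startRecord()/endRecord() are no-ops and the tag arguments are ignored. A minimal in-memory round trip, with arbitrary tag names and the BinaryRoundTrip class invented for illustration:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.record.BinaryRecordInput;
    import org.apache.hadoop.record.BinaryRecordOutput;
    import org.apache.hadoop.record.Buffer;

    public class BinaryRoundTrip {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        BinaryRecordOutput out = new BinaryRecordOutput(bytes);
        out.writeInt(42, "count");                        // zero-compressed VInt
        out.writeString("hello", "name");                 // VInt length + UTF-8 bytes
        out.writeBuffer(new Buffer(new byte[] {1, 2, 3}), "payload");

        BinaryRecordInput in =
            new BinaryRecordInput(new ByteArrayInputStream(bytes.toByteArray()));
        int count = in.readInt("count");                  // 42
        String name = in.readString("name");              // "hello"
        Buffer payload = in.readBuffer("payload");        // 3 bytes
      }
    }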
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.UnsupportedEncodingException; + +/** + * A byte sequence that is used as a Java native type for buffer. + * It is resizable and distinguishes between the count of the seqeunce and + * the current capacity. + * + */ +public class Buffer implements Comparable, Cloneable { + /** Number of valid bytes in this.bytes. */ + private int count; + /** Backing store for Buffer. */ + private byte[] bytes = null; + + /** + * Create a zero-count sequence. + */ + public Buffer() { + this.count = 0; + } + + /** + * Create a Buffer using the byte array as the initial value. + * + * @param bytes This array becomes the backing storage for the object. + */ + public Buffer(byte[] bytes) { + this.bytes = bytes; + this.count = (bytes == null) ? 0 : bytes.length; + } + + /** + * Create a Buffer using the byte range as the initial value. + * + * @param bytes Copy of this array becomes the backing storage for the object. + * @param offset offset into byte array + * @param length length of data + */ + public Buffer(byte[] bytes, int offset, int length) { + copy(bytes, offset, length); + } + + + /** + * Use the specified bytes array as underlying sequence. + * + * @param bytes byte sequence + */ + public void set(byte[] bytes) { + this.count = (bytes == null) ? 0 : bytes.length; + this.bytes = bytes; + } + + /** + * Copy the specified byte array to the Buffer. Replaces the current buffer. + * + * @param bytes byte array to be assigned + * @param offset offset into byte array + * @param length length of data + */ + public final void copy(byte[] bytes, int offset, int length) { + if (this.bytes == null || this.bytes.length < length) { + this.bytes = new byte[length]; + } + System.arraycopy(bytes, offset, this.bytes, 0, length); + this.count = length; + } + + /** + * Get the data from the Buffer. + * + * @return The data is only valid between 0 and getCount() - 1. + */ + public byte[] get() { + if (bytes == null) { + bytes = new byte[0]; + } + return bytes; + } + + /** + * Get the current count of the buffer. + */ + public int getCount() { + return count; + } + + /** + * Get the capacity, which is the maximum count that could handled without + * resizing the backing storage. + * + * @return The number of bytes + */ + public int getCapacity() { + return this.get().length; + } + + /** + * Change the capacity of the backing storage. + * The data is preserved if newCapacity >= getCount(). + * @param newCapacity The new capacity in bytes. + */ + public void setCapacity(int newCapacity) { + if (newCapacity < 0) { + throw new IllegalArgumentException("Invalid capacity argument "+newCapacity); + } + if (newCapacity == 0) { + this.bytes = null; + this.count = 0; + return; + } + if (newCapacity != getCapacity()) { + byte[] data = new byte[newCapacity]; + if (newCapacity < count) { + count = newCapacity; + } + if (count != 0) { + System.arraycopy(this.get(), 0, data, 0, count); + } + bytes = data; + } + } + + /** + * Reset the buffer to 0 size + */ + public void reset() { + setCapacity(0); + } + + /** + * Change the capacity of the backing store to be the same as the current + * count of buffer. + */ + public void truncate() { + setCapacity(count); + } + + /** + * Append specified bytes to the buffer. 
+ * + * @param bytes byte array to be appended + * @param offset offset into byte array + * @param length length of data + + */ + public void append(byte[] bytes, int offset, int length) { + setCapacity(count+length); + System.arraycopy(bytes, offset, this.get(), count, length); + count = count + length; + } + + /** + * Append specified bytes to the buffer + * + * @param bytes byte array to be appended + */ + public void append(byte[] bytes) { + append(bytes, 0, bytes.length); + } + + // inherit javadoc + public int hashCode() { + int hash = 1; + byte[] b = this.get(); + for (int i = 0; i < count; i++) + hash = (31 * hash) + (int)b[i]; + return hash; + } + + /** + * Define the sort order of the Buffer. + * + * @param other The other buffer + * @return Positive if this is bigger than other, 0 if they are equal, and + * negative if this is smaller than other. + */ + public int compareTo(Object other) { + Buffer right = ((Buffer) other); + byte[] lb = this.get(); + byte[] rb = right.get(); + for (int i = 0; i < count && i < right.count; i++) { + int a = (lb[i] & 0xff); + int b = (rb[i] & 0xff); + if (a != b) { + return a - b; + } + } + return count - right.count; + } + + // inherit javadoc + public boolean equals(Object other) { + if (other instanceof Buffer && this != other) { + return compareTo(other) == 0; + } + return (this == other); + } + + // inheric javadoc + public String toString() { + StringBuffer sb = new StringBuffer(2*count); + for(int idx = 0; idx < count; idx++) { + sb.append(Character.forDigit((bytes[idx] & 0xF0) >> 4, 16)); + sb.append(Character.forDigit(bytes[idx] & 0x0F, 16)); + } + return sb.toString(); + } + + /** + * Convert the byte buffer to a string an specific character encoding + * + * @param charsetName Valid Java Character Set Name + */ + public String toString(String charsetName) + throws UnsupportedEncodingException { + return new String(this.get(), 0, this.getCount(), charsetName); + } + + // inherit javadoc + public Object clone() throws CloneNotSupportedException { + Buffer result = (Buffer) super.clone(); + result.copy(this.get(), 0, this.getCount()); + return result; + } +} diff --git a/src/java/org/apache/hadoop/record/CsvRecordInput.java b/src/java/org/apache/hadoop/record/CsvRecordInput.java new file mode 100644 index 00000000000..e9c538d28c6 --- /dev/null +++ b/src/java/org/apache/hadoop/record/CsvRecordInput.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
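Buffer distinguishes the valid byte count from the capacity of its backing array, and offers both a hex rendering and a charset decoding. A short sketch based only on the methods shown above (the BufferExample class is illustrative):

    import org.apache.hadoop.record.Buffer;

    public class BufferExample {
      public static void main(String[] args) throws Exception {
        Buffer buf = new Buffer();
        buf.append("abc".getBytes("UTF-8")); // grows the backing store as needed
        int count = buf.getCount();          // 3 valid bytes
        int cap = buf.getCapacity();         // 3 here: length of the backing array
        String hex = buf.toString();         // "616263" (lower-case hex digits)
        String text = buf.toString("UTF-8"); // "abc"
        buf.truncate();                      // capacity shrinks to the current count
        buf.reset();                         // back to a zero-count buffer
      }
    }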
+ */ + +package org.apache.hadoop.record; + +import java.io.InputStreamReader; +import java.io.InputStream; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.UnsupportedEncodingException; + +/** + */ +public class CsvRecordInput implements RecordInput { + + private PushbackReader stream; + + private class CsvIndex implements Index { + public boolean done() { + char c = '\0'; + try { + c = (char) stream.read(); + stream.unread(c); + } catch (IOException ex) { + } + return (c == '}') ? true : false; + } + public void incr() {} + } + + private void throwExceptionOnError(String tag) throws IOException { + throw new IOException("Error deserializing "+tag); + } + + private String readField(String tag) throws IOException { + try { + StringBuffer buf = new StringBuffer(); + while (true) { + char c = (char) stream.read(); + switch (c) { + case ',': + return buf.toString(); + case '}': + case '\n': + case '\r': + stream.unread(c); + return buf.toString(); + default: + buf.append(c); + } + } + } catch (IOException ex) { + throw new IOException("Error reading "+tag); + } + } + + /** Creates a new instance of CsvRecordInput */ + public CsvRecordInput(InputStream in) { + try { + stream = new PushbackReader(new InputStreamReader(in, "UTF-8")); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException(ex); + } + } + + public byte readByte(String tag) throws IOException { + return (byte) readLong(tag); + } + + public boolean readBool(String tag) throws IOException { + String sval = readField(tag); + return "T".equals(sval) ? true : false; + } + + public int readInt(String tag) throws IOException { + return (int) readLong(tag); + } + + public long readLong(String tag) throws IOException { + String sval = readField(tag); + try { + long lval = Long.parseLong(sval); + return lval; + } catch (NumberFormatException ex) { + throw new IOException("Error deserializing "+tag); + } + } + + public float readFloat(String tag) throws IOException { + return (float) readDouble(tag); + } + + public double readDouble(String tag) throws IOException { + String sval = readField(tag); + try { + double dval = Double.parseDouble(sval); + return dval; + } catch (NumberFormatException ex) { + throw new IOException("Error deserializing "+tag); + } + } + + public String readString(String tag) throws IOException { + String sval = readField(tag); + return Utils.fromCSVString(sval); + } + + public Buffer readBuffer(String tag) throws IOException { + String sval = readField(tag); + return Utils.fromCSVBuffer(sval); + } + + public void startRecord(String tag) throws IOException { + if (tag != null && !"".equals(tag)) { + char c1 = (char) stream.read(); + char c2 = (char) stream.read(); + if (c1 != 's' || c2 != '{') { + throw new IOException("Error deserializing "+tag); + } + } + } + + public void endRecord(String tag) throws IOException { + char c = (char) stream.read(); + if (tag == null || "".equals(tag)) { + if (c != '\n' && c != '\r') { + throw new IOException("Error deserializing record."); + } else { + return; + } + } + + if (c != '}') { + throw new IOException("Error deserializing "+tag); + } + c = (char) stream.read(); + if (c != ',') { + stream.unread(c); + } + + return; + } + + public Index startVector(String tag) throws IOException { + char c1 = (char) stream.read(); + char c2 = (char) stream.read(); + if (c1 != 'v' || c2 != '{') { + throw new IOException("Error deserializing "+tag); + } + return new CsvIndex(); + } + + public void endVector(String tag) throws IOException { + char c = 
(char) stream.read(); + if (c != '}') { + throw new IOException("Error deserializing "+tag); + } + c = (char) stream.read(); + if (c != ',') { + stream.unread(c); + } + return; + } + + public Index startMap(String tag) throws IOException { + char c1 = (char) stream.read(); + char c2 = (char) stream.read(); + if (c1 != 'm' || c2 != '{') { + throw new IOException("Error deserializing "+tag); + } + return new CsvIndex(); + } + + public void endMap(String tag) throws IOException { + char c = (char) stream.read(); + if (c != '}') { + throw new IOException("Error deserializing "+tag); + } + c = (char) stream.read(); + if (c != ',') { + stream.unread(c); + } + return; + } +} diff --git a/src/java/org/apache/hadoop/record/CsvRecordOutput.java b/src/java/org/apache/hadoop/record/CsvRecordOutput.java new file mode 100644 index 00000000000..f2c6be6f4f9 --- /dev/null +++ b/src/java/org/apache/hadoop/record/CsvRecordOutput.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; +import java.io.PrintStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; + +/** + */ +public class CsvRecordOutput implements RecordOutput { + + private PrintStream stream; + private boolean isFirst = true; + + private void throwExceptionOnError(String tag) throws IOException { + if (stream.checkError()) { + throw new IOException("Error serializing "+tag); + } + } + + private void printCommaUnlessFirst() { + if (!isFirst) { + stream.print(","); + } + isFirst = false; + } + + /** Creates a new instance of CsvRecordOutput */ + public CsvRecordOutput(OutputStream out) { + try { + stream = new PrintStream(out, true, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException(ex); + } + } + + public void writeByte(byte b, String tag) throws IOException { + writeLong((long)b, tag); + } + + public void writeBool(boolean b, String tag) throws IOException { + printCommaUnlessFirst(); + String val = b ? 
"T" : "F"; + stream.print(val); + throwExceptionOnError(tag); + } + + public void writeInt(int i, String tag) throws IOException { + writeLong((long)i, tag); + } + + public void writeLong(long l, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print(l); + throwExceptionOnError(tag); + } + + public void writeFloat(float f, String tag) throws IOException { + writeDouble((double)f, tag); + } + + public void writeDouble(double d, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print(d); + throwExceptionOnError(tag); + } + + public void writeString(String s, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print(Utils.toCSVString(s)); + throwExceptionOnError(tag); + } + + public void writeBuffer(Buffer buf, String tag) + throws IOException { + printCommaUnlessFirst(); + stream.print(Utils.toCSVBuffer(buf)); + throwExceptionOnError(tag); + } + + public void startRecord(Record r, String tag) throws IOException { + if (tag != null && !"".equals(tag)) { + printCommaUnlessFirst(); + stream.print("s{"); + isFirst = true; + } + } + + public void endRecord(Record r, String tag) throws IOException { + if (tag == null || "".equals(tag)) { + stream.print("\n"); + isFirst = true; + } else { + stream.print("}"); + isFirst = false; + } + } + + public void startVector(ArrayList v, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print("v{"); + isFirst = true; + } + + public void endVector(ArrayList v, String tag) throws IOException { + stream.print("}"); + isFirst = false; + } + + public void startMap(TreeMap v, String tag) throws IOException { + printCommaUnlessFirst(); + stream.print("m{"); + isFirst = true; + } + + public void endMap(TreeMap v, String tag) throws IOException { + stream.print("}"); + isFirst = false; + } +} diff --git a/src/java/org/apache/hadoop/record/Index.java b/src/java/org/apache/hadoop/record/Index.java new file mode 100644 index 00000000000..82daecec03f --- /dev/null +++ b/src/java/org/apache/hadoop/record/Index.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +/** + * Interface that acts as an iterator for deserializing maps. + * The deserializer returns an instance that the record uses to + * read vectors and maps. An example of usage is as follows: + * + * + * Index idx = startVector(...); + * while (!idx.done()) { + * .... 
// read element of a vector + * idx.incr(); + * } + * + */ +public interface Index { + boolean done(); + void incr(); +} diff --git a/src/java/org/apache/hadoop/record/Record.java b/src/java/org/apache/hadoop/record/Record.java new file mode 100644 index 00000000000..794b2597599 --- /dev/null +++ b/src/java/org/apache/hadoop/record/Record.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.apache.hadoop.io.WritableComparable; + +/** + * Abstract class that is extended by generated classes. + * + */ +public abstract class Record implements WritableComparable, Cloneable { + + /** + * Serialize a record with tag (usually field name) + * @param rout Record output destination + * @param tag record tag (Used only in tagged serialization e.g. XML) + */ + public abstract void serialize(RecordOutput rout, String tag) + throws IOException; + + /** + * Deserialize a record with a tag (usually field name) + * @param rin Record input source + * @param tag Record tag (Used only in tagged serialization e.g.
XML) + */ + public abstract void deserialize(RecordInput rin, String tag) + throws IOException; + + // inheric javadoc + public abstract int compareTo (final Object peer) throws ClassCastException; + + /** + * Serialize a record without a tag + * @param rout Record output destination + */ + public void serialize(RecordOutput rout) throws IOException { + this.serialize(rout, ""); + } + + /** + * Deserialize a record without a tag + * @param rin Record input source + */ + public void deserialize(RecordInput rin) throws IOException { + this.deserialize(rin, ""); + } + + // inherit javadoc + public void write(final DataOutput out) throws java.io.IOException { + BinaryRecordOutput bout = BinaryRecordOutput.get(out); + this.serialize(bout); + } + + // inherit javadoc + public void readFields(final DataInput din) throws java.io.IOException { + BinaryRecordInput rin = BinaryRecordInput.get(din); + this.deserialize(rin); + } + + // inherit javadoc + public String toString() { + try { + ByteArrayOutputStream s = new ByteArrayOutputStream(); + CsvRecordOutput a = new CsvRecordOutput(s); + this.serialize(a); + return new String(s.toByteArray(), "UTF-8"); + } catch (Throwable ex) { + throw new RuntimeException(ex); + } + } +} diff --git a/src/java/org/apache/hadoop/record/RecordComparator.java b/src/java/org/apache/hadoop/record/RecordComparator.java new file mode 100644 index 00000000000..b2c2ea3e101 --- /dev/null +++ b/src/java/org/apache/hadoop/record/RecordComparator.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; + +/** + * A raw record comparator base class + */ +public abstract class RecordComparator extends WritableComparator { + + /** + * Construct a raw {@link Record} comparison implementation. */ + protected RecordComparator(Class recordClass) { + super(recordClass); + } + + // inheric JavaDoc + public abstract int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2); + + /** + * Register an optimized comparator for a {@link Record} implementation. + * + * @param c record classs for which a raw comparator is provided + * @param comparator Raw comparator instance for class c + */ + public static synchronized void define(Class c, RecordComparator comparator) { + WritableComparator.define(c, comparator); + } +} diff --git a/src/java/org/apache/hadoop/record/RecordInput.java b/src/java/org/apache/hadoop/record/RecordInput.java new file mode 100644 index 00000000000..f41f12eba11 --- /dev/null +++ b/src/java/org/apache/hadoop/record/RecordInput.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
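A subclass of Record only has to supply serialize(), deserialize() and compareTo(); write(), readFields() and toString() then come for free through the thread-local binary and CSV serializers shown earlier. Subclasses are normally generated, per the class comment above; the hand-written IntPair below is a hypothetical illustration of the same shape, not generated code.

    import java.io.IOException;
    import org.apache.hadoop.record.Record;
    import org.apache.hadoop.record.RecordInput;
    import org.apache.hadoop.record.RecordOutput;

    public class IntPair extends Record {
      private int first;
      private int second;

      public void serialize(RecordOutput rout, String tag) throws IOException {
        rout.startRecord(this, tag);
        rout.writeInt(first, "first");
        rout.writeInt(second, "second");
        rout.endRecord(this, tag);
      }

      public void deserialize(RecordInput rin, String tag) throws IOException {
        rin.startRecord(tag);
        first = rin.readInt("first");
        second = rin.readInt("second");
        rin.endRecord(tag);
      }

      public int compareTo(Object peer) throws ClassCastException {
        IntPair other = (IntPair) peer;
        if (first != other.first) {
          return first < other.first ? -1 : 1;
        }
        return second < other.second ? -1 : (second == other.second ? 0 : 1);
      }
    }

Because Record implements WritableComparable, such an instance can then be used wherever a Writable is expected, and its toString() yields the CSV form.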
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; + +/** + * Interface that all the Deserializers have to implement. + */ +public interface RecordInput { + /** + * Read a byte from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + byte readByte(String tag) throws IOException; + + /** + * Read a boolean from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + boolean readBool(String tag) throws IOException; + + /** + * Read an integer from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + int readInt(String tag) throws IOException; + + /** + * Read a long integer from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + long readLong(String tag) throws IOException; + + /** + * Read a single-precision float from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + float readFloat(String tag) throws IOException; + + /** + * Read a double-precision number from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + double readDouble(String tag) throws IOException; + + /** + * Read a UTF-8 encoded string from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + String readString(String tag) throws IOException; + + /** + * Read byte array from serialized record. + * @param tag Used by tagged serialization formats (such as XML) + * @return value read from serialized record. + */ + Buffer readBuffer(String tag) throws IOException; + + /** + * Check the mark for start of the serialized record. + * @param tag Used by tagged serialization formats (such as XML) + */ + void startRecord(String tag) throws IOException; + + /** + * Check the mark for end of the serialized record. + * @param tag Used by tagged serialization formats (such as XML) + */ + void endRecord(String tag) throws IOException; + + /** + * Check the mark for start of the serialized vector. + * @param tag Used by tagged serialization formats (such as XML) + * @return Index that is used to count the number of elements. + */ + Index startVector(String tag) throws IOException; + + /** + * Check the mark for end of the serialized vector. + * @param tag Used by tagged serialization formats (such as XML) + */ + void endVector(String tag) throws IOException; + + /** + * Check the mark for start of the serialized map. 
+ * @param tag Used by tagged serialization formats (such as XML) + * @return Index that is used to count the number of map entries. + */ + Index startMap(String tag) throws IOException; + + /** + * Check the mark for end of the serialized map. + * @param tag Used by tagged serialization formats (such as XML) + */ + void endMap(String tag) throws IOException; +} diff --git a/src/java/org/apache/hadoop/record/RecordOutput.java b/src/java/org/apache/hadoop/record/RecordOutput.java new file mode 100644 index 00000000000..a8aba2f3d61 --- /dev/null +++ b/src/java/org/apache/hadoop/record/RecordOutput.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; + +/** + * Interface that alll the serializers have to implement. + */ +public interface RecordOutput { + /** + * Write a byte to serialized record. + * @param b Byte to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeByte(byte b, String tag) throws IOException; + + /** + * Write a boolean to serialized record. + * @param b Boolean to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeBool(boolean b, String tag) throws IOException; + + /** + * Write an integer to serialized record. + * @param i Integer to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeInt(int i, String tag) throws IOException; + + /** + * Write a long integer to serialized record. + * @param l Long to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeLong(long l, String tag) throws IOException; + + /** + * Write a single-precision float to serialized record. + * @param f Float to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeFloat(float f, String tag) throws IOException; + + /** + * Write a double precision floating point number to serialized record. + * @param d Double to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeDouble(double d, String tag) throws IOException; + + /** + * Write a unicode string to serialized record. 
+ * @param s String to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeString(String s, String tag) throws IOException; + + /** + * Write a buffer to serialized record. + * @param buf Buffer to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void writeBuffer(Buffer buf, String tag) + throws IOException; + + /** + * Mark the start of a record to be serialized. + * @param r Record to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void startRecord(Record r, String tag) throws IOException; + + /** + * Mark the end of a serialized record. + * @param r Record to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void endRecord(Record r, String tag) throws IOException; + + /** + * Mark the start of a vector to be serialized. + * @param v Vector to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void startVector(ArrayList v, String tag) throws IOException; + + /** + * Mark the end of a serialized vector. + * @param v Vector to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void endVector(ArrayList v, String tag) throws IOException; + + /** + * Mark the start of a map to be serialized. + * @param m Map to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void startMap(TreeMap m, String tag) throws IOException; + + /** + * Mark the end of a serialized map. + * @param m Map to be serialized + * @param tag Used by tagged serialization formats (such as XML) + * @throws IOException Indicates error in serialization + */ + public void endMap(TreeMap m, String tag) throws IOException; +} diff --git a/src/java/org/apache/hadoop/record/Utils.java b/src/java/org/apache/hadoop/record/Utils.java new file mode 100644 index 00000000000..1e8d8277a98 --- /dev/null +++ b/src/java/org/apache/hadoop/record/Utils.java @@ -0,0 +1,490 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
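CsvRecordOutput and CsvRecordInput, shown earlier, implement these two interfaces with a textual format. A minimal round trip over in-memory streams; the empty tag makes endRecord() emit and later consume the terminating newline, as in the code above (the CsvRoundTrip class and the field tags are illustrative):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.record.CsvRecordInput;
    import org.apache.hadoop.record.CsvRecordOutput;

    public class CsvRoundTrip {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        CsvRecordOutput out = new CsvRecordOutput(bytes);
        out.startRecord(null, "");            // no-op for an empty tag
        out.writeInt(42, "count");
        out.writeString("hello", "name");
        out.endRecord(null, "");              // line becomes: 42,'hello\n

        CsvRecordInput in =
            new CsvRecordInput(new ByteArrayInputStream(bytes.toByteArray()));
        in.startRecord("");
        int count = in.readInt("count");      // 42
        String name = in.readString("name");  // "hello"
        in.endRecord("");                     // consumes the trailing newline
      }
    }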
+ */ + +package org.apache.hadoop.record; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.io.WritableUtils; + +/** + * Various utility functions for Hadooop record I/O runtime. + */ +public class Utils { + + /** Cannot create a new instance of Utils */ + private Utils() { + } + + public static final char[] hexchars = { '0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'A', 'B', + 'C', 'D', 'E', 'F' }; + /** + * + * @param s + * @return + */ + static String toXMLString(String s) { + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < s.length(); idx++) { + char ch = s.charAt(idx); + if (ch == '<') { + sb.append("<"); + } else if (ch == '&') { + sb.append("&"); + } else if (ch == '%') { + sb.append("%0025"); + } else if (ch < 0x20 || + (ch > 0xD7FF && ch < 0xE000) || + (ch > 0xFFFD)) { + sb.append("%"); + sb.append(hexchars[(ch & 0xF000) >> 12]); + sb.append(hexchars[(ch & 0x0F00) >> 8]); + sb.append(hexchars[(ch & 0x00F0) >> 4]); + sb.append(hexchars[(ch & 0x000F)]); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + static private int h2c(char ch) { + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } else if (ch >= 'A' && ch <= 'F') { + return ch - 'A' + 10; + } else if (ch >= 'a' && ch <= 'f') { + return ch - 'a' + 10; + } + return 0; + } + + /** + * + * @param s + * @return + */ + static String fromXMLString(String s) { + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < s.length();) { + char ch = s.charAt(idx++); + if (ch == '%') { + int ch1 = h2c(s.charAt(idx++)) << 12; + int ch2 = h2c(s.charAt(idx++)) << 8; + int ch3 = h2c(s.charAt(idx++)) << 4; + int ch4 = h2c(s.charAt(idx++)); + char res = (char)(ch1 | ch2 | ch3 | ch4); + sb.append(res); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + /** + * + * @param s + * @return + */ + static String toCSVString(String s) { + StringBuffer sb = new StringBuffer(s.length()+1); + sb.append('\''); + int len = s.length(); + for (int i = 0; i < len; i++) { + char c = s.charAt(i); + switch(c) { + case '\0': + sb.append("%00"); + break; + case '\n': + sb.append("%0A"); + break; + case '\r': + sb.append("%0D"); + break; + case ',': + sb.append("%2C"); + break; + case '}': + sb.append("%7D"); + break; + case '%': + sb.append("%25"); + break; + default: + sb.append(c); + } + } + return sb.toString(); + } + + /** + * + * @param s + * @throws java.io.IOException + * @return + */ + static String fromCSVString(String s) throws IOException { + if (s.charAt(0) != '\'') { + throw new IOException("Error deserializing string."); + } + int len = s.length(); + StringBuffer sb = new StringBuffer(len-1); + for (int i = 1; i < len; i++) { + char c = s.charAt(i); + if (c == '%') { + char ch1 = s.charAt(i+1); + char ch2 = s.charAt(i+2); + i += 2; + if (ch1 == '0' && ch2 == '0') { + sb.append('\0'); + } else if (ch1 == '0' && ch2 == 'A') { + sb.append('\n'); + } else if (ch1 == '0' && ch2 == 'D') { + sb.append('\r'); + } else if (ch1 == '2' && ch2 == 'C') { + sb.append(','); + } else if (ch1 == '7' && ch2 == 'D') { + sb.append('}'); + } else if (ch1 == '2' && ch2 == '5') { + sb.append('%'); + } else { + throw new IOException("Error deserializing string."); + } + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * + * @param s + * @return + */ + static String toXMLBuffer(Buffer s) { + return s.toString(); + } + + /** + * + * @param s + * @throws java.io.IOException + * 
@return + */ + static Buffer fromXMLBuffer(String s) + throws IOException { + if (s.length() == 0) { return new Buffer(); } + int blen = s.length()/2; + byte[] barr = new byte[blen]; + for (int idx = 0; idx < blen; idx++) { + char c1 = s.charAt(2*idx); + char c2 = s.charAt(2*idx+1); + barr[idx] = (byte)Integer.parseInt(""+c1+c2, 16); + } + return new Buffer(barr); + } + + /** + * + * @param buf + * @return + */ + static String toCSVBuffer(Buffer buf) { + StringBuffer sb = new StringBuffer("#"); + sb.append(buf.toString()); + return sb.toString(); + } + + /** + * Converts a CSV-serialized representation of buffer to a new + * Buffer + * @param s CSV-serialized representation of buffer + * @throws java.io.IOException + * @return Deserialized Buffer + */ + static Buffer fromCSVBuffer(String s) + throws IOException { + if (s.charAt(0) != '#') { + throw new IOException("Error deserializing buffer."); + } + if (s.length() == 1) { return new Buffer(); } + int blen = (s.length()-1)/2; + byte[] barr = new byte[blen]; + for (int idx = 0; idx < blen; idx++) { + char c1 = s.charAt(2*idx+1); + char c2 = s.charAt(2*idx+2); + barr[idx] = (byte)Integer.parseInt(""+c1+c2, 16); + } + return new Buffer(barr); + } + + private static int utf8LenForCodePoint(final int cpt) throws IOException { + if (cpt >=0 && cpt <= 0x7F) { + return 1; + } + if (cpt >= 0x80 && cpt <= 0x07FF) { + return 2; + } + if ((cpt >= 0x0800 && cpt < 0xD800) || + (cpt > 0xDFFF && cpt <= 0xFFFD)) { + return 3; + } + if (cpt >= 0x10000 && cpt <= 0x10FFFF) { + return 4; + } + throw new IOException("Illegal Unicode Codepoint "+ + Integer.toHexString(cpt)+" in string."); + } + + private static final int B10 = Integer.parseInt("10000000", 2); + private static final int B110 = Integer.parseInt("11000000", 2); + private static final int B1110 = Integer.parseInt("11100000", 2); + private static final int B11110 = Integer.parseInt("11110000", 2); + private static final int B11 = Integer.parseInt("11000000", 2); + private static final int B111 = Integer.parseInt("11100000", 2); + private static final int B1111 = Integer.parseInt("11110000", 2); + private static final int B11111 = Integer.parseInt("11111000", 2); + + private static int writeUtf8(int cpt, final byte[] bytes, final int offset) + throws IOException { + if (cpt >=0 && cpt <= 0x7F) { + bytes[offset] = (byte) cpt; + return 1; + } + if (cpt >= 0x80 && cpt <= 0x07FF) { + bytes[offset+1] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset] = (byte) (B110 | (cpt & 0x1F)); + return 2; + } + if ((cpt >= 0x0800 && cpt < 0xD800) || + (cpt > 0xDFFF && cpt <= 0xFFFD)) { + bytes[offset+2] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset+1] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset] = (byte) (B1110 | (cpt & 0x0F)); + return 3; + } + if (cpt >= 0x10000 && cpt <= 0x10FFFF) { + bytes[offset+3] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset+2] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset+1] = (byte) (B10 | (cpt & 0x3F)); + cpt = cpt >> 6; + bytes[offset] = (byte) (B11110 | (cpt & 0x07)); + return 4; + } + throw new IOException("Illegal Unicode Codepoint "+ + Integer.toHexString(cpt)+" in string."); + } + + static void toBinaryString(final DataOutput out, final String str) + throws IOException { + final int strlen = str.length(); + byte[] bytes = new byte[strlen*4]; // Codepoints expand to 4 bytes max + int utf8Len = 0; + int idx = 0; + while(idx < strlen) { + final int cpt = str.codePointAt(idx); + idx += 
Character.isSupplementaryCodePoint(cpt) ? 2 : 1; + utf8Len += writeUtf8(cpt, bytes, utf8Len); + } + writeVInt(out, utf8Len); + out.write(bytes, 0, utf8Len); + } + + static boolean isValidCodePoint(int cpt) { + return !((cpt > 0x10FFFF) || + (cpt >= 0xD800 && cpt <= 0xDFFF) || + (cpt >= 0xFFFE && cpt <=0xFFFF)); + } + + private static int utf8ToCodePoint(int b1, int b2, int b3, int b4) { + int cpt = 0; + cpt = (((b1 & ~B11111) << 18) | + ((b2 & ~B11) << 12) | + ((b3 & ~B11) << 6) | + (b4 & ~B11)); + return cpt; + } + + private static int utf8ToCodePoint(int b1, int b2, int b3) { + int cpt = 0; + cpt = (((b1 & ~B1111) << 12) | ((b2 & ~B11) << 6) | (b3 & ~B11)); + return cpt; + } + + private static int utf8ToCodePoint(int b1, int b2) { + int cpt = 0; + cpt = (((b1 & ~B111) << 6) | (b2 & ~B11)); + return cpt; + } + + private static void checkB10(int b) throws IOException { + if ((b & B11) != B10) { + throw new IOException("Invalid UTF-8 representation."); + } + } + + static String fromBinaryString(final DataInput din) throws IOException { + final int utf8Len = readVInt(din); + final byte[] bytes = new byte[utf8Len]; + din.readFully(bytes); + int len = 0; + // For the most commmon case, i.e. ascii, numChars = utf8Len + StringBuilder sb = new StringBuilder(utf8Len); + while(len < utf8Len) { + int cpt = 0; + final int b1 = bytes[len++] & 0xFF; + if (b1 <= 0x7F) { + cpt = b1; + } else if ((b1 & B11111) == B11110) { + int b2 = bytes[len++] & 0xFF; + checkB10(b2); + int b3 = bytes[len++] & 0xFF; + checkB10(b3); + int b4 = bytes[len++] & 0xFF; + checkB10(b4); + cpt = utf8ToCodePoint(b1, b2, b3, b4); + } else if ((b1 & B1111) == B1110) { + int b2 = bytes[len++] & 0xFF; + checkB10(b2); + int b3 = bytes[len++] & 0xFF; + checkB10(b3); + cpt = utf8ToCodePoint(b1, b2, b3); + } else if ((b1 & B111) == B110) { + int b2 = bytes[len++] & 0xFF; + checkB10(b2); + cpt = utf8ToCodePoint(b1, b2); + } else { + throw new IOException("Invalid UTF-8 byte "+Integer.toHexString(b1)+ + " at offset "+(len-1)+" in length of "+utf8Len); + } + if (!isValidCodePoint(cpt)) { + throw new IOException("Illegal Unicode Codepoint "+ + Integer.toHexString(cpt)+" in stream."); + } + sb.appendCodePoint(cpt); + } + return sb.toString(); + } + + /** Parse a float from a byte array. */ + public static float readFloat(byte[] bytes, int start) { + return WritableComparator.readFloat(bytes, start); + } + + /** Parse a double from a byte array. */ + public static double readDouble(byte[] bytes, int start) { + return WritableComparator.readDouble(bytes, start); + } + + /** + * Reads a zero-compressed encoded long from a byte array and returns it. + * @param bytes byte array with decode long + * @param start starting index + * @throws java.io.IOException + * @return deserialized long + */ + public static long readVLong(byte[] bytes, int start) throws IOException { + return WritableComparator.readVLong(bytes, start); + } + + /** + * Reads a zero-compressed encoded integer from a byte array and returns it. + * @param bytes byte array with the encoded integer + * @param start start index + * @throws java.io.IOException + * @return deserialized integer + */ + public static int readVInt(byte[] bytes, int start) throws IOException { + return WritableComparator.readVInt(bytes, start); + } + + /** + * Reads a zero-compressed encoded long from a stream and return it. 
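The byte-array variants of readVInt()/readVLong(), together with getVIntSize(), let callers walk serialized data in place, which is the pattern a raw RecordComparator relies on. A hedged sketch (RawDecodeExample is an invented name):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.record.Utils;

    public class RawDecodeExample {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        Utils.writeVInt(dos, 7);
        Utils.writeVInt(dos, 300);
        byte[] raw = bos.toByteArray();

        // Decode directly from the byte array, advancing by the encoded size.
        int offset = 0;
        int first = Utils.readVInt(raw, offset);   // 7
        offset += Utils.getVIntSize(first);
        int second = Utils.readVInt(raw, offset);  // 300
        offset += Utils.getVIntSize(second);
      }
    }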
+ * @param in input stream + * @throws java.io.IOException + * @return deserialized long + */ + public static long readVLong(DataInput in) throws IOException { + return WritableUtils.readVLong(in); + } + + /** + * Reads a zero-compressed encoded integer from a stream and returns it. + * @param in input stream + * @throws java.io.IOException + * @return deserialized integer + */ + public static int readVInt(DataInput in) throws IOException { + return WritableUtils.readVInt(in); + } + + /** + * Get the encoded length if an integer is stored in a variable-length format + * @return the encoded length + */ + public static int getVIntSize(long i) { + return WritableUtils.getVIntSize(i); + } + + /** + * Serializes a long to a binary stream with zero-compressed encoding. + * For -112 <= i <= 127, only one byte is used with the actual value. + * For other values of i, the first byte value indicates whether the + * long is positive or negative, and the number of bytes that follow. + * If the first byte value v is between -113 and -120, the following long + * is positive, with number of bytes that follow are -(v+112). + * If the first byte value v is between -121 and -128, the following long + * is negative, with number of bytes that follow are -(v+120). Bytes are + * stored in the high-non-zero-byte-first order. + * + * @param stream Binary output stream + * @param i Long to be serialized + * @throws java.io.IOException + */ + public static void writeVLong(DataOutput stream, long i) throws IOException { + WritableUtils.writeVLong(stream, i); + } + + /** + * Serializes an int to a binary stream with zero-compressed encoding. + * + * @param stream Binary output stream + * @param i int to be serialized + * @throws java.io.IOException + */ + public static void writeVInt(DataOutput stream, int i) throws IOException { + WritableUtils.writeVInt(stream, i); + } + + /** Lexicographic order of binary data. */ + public static int compareBytes(byte[] b1, int s1, int l1, + byte[] b2, int s2, int l2) { + return WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2); + } +} diff --git a/src/java/org/apache/hadoop/record/XmlRecordInput.java b/src/java/org/apache/hadoop/record/XmlRecordInput.java new file mode 100644 index 00000000000..5272c68727c --- /dev/null +++ b/src/java/org/apache/hadoop/record/XmlRecordInput.java @@ -0,0 +1,243 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.InputStream; +import java.io.IOException; +import java.util.ArrayList; + +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.parsers.SAXParser; + +/** + * XML Deserializer. 
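The zero-compressed encoding described above keeps small values in a single byte and spends extra bytes only as the magnitude grows. A short sketch of the resulting sizes plus a stream round trip (VLongSizeExample is an invented name; the size comments follow the description above):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import org.apache.hadoop.record.Utils;

    public class VLongSizeExample {
      public static void main(String[] args) throws Exception {
        int a = Utils.getVIntSize(127L);   // 1: -112..127 fit in the first byte itself
        int b = Utils.getVIntSize(-112L);  // 1
        int c = Utils.getVIntSize(128L);   // 2: marker byte + one data byte
        int d = Utils.getVIntSize(300L);   // 3: marker byte + two data bytes
        int e = Utils.getVIntSize(-300L);  // 3: negative values use the -121..-128 markers

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        Utils.writeVLong(dos, 300L);
        DataInputStream dis =
            new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
        long v = Utils.readVLong(dis);     // 300
      }
    }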
+ */ +public class XmlRecordInput implements RecordInput { + + static private class Value { + private String type; + private StringBuffer sb; + + public Value(String t) { + type = t; + sb = new StringBuffer(); + } + public void addChars(char[] buf, int offset, int len) { + sb.append(buf, offset, len); + } + public String getValue() { return sb.toString(); } + public String getType() { return type; } + } + + private static class XMLParser extends DefaultHandler { + private boolean charsValid = false; + + private ArrayList valList; + + private XMLParser(ArrayList vlist) { + valList = vlist; + } + + public void startDocument() throws SAXException {} + + public void endDocument() throws SAXException {} + + public void startElement(String ns, + String sname, + String qname, + Attributes attrs) throws SAXException { + charsValid = false; + if ("boolean".equals(qname) || + "i4".equals(qname) || + "int".equals(qname) || + "string".equals(qname) || + "double".equals(qname) || + "ex:i1".equals(qname) || + "ex:i8".equals(qname) || + "ex:float".equals(qname)) { + charsValid = true; + valList.add(new Value(qname)); + } else if ("struct".equals(qname) || + "array".equals(qname)) { + valList.add(new Value(qname)); + } + } + + public void endElement(String ns, + String sname, + String qname) throws SAXException { + charsValid = false; + if ("struct".equals(qname) || + "array".equals(qname)) { + valList.add(new Value("/"+qname)); + } + } + + public void characters(char buf[], int offset, int len) + throws SAXException { + if (charsValid) { + Value v = valList.get(valList.size()-1); + v.addChars(buf, offset, len); + } + } + + } + + private class XmlIndex implements Index { + public boolean done() { + Value v = valList.get(vIdx); + if ("/array".equals(v.getType())) { + valList.set(vIdx, null); + vIdx++; + return true; + } else { + return false; + } + } + public void incr() {} + } + + private ArrayList valList; + private int vLen; + private int vIdx; + + private Value next() throws IOException { + if (vIdx < vLen) { + Value v = valList.get(vIdx); + valList.set(vIdx, null); + vIdx++; + return v; + } else { + throw new IOException("Error in deserialization."); + } + } + + /** Creates a new instance of XmlRecordInput */ + public XmlRecordInput(InputStream in) { + try{ + valList = new ArrayList(); + DefaultHandler handler = new XMLParser(valList); + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser parser = factory.newSAXParser(); + parser.parse(in, handler); + vLen = valList.size(); + vIdx = 0; + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } + + public byte readByte(String tag) throws IOException { + Value v = next(); + if (!"ex:i1".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Byte.parseByte(v.getValue()); + } + + public boolean readBool(String tag) throws IOException { + Value v = next(); + if (!"boolean".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return "1".equals(v.getValue()); + } + + public int readInt(String tag) throws IOException { + Value v = next(); + if (!"i4".equals(v.getType()) && + !"int".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Integer.parseInt(v.getValue()); + } + + public long readLong(String tag) throws IOException { + Value v = next(); + if (!"ex:i8".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Long.parseLong(v.getValue()); + } + + public float readFloat(String 
tag) throws IOException { + Value v = next(); + if (!"ex:float".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Float.parseFloat(v.getValue()); + } + + public double readDouble(String tag) throws IOException { + Value v = next(); + if (!"double".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Double.parseDouble(v.getValue()); + } + + public String readString(String tag) throws IOException { + Value v = next(); + if (!"string".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Utils.fromXMLString(v.getValue()); + } + + public Buffer readBuffer(String tag) throws IOException { + Value v = next(); + if (!"string".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return Utils.fromXMLBuffer(v.getValue()); + } + + public void startRecord(String tag) throws IOException { + Value v = next(); + if (!"struct".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + } + + public void endRecord(String tag) throws IOException { + Value v = next(); + if (!"/struct".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + } + + public Index startVector(String tag) throws IOException { + Value v = next(); + if (!"array".equals(v.getType())) { + throw new IOException("Error deserializing "+tag+"."); + } + return new XmlIndex(); + } + + public void endVector(String tag) throws IOException {} + + public Index startMap(String tag) throws IOException { + return startVector(tag); + } + + public void endMap(String tag) throws IOException { endVector(tag); } + +} diff --git a/src/java/org/apache/hadoop/record/XmlRecordOutput.java b/src/java/org/apache/hadoop/record/XmlRecordOutput.java new file mode 100644 index 00000000000..643ee1f225c --- /dev/null +++ b/src/java/org/apache/hadoop/record/XmlRecordOutput.java @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record; + +import java.io.IOException; +import java.util.TreeMap; +import java.util.ArrayList; +import java.io.PrintStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Stack; + +/** + * XML Serializer. 
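
The reader methods above are normally driven by generated record classes, but they can be called directly. A hypothetical hand-written use, assuming the stream holds one struct containing an int ("i4"/"int") followed by a "string":

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import org.apache.hadoop.record.XmlRecordInput;

    class XmlReadSketch {
      static void read(byte[] xml) throws IOException {
        XmlRecordInput in = new XmlRecordInput(new ByteArrayInputStream(xml));
        in.startRecord("rec");              // consumes the struct marker
        int id = in.readInt("id");          // the tag only appears in error messages
        String name = in.readString("name");
        in.endRecord("rec");                // consumes the matching end-of-struct marker
        System.out.println(id + " " + name);
      }
    }
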
+ */ +public class XmlRecordOutput implements RecordOutput { + + private PrintStream stream; + + private int indent = 0; + + private Stack compoundStack; + + private void putIndent() { + StringBuffer sb = new StringBuffer(""); + for (int idx = 0; idx < indent; idx++) { + sb.append(" "); + } + stream.print(sb.toString()); + } + + private void addIndent() { + indent++; + } + + private void closeIndent() { + indent--; + } + + private void printBeginEnvelope(String tag) { + if (!compoundStack.empty()) { + String s = compoundStack.peek(); + if ("struct".equals(s)) { + putIndent(); + stream.print("\n"); + addIndent(); + putIndent(); + stream.print(""+tag+"\n"); + putIndent(); + stream.print(""); + } else if ("vector".equals(s)) { + stream.print(""); + } else if ("map".equals(s)) { + stream.print(""); + } + } else { + stream.print(""); + } + } + + private void printEndEnvelope(String tag) { + if (!compoundStack.empty()) { + String s = compoundStack.peek(); + if ("struct".equals(s)) { + stream.print("\n"); + closeIndent(); + putIndent(); + stream.print("\n"); + } else if ("vector".equals(s)) { + stream.print("\n"); + } else if ("map".equals(s)) { + stream.print("\n"); + } + } else { + stream.print("\n"); + } + } + + private void insideVector(String tag) { + printBeginEnvelope(tag); + compoundStack.push("vector"); + } + + private void outsideVector(String tag) throws IOException { + String s = compoundStack.pop(); + if (!"vector".equals(s)) { + throw new IOException("Error serializing vector."); + } + printEndEnvelope(tag); + } + + private void insideMap(String tag) { + printBeginEnvelope(tag); + compoundStack.push("map"); + } + + private void outsideMap(String tag) throws IOException { + String s = compoundStack.pop(); + if (!"map".equals(s)) { + throw new IOException("Error serializing map."); + } + printEndEnvelope(tag); + } + + private void insideRecord(String tag) { + printBeginEnvelope(tag); + compoundStack.push("struct"); + } + + private void outsideRecord(String tag) throws IOException { + String s = compoundStack.pop(); + if (!"struct".equals(s)) { + throw new IOException("Error serializing record."); + } + printEndEnvelope(tag); + } + + /** Creates a new instance of XmlRecordOutput */ + public XmlRecordOutput(OutputStream out) { + try { + stream = new PrintStream(out, true, "UTF-8"); + compoundStack = new Stack(); + } catch (UnsupportedEncodingException ex) { + throw new RuntimeException(ex); + } + } + + public void writeByte(byte b, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Byte.toString(b)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeBool(boolean b, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(b ? 
"1" : "0"); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeInt(int i, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Integer.toString(i)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeLong(long l, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Long.toString(l)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeFloat(float f, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Float.toString(f)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeDouble(double d, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Double.toString(d)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeString(String s, String tag) throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Utils.toXMLString(s)); + stream.print(""); + printEndEnvelope(tag); + } + + public void writeBuffer(Buffer buf, String tag) + throws IOException { + printBeginEnvelope(tag); + stream.print(""); + stream.print(Utils.toXMLBuffer(buf)); + stream.print(""); + printEndEnvelope(tag); + } + + public void startRecord(Record r, String tag) throws IOException { + insideRecord(tag); + stream.print("\n"); + addIndent(); + } + + public void endRecord(Record r, String tag) throws IOException { + closeIndent(); + putIndent(); + stream.print(""); + outsideRecord(tag); + } + + public void startVector(ArrayList v, String tag) throws IOException { + insideVector(tag); + stream.print("\n"); + addIndent(); + } + + public void endVector(ArrayList v, String tag) throws IOException { + closeIndent(); + putIndent(); + stream.print(""); + outsideVector(tag); + } + + public void startMap(TreeMap v, String tag) throws IOException { + insideMap(tag); + stream.print("\n"); + addIndent(); + } + + public void endMap(TreeMap v, String tag) throws IOException { + closeIndent(); + putIndent(); + stream.print(""); + outsideMap(tag); + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/CGenerator.java b/src/java/org/apache/hadoop/record/compiler/CGenerator.java new file mode 100644 index 00000000000..b62b62924bf --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CGenerator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Iterator; + +/** + * C Code generator front-end for Hadoop record I/O. 
+ */ +class CGenerator extends CodeGenerator { + + CGenerator() { + } + + /** + * Generate C code. This method only creates the requested file(s) + * and spits-out file-level elements (such as include statements etc.) + * record-level code is generated by JRecord. + */ + void genCode(String name, ArrayList ilist, + ArrayList rlist, String destDir, ArrayList options) + throws IOException { + name = new File(destDir, (new File(name)).getName()).getAbsolutePath(); + FileWriter cc = new FileWriter(name+".c"); + try { + FileWriter hh = new FileWriter(name+".h"); + try { + hh.write("#ifndef __"+name.toUpperCase().replace('.','_')+"__\n"); + hh.write("#define __"+name.toUpperCase().replace('.','_')+"__\n"); + hh.write("#include \"recordio.h\"\n"); + for (Iterator iter = ilist.iterator(); iter.hasNext();) { + hh.write("#include \""+iter.next().getName()+".h\"\n"); + } + + cc.write("#include \""+name+".h\"\n"); + + /* + for (Iterator iter = rlist.iterator(); iter.hasNext();) { + iter.next().genCppCode(hh, cc); + } + */ + + hh.write("#endif //"+name.toUpperCase().replace('.','_')+"__\n"); + } finally { + hh.close(); + } + } finally { + cc.close(); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/CodeBuffer.java b/src/java/org/apache/hadoop/record/compiler/CodeBuffer.java new file mode 100644 index 00000000000..5ba8c9fa62c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CodeBuffer.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
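
Complementing the reader sketch earlier, a hypothetical writer-side sketch for the XmlRecordOutput class above; it emits the struct that XmlReadSketch consumes. The Record argument of startRecord/endRecord is unused by this writer, so null is passed here, whereas generated records pass "this".

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.record.XmlRecordOutput;

    class XmlWriteSketch {
      static byte[] write() throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        XmlRecordOutput out = new XmlRecordOutput(bytes);  // UTF-8 PrintStream underneath
        out.startRecord(null, "rec");       // opens the struct envelope
        out.writeInt(42, "id");
        out.writeString("example", "name");
        out.endRecord(null, "rec");
        return bytes.toByteArray();
      }
    }
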
+ */ +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; + +/** + * A wrapper around StringBuffer that automatically does indentation + */ +public class CodeBuffer { + + static private ArrayList startMarkers = new ArrayList(); + static private ArrayList endMarkers = new ArrayList(); + + static { + addMarkers('{', '}'); + addMarkers('(', ')'); + } + + static void addMarkers(char ch1, char ch2) { + startMarkers.add(ch1); + endMarkers.add(ch2); + } + + private int level = 0; + private int numSpaces = 2; + private boolean firstChar = true; + private StringBuffer sb; + + /** Creates a new instance of CodeBuffer */ + CodeBuffer() { + this(2, ""); + } + + CodeBuffer(String s) { + this(2, s); + } + + CodeBuffer(int numSpaces, String s) { + sb = new StringBuffer(); + this.numSpaces = numSpaces; + this.append(s); + } + + void append(String s) { + int length = s.length(); + for (int idx = 0; idx < length; idx++) { + char ch = s.charAt(idx); + append(ch); + } + } + + void append(char ch) { + if (endMarkers.contains(ch)) { + level--; + } + if (firstChar) { + for (int idx = 0; idx < level; idx++) { + for (int num = 0; num < numSpaces; num++) { + rawAppend(' '); + } + } + } + rawAppend(ch); + firstChar = false; + if (startMarkers.contains(ch)) { + level++; + } + if (ch == '\n') { + firstChar = true; + } + } + + private void rawAppend(char ch) { + sb.append(ch); + } + + public String toString() { + return sb.toString(); + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/CodeGenerator.java b/src/java/org/apache/hadoop/record/compiler/CodeGenerator.java new file mode 100644 index 00000000000..6224eaf3927 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CodeGenerator.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +/** + * CodeGenerator is a Factory and a base class for Hadoop Record I/O translators. + * Different translators register creation methods with this factory. 
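
Before the generator classes, a quick illustration of the CodeBuffer defined above: braces and parentheses passed through append() move the indent level, and '\n' re-arms indentation for the next line. CodeBuffer and its constructor are package-private, so this sketch sits in the same package; the class name is illustrative.

    package org.apache.hadoop.record.compiler;

    class CodeBufferSketch {
      public static void main(String[] args) {
        CodeBuffer cb = new CodeBuffer();
        cb.append("class Demo {\n");
        cb.append("void run() {\n");
        cb.append("int x = 0;\n");
        cb.append("}\n");
        cb.append("}\n");
        System.out.println(cb);   // prints the body indented two spaces per level
      }
    }
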
+ */ +abstract class CodeGenerator { + + private static HashMap generators = + new HashMap(); + + static { + register("c", new CGenerator()); + register("c++", new CppGenerator()); + register("java", new JavaGenerator()); + } + + static void register(String lang, CodeGenerator gen) { + generators.put(lang, gen); + } + + static CodeGenerator get(String lang) { + return generators.get(lang); + } + + abstract void genCode(String file, + ArrayList inclFiles, + ArrayList records, + String destDir, + ArrayList options) throws IOException; +} diff --git a/src/java/org/apache/hadoop/record/compiler/Consts.java b/src/java/org/apache/hadoop/record/compiler/Consts.java new file mode 100644 index 00000000000..6bfd5360d5c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/Consts.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.IOException; +import java.util.Iterator; +import org.apache.hadoop.record.RecordInput; + +/** + * const definitions for Record I/O compiler + */ +public class Consts { + + /** Cannot create a new instance */ + private Consts() { + } + + // prefix to use for variables in generated classes + public static final String RIO_PREFIX = "_rio_"; + // other vars used in generated classes + public static final String RTI_VAR = RIO_PREFIX + "recTypeInfo"; + public static final String RTI_FILTER = RIO_PREFIX + "rtiFilter"; + public static final String RTI_FILTER_FIELDS = RIO_PREFIX + "rtiFilterFields"; + public static final String RECORD_OUTPUT = RIO_PREFIX + "a"; + public static final String RECORD_INPUT = RIO_PREFIX + "a"; + public static final String TAG = RIO_PREFIX + "tag"; + +} diff --git a/src/java/org/apache/hadoop/record/compiler/CppGenerator.java b/src/java/org/apache/hadoop/record/compiler/CppGenerator.java new file mode 100644 index 00000000000..e1fb599c049 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/CppGenerator.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
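
A new backend plugs into the registry above the same way the built-in C, C++ and Java generators do. The sketch below is illustrative only: the class name is invented, and the element types of the ArrayList parameters (JFile, JRecord, String) are an assumption about the generic signature of genCode.

    package org.apache.hadoop.record.compiler;

    import java.io.IOException;
    import java.util.ArrayList;

    // Hypothetical do-nothing backend; a real one would write source files
    // under destDir, as CGenerator and CppGenerator do.
    class NullGenerator extends CodeGenerator {
      static {
        // mirrors the register(...) calls in CodeGenerator's static block
        CodeGenerator.register("null", new NullGenerator());
      }
      void genCode(String file, ArrayList<JFile> inclFiles,
                   ArrayList<JRecord> records, String destDir,
                   ArrayList<String> options) throws IOException {
        // intentionally empty
      }
    }
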
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Iterator; + +/** + * C++ Code generator front-end for Hadoop record I/O. + */ +class CppGenerator extends CodeGenerator { + + CppGenerator() { + } + + /** + * Generate C++ code. This method only creates the requested file(s) + * and spits-out file-level elements (such as include statements etc.) + * record-level code is generated by JRecord. + */ + void genCode(String name, ArrayList ilist, + ArrayList rlist, String destDir, ArrayList options) + throws IOException { + name = new File(destDir, (new File(name)).getName()).getAbsolutePath(); + + FileWriter cc = new FileWriter(name+".cc"); + try { + FileWriter hh = new FileWriter(name+".hh"); + + try { + String fileName = (new File(name)).getName(); + hh.write("#ifndef __"+fileName.toUpperCase().replace('.','_')+"__\n"); + hh.write("#define __"+fileName.toUpperCase().replace('.','_')+"__\n"); + hh.write("#include \"recordio.hh\"\n"); + hh.write("#include \"recordTypeInfo.hh\"\n"); + for (Iterator iter = ilist.iterator(); iter.hasNext();) { + hh.write("#include \""+iter.next().getName()+".hh\"\n"); + } + + cc.write("#include \""+fileName+".hh\"\n"); + cc.write("#include \"utils.hh\"\n"); + + for (Iterator iter = rlist.iterator(); iter.hasNext();) { + iter.next().genCppCode(hh, cc, options); + } + + hh.write("#endif //"+fileName.toUpperCase().replace('.','_')+"__\n"); + } finally { + hh.close(); + } + } finally { + cc.close(); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JBoolean.java b/src/java/org/apache/hadoop/record/compiler/JBoolean.java new file mode 100644 index 00000000000..28ddff09e33 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JBoolean.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + + +/** + */ +public class JBoolean extends JType { + + class JavaBoolean extends JType.JavaType { + + JavaBoolean() { + super("boolean", "Bool", "Boolean", "TypeID.RIOType.BOOL"); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+" == "+other+")? 0 : ("+ + fname+"?1:-1);\n"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.BoolTypeID"; + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+")?0:1;\n"); + } + + // In Binary format, boolean is written as byte. 
true = 1, false = 0 + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("if ("+l+"<1) {\n"); + cb.append("throw new java.io.IOException(\"Boolean is exactly 1 byte."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append(s+"++; "+l+"--;\n"); + cb.append("}\n"); + } + + // In Binary format, boolean is written as byte. true = 1, false = 0 + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("if (l1<1 || l2<1) {\n"); + cb.append("throw new java.io.IOException(\"Boolean is exactly 1 byte."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append("if (b1[s1] != b2[s2]) {\n"); + cb.append("return (b1[s1]>>32));\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("if ("+l+"<8) {\n"); + cb.append("throw new java.io.IOException(\"Double is exactly 8 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append(s+"+=8; "+l+"-=8;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("if (l1<8 || l2<8) {\n"); + cb.append("throw new java.io.IOException(\"Double is exactly 8 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append("double d1 = org.apache.hadoop.record.Utils.readDouble(b1, s1);\n"); + cb.append("double d2 = org.apache.hadoop.record.Utils.readDouble(b2, s2);\n"); + cb.append("if (d1 != d2) {\n"); + cb.append("return ((d1-d2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("s1+=8; s2+=8; l1-=8; l2-=8;\n"); + cb.append("}\n"); + } + } + + class CppDouble extends CppType { + + CppDouble() { + super("double"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_DOUBLE)"; + } + } + + + /** Creates a new instance of JDouble */ + public JDouble() { + setJavaType(new JavaDouble()); + setCppType(new CppDouble()); + setCType(new CType()); + } + + String getSignature() { + return "d"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JField.java b/src/java/org/apache/hadoop/record/compiler/JField.java new file mode 100644 index 00000000000..f6ff6f0832b --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JField.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +/** + * A thin wrappper around record field. 
+ */ +public class JField { + + private String name; + private T type; + + /** + * Creates a new instance of JField + */ + public JField(String name, T type) { + this.type = type; + this.name = name; + } + + String getName() { + return name; + } + + T getType() { + return type; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JFile.java b/src/java/org/apache/hadoop/record/compiler/JFile.java new file mode 100644 index 00000000000..5bff60e1b9a --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JFile.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.IOException; +import java.util.ArrayList; + +/** + * Container for the Hadoop Record DDL. + * The main components of the file are filename, list of included files, + * and records defined in that file. + */ +public class JFile { + /** Possibly full name of the file */ + private String mName; + /** Ordered list of included files */ + private ArrayList mInclFiles; + /** Ordered list of records declared in this file */ + private ArrayList mRecords; + + /** Creates a new instance of JFile + * + * @param name possibly full pathname to the file + * @param inclFiles included files (as JFile) + * @param recList List of records defined within this file + */ + public JFile(String name, ArrayList inclFiles, + ArrayList recList) { + mName = name; + mInclFiles = inclFiles; + mRecords = recList; + } + + /** Strip the other pathname components and return the basename */ + String getName() { + int idx = mName.lastIndexOf('/'); + return (idx > 0) ? mName.substring(idx) : mName; + } + + /** Generate record code in given language. Language should be all + * lowercase. + */ + public int genCode(String language, String destDir, ArrayList options) + throws IOException { + CodeGenerator gen = CodeGenerator.get(language); + if (gen != null) { + gen.genCode(mName, mInclFiles, mRecords, destDir, options); + } else { + System.err.println("Cannnot recognize language:"+language); + return 1; + } + return 0; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JFloat.java b/src/java/org/apache/hadoop/record/compiler/JFloat.java new file mode 100644 index 00000000000..08d772dd41d --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JFloat.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
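
JFile is the public entry point: its genCode dispatches to whichever backend the registry knows for the requested language. Below is a hypothetical programmatic use of the DDL classes (the compiler's parser front-end normally builds these objects from a DDL file); it assumes JRecord exposes the (fully-qualified name, field list) constructor that its inner JavaRecord/CppRecord types mirror, and it uses JInt, which appears later in this patch.

    import java.io.IOException;
    import java.util.ArrayList;
    import org.apache.hadoop.record.compiler.*;

    class DdlSketch {
      public static void main(String[] args) throws IOException {
        ArrayList<JField<JType>> fields = new ArrayList<JField<JType>>();
        fields.add(new JField<JType>("id", new JInt()));

        ArrayList<JRecord> records = new ArrayList<JRecord>();
        records.add(new JRecord("example.Employee", fields));

        JFile file = new JFile("employee.jr", new ArrayList<JFile>(), records);
        int rc = file.genCode("java", "./gen", new ArrayList<String>());
        System.out.println(rc);   // 0 on success, 1 for an unknown language
      }
    }
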
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +/** + */ +public class JFloat extends JType { + + class JavaFloat extends JavaType { + + JavaFloat() { + super("float", "Float", "Float", "TypeID.RIOType.FLOAT"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.FloatTypeID"; + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = Float.floatToIntBits("+fname+");\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("if ("+l+"<4) {\n"); + cb.append("throw new java.io.IOException(\"Float is exactly 4 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append(s+"+=4; "+l+"-=4;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("if (l1<4 || l2<4) {\n"); + cb.append("throw new java.io.IOException(\"Float is exactly 4 bytes."+ + " Provided buffer is smaller.\");\n"); + cb.append("}\n"); + cb.append("float f1 = org.apache.hadoop.record.Utils.readFloat(b1, s1);\n"); + cb.append("float f2 = org.apache.hadoop.record.Utils.readFloat(b2, s2);\n"); + cb.append("if (f1 != f2) {\n"); + cb.append("return ((f1-f2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("s1+=4; s2+=4; l1-=4; l2-=4;\n"); + cb.append("}\n"); + } + } + + class CppFloat extends CppType { + + CppFloat() { + super("float"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_FLOAT)"; + } + } + + /** Creates a new instance of JFloat */ + public JFloat() { + setJavaType(new JavaFloat()); + setCppType(new CppFloat()); + setCType(new CType()); + } + + String getSignature() { + return "f"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JInt.java b/src/java/org/apache/hadoop/record/compiler/JInt.java new file mode 100644 index 00000000000..ecf735b0e41 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JInt.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + + +/** + * Code generator for "int" type + */ +public class JInt extends JType { + + class JavaInt extends JavaType { + + JavaInt() { + super("int", "Int", "Integer", "TypeID.RIOType.INT"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.IntTypeID"; + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("int i = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int z = org.apache.hadoop.record.Utils.getVIntSize(i);\n"); + cb.append(s+"+=z; "+l+"-=z;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("if (i1 != i2) {\n"); + cb.append("return ((i1-i2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);\n"); + cb.append("int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);\n"); + cb.append("s1+=z1; s2+=z2; l1-=z1; l2-=z2;\n"); + cb.append("}\n"); + } + } + + class CppInt extends CppType { + + CppInt() { + super("int32_t"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_INT)"; + } + } + + /** Creates a new instance of JInt */ + public JInt() { + setJavaType(new JavaInt()); + setCppType(new CppInt()); + setCType(new CType()); + } + + String getSignature() { + return "i"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JLong.java b/src/java/org/apache/hadoop/record/compiler/JLong.java new file mode 100644 index 00000000000..38df1e87b3c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JLong.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + +/** + * Code generator for "long" type + */ +public class JLong extends JType { + + class JavaLong extends JavaType { + + JavaLong() { + super("long", "Long", "Long", "TypeID.RIOType.LONG"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.LongTypeID"; + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = (int) ("+fname+"^("+ + fname+">>>32));\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("long i = org.apache.hadoop.record.Utils.readVLong("+b+", "+s+");\n"); + cb.append("int z = org.apache.hadoop.record.Utils.getVIntSize(i);\n"); + cb.append(s+"+=z; "+l+"-=z;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("long i1 = org.apache.hadoop.record.Utils.readVLong(b1, s1);\n"); + cb.append("long i2 = org.apache.hadoop.record.Utils.readVLong(b2, s2);\n"); + cb.append("if (i1 != i2) {\n"); + cb.append("return ((i1-i2) < 0) ? -1 : 0;\n"); + cb.append("}\n"); + cb.append("int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);\n"); + cb.append("int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);\n"); + cb.append("s1+=z1; s2+=z2; l1-=z1; l2-=z2;\n"); + cb.append("}\n"); + } + } + + class CppLong extends CppType { + + CppLong() { + super("int64_t"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_LONG)"; + } + } + + /** Creates a new instance of JLong */ + public JLong() { + setJavaType(new JavaLong()); + setCppType(new CppLong()); + setCType(new CType()); + } + + String getSignature() { + return "l"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JMap.java b/src/java/org/apache/hadoop/record/compiler/JMap.java new file mode 100644 index 00000000000..fb42dc496f5 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JMap.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
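
The genSlurpBytes fragments that JInt and JLong emit above skip a zero-compressed value inside a raw serialized buffer without materializing the record. Written out as a standalone helper (the class name is illustrative), the long case looks like this:

    import java.io.IOException;
    import org.apache.hadoop.record.Utils;

    class SlurpSketch {
      // returns how many bytes the encoded long at offset s occupies
      static int skipVLong(byte[] b, int s) throws IOException {
        long i = Utils.readVLong(b, s);
        return Utils.getVIntSize(i);
      }
    }
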
+ */ + +package org.apache.hadoop.record.compiler; + +import java.util.Map; + + +/** + */ +public class JMap extends JCompType { + + static private int level = 0; + + static private String getLevel() { return Integer.toString(level); } + + static private void incrLevel() { level++; } + + static private void decrLevel() { level--; } + + static private String getId(String id) { return id+getLevel(); } + + private JType keyType; + private JType valueType; + + class JavaMap extends JavaCompType { + + JType.JavaType key; + JType.JavaType value; + + JavaMap(JType.JavaType key, JType.JavaType value) { + super("java.util.TreeMap<"+key.getWrapperType()+","+value.getWrapperType()+">", + "Map", + "java.util.TreeMap<"+key.getWrapperType()+","+value.getWrapperType()+">", + "TypeID.RIOType.MAP"); + this.key = key; + this.value = value; + } + + String getTypeIDObjectString() { + return "new org.apache.hadoop.record.meta.MapTypeID(" + + key.getTypeIDObjectString() + ", " + + value.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + key.genSetRTIFilter(cb, nestedStructMap); + value.genSetRTIFilter(cb, nestedStructMap); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + String setType = "java.util.Set<"+key.getWrapperType()+"> "; + String iterType = "java.util.Iterator<"+key.getWrapperType()+"> "; + cb.append("{\n"); + cb.append(setType+getId(Consts.RIO_PREFIX + "set1")+" = "+ + fname+".keySet();\n"); + cb.append(setType+getId(Consts.RIO_PREFIX + "set2")+" = "+ + other+".keySet();\n"); + cb.append(iterType+getId(Consts.RIO_PREFIX + "miter1")+" = "+ + getId(Consts.RIO_PREFIX + "set1")+".iterator();\n"); + cb.append(iterType+getId(Consts.RIO_PREFIX + "miter2")+" = "+ + getId(Consts.RIO_PREFIX + "set2")+".iterator();\n"); + cb.append("for(; "+getId(Consts.RIO_PREFIX + "miter1")+".hasNext() && "+ + getId(Consts.RIO_PREFIX + "miter2")+".hasNext();) {\n"); + cb.append(key.getType()+" "+getId(Consts.RIO_PREFIX + "k1")+ + " = "+getId(Consts.RIO_PREFIX + "miter1")+".next();\n"); + cb.append(key.getType()+" "+getId(Consts.RIO_PREFIX + "k2")+ + " = "+getId(Consts.RIO_PREFIX + "miter2")+".next();\n"); + key.genCompareTo(cb, getId(Consts.RIO_PREFIX + "k1"), + getId(Consts.RIO_PREFIX + "k2")); + cb.append("if (" + Consts.RIO_PREFIX + "ret != 0) { return " + + Consts.RIO_PREFIX + "ret; }\n"); + cb.append("}\n"); + cb.append(Consts.RIO_PREFIX + "ret = ("+getId(Consts.RIO_PREFIX + "set1")+ + ".size() - "+getId(Consts.RIO_PREFIX + "set2")+".size());\n"); + cb.append("}\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(getType()+" "+fname+";\n"); + } + cb.append("{\n"); + incrLevel(); + cb.append("org.apache.hadoop.record.Index " + + getId(Consts.RIO_PREFIX + "midx")+" = " + + Consts.RECORD_INPUT + ".startMap(\""+tag+"\");\n"); + cb.append(fname+"=new "+getType()+"();\n"); + cb.append("for (; !"+getId(Consts.RIO_PREFIX + "midx")+".done(); "+ + getId(Consts.RIO_PREFIX + "midx")+".incr()) {\n"); + key.genReadMethod(cb, getId(Consts.RIO_PREFIX + "k"), + getId(Consts.RIO_PREFIX + "k"), true); + value.genReadMethod(cb, getId(Consts.RIO_PREFIX + "v"), + getId(Consts.RIO_PREFIX + "v"), true); + cb.append(fname+".put("+getId(Consts.RIO_PREFIX + "k")+","+ + getId(Consts.RIO_PREFIX + "v")+");\n"); + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endMap(\""+tag+"\");\n"); + decrLevel(); + cb.append("}\n"); + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + String setType = 
"java.util.Set> "; + String entryType = "java.util.Map.Entry<"+ + key.getWrapperType()+","+value.getWrapperType()+"> "; + String iterType = "java.util.Iterator> "; + cb.append("{\n"); + incrLevel(); + cb.append(Consts.RECORD_OUTPUT + ".startMap("+fname+",\""+tag+"\");\n"); + cb.append(setType+getId(Consts.RIO_PREFIX + "es")+" = "+ + fname+".entrySet();\n"); + cb.append("for("+iterType+getId(Consts.RIO_PREFIX + "midx")+" = "+ + getId(Consts.RIO_PREFIX + "es")+".iterator(); "+ + getId(Consts.RIO_PREFIX + "midx")+".hasNext();) {\n"); + cb.append(entryType+getId(Consts.RIO_PREFIX + "me")+" = "+ + getId(Consts.RIO_PREFIX + "midx")+".next();\n"); + cb.append(key.getType()+" "+getId(Consts.RIO_PREFIX + "k")+" = "+ + getId(Consts.RIO_PREFIX + "me")+".getKey();\n"); + cb.append(value.getType()+" "+getId(Consts.RIO_PREFIX + "v")+" = "+ + getId(Consts.RIO_PREFIX + "me")+".getValue();\n"); + key.genWriteMethod(cb, getId(Consts.RIO_PREFIX + "k"), + getId(Consts.RIO_PREFIX + "k")); + value.genWriteMethod(cb, getId(Consts.RIO_PREFIX + "v"), + getId(Consts.RIO_PREFIX + "v")); + cb.append("}\n"); + cb.append(Consts.RECORD_OUTPUT + ".endMap("+fname+",\""+tag+"\");\n"); + cb.append("}\n"); + decrLevel(); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("mi")+ + " = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int "+getId("mz")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("mi")+");\n"); + cb.append(s+"+="+getId("mz")+"; "+l+"-="+getId("mz")+";\n"); + cb.append("for (int "+getId("midx")+" = 0; "+getId("midx")+ + " < "+getId("mi")+"; "+getId("midx")+"++) {"); + key.genSlurpBytes(cb, b, s, l); + value.genSlurpBytes(cb, b, s, l); + cb.append("}\n"); + decrLevel(); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("mi1")+ + " = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int "+getId("mi2")+ + " = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("int "+getId("mz1")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("mi1")+");\n"); + cb.append("int "+getId("mz2")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("mi2")+");\n"); + cb.append("s1+="+getId("mz1")+"; s2+="+getId("mz2")+ + "; l1-="+getId("mz1")+"; l2-="+getId("mz2")+";\n"); + cb.append("for (int "+getId("midx")+" = 0; "+getId("midx")+ + " < "+getId("mi1")+" && "+getId("midx")+" < "+getId("mi2")+ + "; "+getId("midx")+"++) {"); + key.genCompareBytes(cb); + value.genSlurpBytes(cb, "b1", "s1", "l1"); + value.genSlurpBytes(cb, "b2", "s2", "l2"); + cb.append("}\n"); + cb.append("if ("+getId("mi1")+" != "+getId("mi2")+ + ") { return ("+getId("mi1")+"<"+getId("mi2")+")?-1:0; }\n"); + decrLevel(); + cb.append("}\n"); + } + } + + class CppMap extends CppCompType { + + JType.CppType key; + JType.CppType value; + + CppMap(JType.CppType key, JType.CppType value) { + super("::std::map< "+key.getType()+", "+ value.getType()+" >"); + this.key = key; + this.value = value; + } + + String getTypeIDObjectString() { + return "new ::hadoop::MapTypeID(" + + key.getTypeIDObjectString() + ", " + + value.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb) { + key.genSetRTIFilter(cb); + value.genSetRTIFilter(cb); + } + + } + + /** Creates a new instance of JMap */ + public JMap(JType t1, JType t2) { + setJavaType(new JavaMap(t1.getJavaType(), t2.getJavaType())); + setCppType(new CppMap(t1.getCppType(), 
t2.getCppType())); + setCType(new CType()); + keyType = t1; + valueType = t2; + } + + String getSignature() { + return "{" + keyType.getSignature() + valueType.getSignature() +"}"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JRecord.java b/src/java/org/apache/hadoop/record/compiler/JRecord.java new file mode 100644 index 00000000000..96955f365f0 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JRecord.java @@ -0,0 +1,806 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; + +/** + */ +public class JRecord extends JCompType { + + class JavaRecord extends JavaCompType { + + private String fullName; + private String name; + private String module; + private ArrayList> fields = + new ArrayList>(); + + JavaRecord(String name, ArrayList> flist) { + super(name, "Record", name, "TypeID.RIOType.STRUCT"); + this.fullName = name; + int idx = name.lastIndexOf('.'); + this.name = name.substring(idx+1); + this.module = name.substring(0, idx); + for (Iterator> iter = flist.iterator(); iter.hasNext();) { + JField f = iter.next(); + fields.add(new JField(f.getName(), f.getType().getJavaType())); + } + } + + String getTypeIDObjectString() { + return "new org.apache.hadoop.record.meta.StructTypeID(" + + fullName + ".getTypeInfo())"; + } + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + // ignore, if we'ev already set the type filter for this record + if (!nestedStructMap.containsKey(fullName)) { + // we set the RTI filter here + cb.append(fullName + ".setTypeFilter(rti.getNestedStructTypeInfo(\""+ + name + "\"));\n"); + nestedStructMap.put(fullName, null); + } + } + + // for each typeInfo in the filter, we see if there's a similar one in the record. + // Since we store typeInfos in ArrayLists, thsi search is O(n squared). We do it faster + // if we also store a map (of TypeInfo to index), but since setupRtiFields() is called + // only once when deserializing, we're sticking with the former, as the code is easier. 
+ void genSetupRtiFields(CodeBuffer cb) { + cb.append("private static void setupRtiFields()\n{\n"); + cb.append("if (null == " + Consts.RTI_FILTER + ") return;\n"); + cb.append("// we may already have done this\n"); + cb.append("if (null != " + Consts.RTI_FILTER_FIELDS + ") return;\n"); + cb.append("int " + Consts.RIO_PREFIX + "i, " + Consts.RIO_PREFIX + "j;\n"); + cb.append(Consts.RTI_FILTER_FIELDS + " = new int [" + + Consts.RIO_PREFIX + "rtiFilter.getFieldTypeInfos().size()];\n"); + cb.append("for (" + Consts.RIO_PREFIX + "i=0; " + Consts.RIO_PREFIX + "i<"+ + Consts.RTI_FILTER_FIELDS + ".length; " + Consts.RIO_PREFIX + "i++) {\n"); + cb.append(Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + "i] = 0;\n"); + cb.append("}\n"); + cb.append("java.util.Iterator " + Consts.RIO_PREFIX + "itFilter = " + + Consts.RIO_PREFIX + "rtiFilter.getFieldTypeInfos().iterator();\n"); + cb.append(Consts.RIO_PREFIX + "i=0;\n"); + cb.append("while (" + Consts.RIO_PREFIX + "itFilter.hasNext()) {\n"); + cb.append("org.apache.hadoop.record.meta.FieldTypeInfo " + + Consts.RIO_PREFIX + "tInfoFilter = " + + Consts.RIO_PREFIX + "itFilter.next();\n"); + cb.append("java.util.Iterator " + Consts.RIO_PREFIX + "it = " + Consts.RTI_VAR + + ".getFieldTypeInfos().iterator();\n"); + cb.append(Consts.RIO_PREFIX + "j=1;\n"); + cb.append("while (" + Consts.RIO_PREFIX + "it.hasNext()) {\n"); + cb.append("org.apache.hadoop.record.meta.FieldTypeInfo " + + Consts.RIO_PREFIX + "tInfo = " + Consts.RIO_PREFIX + "it.next();\n"); + cb.append("if (" + Consts.RIO_PREFIX + "tInfo.equals(" + + Consts.RIO_PREFIX + "tInfoFilter)) {\n"); + cb.append(Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + "i] = " + + Consts.RIO_PREFIX + "j;\n"); + cb.append("break;\n"); + cb.append("}\n"); + cb.append(Consts.RIO_PREFIX + "j++;\n"); + cb.append("}\n"); + /*int ct = 0; + for (Iterator> i = fields.iterator(); i.hasNext();) { + ct++; + JField jf = i.next(); + JavaType type = jf.getType(); + String name = jf.getName(); + if (ct != 1) { + cb.append("else "); + } + type.genRtiFieldCondition(cb, name, ct); + } + if (ct != 0) { + cb.append("else {\n"); + cb.append("rtiFilterFields[i] = 0;\n"); + cb.append("}\n"); + }*/ + cb.append(Consts.RIO_PREFIX + "i++;\n"); + cb.append("}\n"); + cb.append("}\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(fullName+" "+fname+";\n"); + } + cb.append(fname+"= new "+fullName+"();\n"); + cb.append(fname+".deserialize(" + Consts.RECORD_INPUT + ",\""+tag+"\");\n"); + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + cb.append(fname+".serialize(" + Consts.RECORD_OUTPUT + ",\""+tag+"\");\n"); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("int r = "+fullName+ + ".Comparator.slurpRaw("+b+","+s+","+l+");\n"); + cb.append(s+"+=r; "+l+"-=r;\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("int r1 = "+fullName+ + ".Comparator.compareRaw(b1,s1,l1,b2,s2,l2);\n"); + cb.append("if (r1 <= 0) { return r1; }\n"); + cb.append("s1+=r1; s2+=r1; l1-=r1; l2-=r1;\n"); + cb.append("}\n"); + } + + void genCode(String destDir, ArrayList options) throws IOException { + String pkg = module; + String pkgpath = pkg.replaceAll("\\.", "/"); + File pkgdir = new File(destDir, pkgpath); + + final File jfile = new File(pkgdir, name+".java"); + if (!pkgdir.exists()) { + // create the pkg directory + boolean ret = pkgdir.mkdirs(); + if (!ret) { + throw new 
IOException("Cannnot create directory: "+pkgpath); + } + } else if (!pkgdir.isDirectory()) { + // not a directory + throw new IOException(pkgpath+" is not a directory."); + } + + CodeBuffer cb = new CodeBuffer(); + cb.append("// File generated by hadoop record compiler. Do not edit.\n"); + cb.append("package "+module+";\n\n"); + cb.append("public class "+name+ + " extends org.apache.hadoop.record.Record {\n"); + + // type information declarations + cb.append("private static final " + + "org.apache.hadoop.record.meta.RecordTypeInfo " + + Consts.RTI_VAR + ";\n"); + cb.append("private static " + + "org.apache.hadoop.record.meta.RecordTypeInfo " + + Consts.RTI_FILTER + ";\n"); + cb.append("private static int[] " + Consts.RTI_FILTER_FIELDS + ";\n"); + + // static init for type information + cb.append("static {\n"); + cb.append(Consts.RTI_VAR + " = " + + "new org.apache.hadoop.record.meta.RecordTypeInfo(\"" + + name + "\");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genStaticTypeInfo(cb, name); + } + cb.append("}\n\n"); + + // field definitions + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genDecl(cb, name); + } + + // default constructor + cb.append("public "+name+"() { }\n"); + + // constructor + cb.append("public "+name+"(\n"); + int fIdx = 0; + for (Iterator> i = fields.iterator(); i.hasNext(); fIdx++) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genConstructorParam(cb, name); + cb.append((!i.hasNext())?"":",\n"); + } + cb.append(") {\n"); + fIdx = 0; + for (Iterator> i = fields.iterator(); i.hasNext(); fIdx++) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genConstructorSet(cb, name); + } + cb.append("}\n"); + + // getter/setter for type info + cb.append("public static org.apache.hadoop.record.meta.RecordTypeInfo" + + " getTypeInfo() {\n"); + cb.append("return " + Consts.RTI_VAR + ";\n"); + cb.append("}\n"); + cb.append("public static void setTypeFilter(" + + "org.apache.hadoop.record.meta.RecordTypeInfo rti) {\n"); + cb.append("if (null == rti) return;\n"); + cb.append(Consts.RTI_FILTER + " = rti;\n"); + cb.append(Consts.RTI_FILTER_FIELDS + " = null;\n"); + // set RTIFilter for nested structs. + // To prevent setting up the type filter for the same struct more than once, + // we use a hash map to keep track of what we've set. 
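
Putting these appends together with the serialize/deserialize/compareTo appends that follow, the emitted file for a hypothetical record with a single int field comes out roughly as below. Accessor names and the per-field compare are assumptions about the JType helpers not shown here; the _rio_ prefixes come from Consts, and the type-info plumbing (RTI fields, setTypeFilter, setupRtiFields), equals, hashCode, clone and the raw-bytes Comparator are omitted for brevity.

    package example;

    // Abridged sketch of generated output; not part of this patch.
    public class Employee extends org.apache.hadoop.record.Record {
      private int id;

      public Employee() { }

      public int getId() { return id; }
      public void setId(int id) { this.id = id; }

      public void serialize(final org.apache.hadoop.record.RecordOutput _rio_a,
                            final String _rio_tag) throws java.io.IOException {
        _rio_a.startRecord(this, _rio_tag);
        _rio_a.writeInt(id, "id");
        _rio_a.endRecord(this, _rio_tag);
      }

      // the generated deserialize() also carries the RTI-filter branch;
      // only the unfiltered path is sketched here
      public void deserialize(final org.apache.hadoop.record.RecordInput _rio_a,
                              final String _rio_tag) throws java.io.IOException {
        _rio_a.startRecord(_rio_tag);
        id = _rio_a.readInt("id");
        _rio_a.endRecord(_rio_tag);
      }

      public int compareTo(final Object _rio_peer_) throws ClassCastException {
        if (!(_rio_peer_ instanceof Employee)) {
          throw new ClassCastException("Comparing different types of records.");
        }
        Employee _rio_peer = (Employee) _rio_peer_;
        return (id == _rio_peer.id) ? 0 : ((id < _rio_peer.id) ? -1 : 1);
      }
    }
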
+ Map nestedStructMap = new HashMap(); + for (JField jf : fields) { + JavaType type = jf.getType(); + type.genSetRTIFilter(cb, nestedStructMap); + } + cb.append("}\n"); + + // setupRtiFields() + genSetupRtiFields(cb); + + // getters/setters for member variables + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genGetSet(cb, name); + } + + // serialize() + cb.append("public void serialize("+ + "final org.apache.hadoop.record.RecordOutput " + + Consts.RECORD_OUTPUT + ", final String " + Consts.TAG + ")\n"+ + "throws java.io.IOException {\n"); + cb.append(Consts.RECORD_OUTPUT + ".startRecord(this," + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genWriteMethod(cb, name, name); + } + cb.append(Consts.RECORD_OUTPUT + ".endRecord(this," + Consts.TAG+");\n"); + cb.append("}\n"); + + // deserializeWithoutFilter() + cb.append("private void deserializeWithoutFilter("+ + "final org.apache.hadoop.record.RecordInput " + + Consts.RECORD_INPUT + ", final String " + Consts.TAG + ")\n"+ + "throws java.io.IOException {\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(" + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genReadMethod(cb, name, name, false); + } + cb.append(Consts.RECORD_INPUT + ".endRecord(" + Consts.TAG+");\n"); + cb.append("}\n"); + + // deserialize() + cb.append("public void deserialize(final " + + "org.apache.hadoop.record.RecordInput " + + Consts.RECORD_INPUT + ", final String " + Consts.TAG + ")\n"+ + "throws java.io.IOException {\n"); + cb.append("if (null == " + Consts.RTI_FILTER + ") {\n"); + cb.append("deserializeWithoutFilter(" + Consts.RECORD_INPUT + ", " + + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + cb.append("// if we're here, we need to read based on version info\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(" + Consts.TAG + ");\n"); + cb.append("setupRtiFields();\n"); + cb.append("for (int " + Consts.RIO_PREFIX + "i=0; " + Consts.RIO_PREFIX + + "i<" + Consts.RTI_FILTER + ".getFieldTypeInfos().size(); " + + Consts.RIO_PREFIX + "i++) {\n"); + int ct = 0; + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + ct++; + if (1 != ct) { + cb.append("else "); + } + cb.append("if (" + ct + " == " + Consts.RTI_FILTER_FIELDS + "[" + + Consts.RIO_PREFIX + "i]) {\n"); + type.genReadMethod(cb, name, name, false); + cb.append("}\n"); + } + if (0 != ct) { + cb.append("else {\n"); + cb.append("java.util.ArrayList<" + + "org.apache.hadoop.record.meta.FieldTypeInfo> typeInfos = " + + "(java.util.ArrayList<" + + "org.apache.hadoop.record.meta.FieldTypeInfo>)" + + "(" + Consts.RTI_FILTER + ".getFieldTypeInfos());\n"); + cb.append("org.apache.hadoop.record.meta.Utils.skip(" + + Consts.RECORD_INPUT + ", " + "typeInfos.get(" + Consts.RIO_PREFIX + + "i).getFieldID(), typeInfos.get(" + + Consts.RIO_PREFIX + "i).getTypeID());\n"); + cb.append("}\n"); + } + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endRecord(" + Consts.TAG+");\n"); + cb.append("}\n"); + + // compareTo() + cb.append("public int compareTo (final Object " + Consts.RIO_PREFIX + + "peer_) throws ClassCastException {\n"); + cb.append("if (!(" + Consts.RIO_PREFIX + 
"peer_ instanceof "+name+")) {\n"); + cb.append("throw new ClassCastException(\"Comparing different types of records.\");\n"); + cb.append("}\n"); + cb.append(name+" " + Consts.RIO_PREFIX + "peer = ("+name+") " + + Consts.RIO_PREFIX + "peer_;\n"); + cb.append("int " + Consts.RIO_PREFIX + "ret = 0;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genCompareTo(cb, name, Consts.RIO_PREFIX + "peer."+name); + cb.append("if (" + Consts.RIO_PREFIX + "ret != 0) return " + + Consts.RIO_PREFIX + "ret;\n"); + } + cb.append("return " + Consts.RIO_PREFIX + "ret;\n"); + cb.append("}\n"); + + // equals() + cb.append("public boolean equals(final Object " + Consts.RIO_PREFIX + + "peer_) {\n"); + cb.append("if (!(" + Consts.RIO_PREFIX + "peer_ instanceof "+name+")) {\n"); + cb.append("return false;\n"); + cb.append("}\n"); + cb.append("if (" + Consts.RIO_PREFIX + "peer_ == this) {\n"); + cb.append("return true;\n"); + cb.append("}\n"); + cb.append(name+" " + Consts.RIO_PREFIX + "peer = ("+name+") " + + Consts.RIO_PREFIX + "peer_;\n"); + cb.append("boolean " + Consts.RIO_PREFIX + "ret = false;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genEquals(cb, name, Consts.RIO_PREFIX + "peer."+name); + cb.append("if (!" + Consts.RIO_PREFIX + "ret) return " + + Consts.RIO_PREFIX + "ret;\n"); + } + cb.append("return " + Consts.RIO_PREFIX + "ret;\n"); + cb.append("}\n"); + + // clone() + cb.append("public Object clone() throws CloneNotSupportedException {\n"); + cb.append(name+" " + Consts.RIO_PREFIX + "other = new "+name+"();\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genClone(cb, name); + } + cb.append("return " + Consts.RIO_PREFIX + "other;\n"); + cb.append("}\n"); + + cb.append("public int hashCode() {\n"); + cb.append("int " + Consts.RIO_PREFIX + "result = 17;\n"); + cb.append("int " + Consts.RIO_PREFIX + "ret;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genHashCode(cb, name); + cb.append(Consts.RIO_PREFIX + "result = 37*" + Consts.RIO_PREFIX + + "result + " + Consts.RIO_PREFIX + "ret;\n"); + } + cb.append("return " + Consts.RIO_PREFIX + "result;\n"); + cb.append("}\n"); + + cb.append("public static String signature() {\n"); + cb.append("return \""+getSignature()+"\";\n"); + cb.append("}\n"); + + cb.append("public static class Comparator extends"+ + " org.apache.hadoop.record.RecordComparator {\n"); + cb.append("public Comparator() {\n"); + cb.append("super("+name+".class);\n"); + cb.append("}\n"); + + cb.append("static public int slurpRaw(byte[] b, int s, int l) {\n"); + cb.append("try {\n"); + cb.append("int os = s;\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genSlurpBytes(cb, "b","s","l"); + } + cb.append("return (os - s);\n"); + cb.append("} catch(java.io.IOException e) {\n"); + cb.append("throw new RuntimeException(e);\n"); + cb.append("}\n"); + cb.append("}\n"); + + cb.append("static public int compareRaw(byte[] b1, int s1, int l1,\n"); + cb.append(" byte[] b2, int s2, int l2) {\n"); + cb.append("try {\n"); + cb.append("int os1 = s1;\n"); + for (Iterator> i = 
fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + JavaType type = jf.getType(); + type.genCompareBytes(cb); + } + cb.append("return (os1 - s1);\n"); + cb.append("} catch(java.io.IOException e) {\n"); + cb.append("throw new RuntimeException(e);\n"); + cb.append("}\n"); + cb.append("}\n"); + cb.append("public int compare(byte[] b1, int s1, int l1,\n"); + cb.append(" byte[] b2, int s2, int l2) {\n"); + cb.append("int ret = compareRaw(b1,s1,l1,b2,s2,l2);\n"); + cb.append("return (ret == -1)? -1 : ((ret==0)? 1 : 0);"); + cb.append("}\n"); + cb.append("}\n\n"); + cb.append("static {\n"); + cb.append("org.apache.hadoop.record.RecordComparator.define(" + +name+".class, new Comparator());\n"); + cb.append("}\n"); + cb.append("}\n"); + + FileWriter jj = new FileWriter(jfile); + try { + jj.write(cb.toString()); + } finally { + jj.close(); + } + } + } + + class CppRecord extends CppCompType { + + private String fullName; + private String name; + private String module; + private ArrayList> fields = + new ArrayList>(); + + CppRecord(String name, ArrayList> flist) { + super(name.replaceAll("\\.","::")); + this.fullName = name.replaceAll("\\.", "::"); + int idx = name.lastIndexOf('.'); + this.name = name.substring(idx+1); + this.module = name.substring(0, idx).replaceAll("\\.", "::"); + for (Iterator> iter = flist.iterator(); iter.hasNext();) { + JField f = iter.next(); + fields.add(new JField(f.getName(), f.getType().getCppType())); + } + } + + String getTypeIDObjectString() { + return "new ::hadoop::StructTypeID(" + + fullName + "::getTypeInfo().getFieldTypeInfos())"; + } + + String genDecl(String fname) { + return " "+name+" "+fname+";\n"; + } + + void genSetRTIFilter(CodeBuffer cb) { + // we set the RTI filter here + cb.append(fullName + "::setTypeFilter(rti.getNestedStructTypeInfo(\""+ + name + "\"));\n"); + } + + void genSetupRTIFields(CodeBuffer cb) { + cb.append("void " + fullName + "::setupRtiFields() {\n"); + cb.append("if (NULL == p" + Consts.RTI_FILTER + ") return;\n"); + cb.append("if (NULL != p" + Consts.RTI_FILTER_FIELDS + ") return;\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + " = new int[p" + + Consts.RTI_FILTER + "->getFieldTypeInfos().size()];\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "i=0; " + + Consts.RIO_PREFIX + "igetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "i++) {\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + + "i] = 0;\n"); + cb.append("}\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "i=0; " + + Consts.RIO_PREFIX + "igetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "i++) {\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "j=0; " + + Consts.RIO_PREFIX + "jgetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "j++) {\n"); + cb.append("if (*(p" + Consts.RTI_FILTER + "->getFieldTypeInfos()[" + + Consts.RIO_PREFIX + "i]) == *(p" + Consts.RTI_VAR + + "->getFieldTypeInfos()[" + Consts.RIO_PREFIX + "j])) {\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + "[" + Consts.RIO_PREFIX + + "i] = " + Consts.RIO_PREFIX + "j+1;\n"); + cb.append("break;\n"); + cb.append("}\n"); + cb.append("}\n"); + cb.append("}\n"); + cb.append("}\n"); + } + + void genCode(FileWriter hh, FileWriter cc, ArrayList options) + throws IOException { + CodeBuffer hb = new CodeBuffer(); + + String[] ns = module.split("::"); + for (int i = 0; i < ns.length; i++) { + hb.append("namespace "+ns[i]+" {\n"); + } + + hb.append("class "+name+" : public ::hadoop::Record {\n"); + hb.append("private:\n"); + 
+ for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + type.genDecl(hb, name); + } + + // type info vars + hb.append("static ::hadoop::RecordTypeInfo* p" + Consts.RTI_VAR + ";\n"); + hb.append("static ::hadoop::RecordTypeInfo* p" + Consts.RTI_FILTER + ";\n"); + hb.append("static int* p" + Consts.RTI_FILTER_FIELDS + ";\n"); + hb.append("static ::hadoop::RecordTypeInfo* setupTypeInfo();\n"); + hb.append("static void setupRtiFields();\n"); + hb.append("virtual void deserializeWithoutFilter(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ");\n"); + hb.append("public:\n"); + hb.append("static const ::hadoop::RecordTypeInfo& getTypeInfo() " + + "{return *p" + Consts.RTI_VAR + ";}\n"); + hb.append("static void setTypeFilter(const ::hadoop::RecordTypeInfo& rti);\n"); + hb.append("static void setTypeFilter(const ::hadoop::RecordTypeInfo* prti);\n"); + hb.append("virtual void serialize(::hadoop::OArchive& " + + Consts.RECORD_OUTPUT + ", const char* " + Consts.TAG + ") const;\n"); + hb.append("virtual void deserialize(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ");\n"); + hb.append("virtual const ::std::string& type() const;\n"); + hb.append("virtual const ::std::string& signature() const;\n"); + hb.append("virtual bool operator<(const "+name+"& peer_) const;\n"); + hb.append("virtual bool operator==(const "+name+"& peer_) const;\n"); + hb.append("virtual ~"+name+"() {};\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + type.genGetSet(hb, name); + } + hb.append("}; // end record "+name+"\n"); + for (int i=ns.length-1; i>=0; i--) { + hb.append("} // end namespace "+ns[i]+"\n"); + } + + hh.write(hb.toString()); + + CodeBuffer cb = new CodeBuffer(); + + // initialize type info vars + cb.append("::hadoop::RecordTypeInfo* " + fullName + "::p" + + Consts.RTI_VAR + " = " + fullName + "::setupTypeInfo();\n"); + cb.append("::hadoop::RecordTypeInfo* " + fullName + "::p" + + Consts.RTI_FILTER + " = NULL;\n"); + cb.append("int* " + fullName + "::p" + + Consts.RTI_FILTER_FIELDS + " = NULL;\n\n"); + + // setupTypeInfo() + cb.append("::hadoop::RecordTypeInfo* "+fullName+"::setupTypeInfo() {\n"); + cb.append("::hadoop::RecordTypeInfo* p = new ::hadoop::RecordTypeInfo(\"" + + name + "\");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + type.genStaticTypeInfo(cb, name); + } + cb.append("return p;\n"); + cb.append("}\n"); + + // setTypeFilter() + cb.append("void "+fullName+"::setTypeFilter(const " + + "::hadoop::RecordTypeInfo& rti) {\n"); + cb.append("if (NULL != p" + Consts.RTI_FILTER + ") {\n"); + cb.append("delete p" + Consts.RTI_FILTER + ";\n"); + cb.append("}\n"); + cb.append("p" + Consts.RTI_FILTER + " = new ::hadoop::RecordTypeInfo(rti);\n"); + cb.append("if (NULL != p" + Consts.RTI_FILTER_FIELDS + ") {\n"); + cb.append("delete p" + Consts.RTI_FILTER_FIELDS + ";\n"); + cb.append("}\n"); + cb.append("p" + Consts.RTI_FILTER_FIELDS + " = NULL;\n"); + // set RTIFilter for nested structs. We may end up with multiple lines that + // do the same thing, if the same struct is nested in more than one field, + // but that's OK. 
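Both this C++ path and the Java setTypeFilter() generated earlier exist so that a reader whose record definition differs from the writer's can skip fields it does not know about. A minimal sketch of how the generated Java API would be used, assuming MyRecord stands in for any generated record class and that the writer serialized its RecordTypeInfo ahead of the records:

    // Illustrative only: deserialize records written against a different
    // version of the DDL, using the writer's RecordTypeInfo as a filter.
    static MyRecord readFiltered(org.apache.hadoop.record.RecordInput in)
        throws java.io.IOException {
      org.apache.hadoop.record.meta.RecordTypeInfo writerInfo =
          new org.apache.hadoop.record.meta.RecordTypeInfo();
      writerInfo.deserialize(in, "typeInfo");   // schema the writer used
      MyRecord.setTypeFilter(writerInfo);       // generated static setter
      MyRecord rec = new MyRecord();
      rec.deserialize(in, "rec");               // unknown fields are skipped
      return rec;
    }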
+ for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + CppType type = jf.getType(); + type.genSetRTIFilter(cb); + } + cb.append("}\n"); + + // setTypeFilter() + cb.append("void "+fullName+"::setTypeFilter(const " + + "::hadoop::RecordTypeInfo* prti) {\n"); + cb.append("if (NULL != prti) {\n"); + cb.append("setTypeFilter(*prti);\n"); + cb.append("}\n"); + cb.append("}\n"); + + // setupRtiFields() + genSetupRTIFields(cb); + + // serialize() + cb.append("void "+fullName+"::serialize(::hadoop::OArchive& " + + Consts.RECORD_OUTPUT + ", const char* " + Consts.TAG + ") const {\n"); + cb.append(Consts.RECORD_OUTPUT + ".startRecord(*this," + + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + if (type instanceof JBuffer.CppBuffer) { + cb.append(Consts.RECORD_OUTPUT + ".serialize("+name+","+name+ + ".length(),\""+name+"\");\n"); + } else { + cb.append(Consts.RECORD_OUTPUT + ".serialize("+name+",\""+ + name+"\");\n"); + } + } + cb.append(Consts.RECORD_OUTPUT + ".endRecord(*this," + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + + // deserializeWithoutFilter() + cb.append("void "+fullName+"::deserializeWithoutFilter(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ") {\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(*this," + + Consts.TAG + ");\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + if (type instanceof JBuffer.CppBuffer) { + cb.append("{\nsize_t len=0; " + Consts.RECORD_INPUT + ".deserialize("+ + name+",len,\""+name+"\");\n}\n"); + } else { + cb.append(Consts.RECORD_INPUT + ".deserialize("+name+",\""+ + name+"\");\n"); + } + } + cb.append(Consts.RECORD_INPUT + ".endRecord(*this," + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + + // deserialize() + cb.append("void "+fullName+"::deserialize(::hadoop::IArchive& " + + Consts.RECORD_INPUT + ", const char* " + Consts.TAG + ") {\n"); + cb.append("if (NULL == p" + Consts.RTI_FILTER + ") {\n"); + cb.append("deserializeWithoutFilter(" + Consts.RECORD_INPUT + ", " + + Consts.TAG + ");\n"); + cb.append("return;\n"); + cb.append("}\n"); + cb.append("// if we're here, we need to read based on version info\n"); + cb.append(Consts.RECORD_INPUT + ".startRecord(*this," + + Consts.TAG + ");\n"); + cb.append("setupRtiFields();\n"); + cb.append("for (unsigned int " + Consts.RIO_PREFIX + "i=0; " + + Consts.RIO_PREFIX + "igetFieldTypeInfos().size(); " + Consts.RIO_PREFIX + "i++) {\n"); + int ct = 0; + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + CppType type = jf.getType(); + ct++; + if (1 != ct) { + cb.append("else "); + } + cb.append("if (" + ct + " == p" + Consts.RTI_FILTER_FIELDS + "[" + + Consts.RIO_PREFIX + "i]) {\n"); + if (type instanceof JBuffer.CppBuffer) { + cb.append("{\nsize_t len=0; " + Consts.RECORD_INPUT + ".deserialize("+ + name+",len,\""+name+"\");\n}\n"); + } else { + cb.append(Consts.RECORD_INPUT + ".deserialize("+name+",\""+ + name+"\");\n"); + } + cb.append("}\n"); + } + if (0 != ct) { + cb.append("else {\n"); + cb.append("const std::vector< ::hadoop::FieldTypeInfo* >& typeInfos = p" + + Consts.RTI_FILTER + "->getFieldTypeInfos();\n"); + cb.append("::hadoop::Utils::skip(" + Consts.RECORD_INPUT + + ", typeInfos[" + Consts.RIO_PREFIX + "i]->getFieldID()->c_str()" 
+ + ", *(typeInfos[" + Consts.RIO_PREFIX + "i]->getTypeID()));\n"); + cb.append("}\n"); + } + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endRecord(*this, " + Consts.TAG+");\n"); + cb.append("}\n"); + + // operator < + cb.append("bool "+fullName+"::operator< (const "+fullName+"& peer_) const {\n"); + cb.append("return (1\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + cb.append("&& ("+name+" < peer_."+name+")\n"); + } + cb.append(");\n"); + cb.append("}\n"); + + cb.append("bool "+fullName+"::operator== (const "+fullName+"& peer_) const {\n"); + cb.append("return (1\n"); + for (Iterator> i = fields.iterator(); i.hasNext();) { + JField jf = i.next(); + String name = jf.getName(); + cb.append("&& ("+name+" == peer_."+name+")\n"); + } + cb.append(");\n"); + cb.append("}\n"); + + cb.append("const ::std::string&"+fullName+"::type() const {\n"); + cb.append("static const ::std::string type_(\""+name+"\");\n"); + cb.append("return type_;\n"); + cb.append("}\n"); + + cb.append("const ::std::string&"+fullName+"::signature() const {\n"); + cb.append("static const ::std::string sig_(\""+getSignature()+"\");\n"); + cb.append("return sig_;\n"); + cb.append("}\n"); + + cc.write(cb.toString()); + } + } + + class CRecord extends CCompType { + + } + + private String signature; + + /** + * Creates a new instance of JRecord + */ + public JRecord(String name, ArrayList> flist) { + setJavaType(new JavaRecord(name, flist)); + setCppType(new CppRecord(name, flist)); + setCType(new CRecord()); + // precompute signature + int idx = name.lastIndexOf('.'); + String recName = name.substring(idx+1); + StringBuffer sb = new StringBuffer(); + sb.append("L").append(recName).append("("); + for (Iterator> i = flist.iterator(); i.hasNext();) { + String s = i.next().getType().getSignature(); + sb.append(s); + } + sb.append(")"); + signature = sb.toString(); + } + + String getSignature() { + return signature; + } + + void genCppCode(FileWriter hh, FileWriter cc, ArrayList options) + throws IOException { + ((CppRecord)getCppType()).genCode(hh, cc, options); + } + + void genJavaCode(String destDir, ArrayList options) + throws IOException { + ((JavaRecord)getJavaType()).genCode(destDir, options); + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JString.java b/src/java/org/apache/hadoop/record/compiler/JString.java new file mode 100644 index 00000000000..931359b993c --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JString.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + + +/** + */ +public class JString extends JCompType { + + class JavaString extends JavaCompType { + + JavaString() { + super("String", "String", "String", "TypeID.RIOType.STRING"); + } + + String getTypeIDObjectString() { + return "org.apache.hadoop.record.meta.TypeID.StringTypeID"; + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + cb.append("int i = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int z = org.apache.hadoop.record.Utils.getVIntSize(i);\n"); + cb.append(s+"+=(z+i); "+l+"-= (z+i);\n"); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + cb.append("int i1 = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int i2 = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("int z1 = org.apache.hadoop.record.Utils.getVIntSize(i1);\n"); + cb.append("int z2 = org.apache.hadoop.record.Utils.getVIntSize(i2);\n"); + cb.append("s1+=z1; s2+=z2; l1-=z1; l2-=z2;\n"); + cb.append("int r1 = org.apache.hadoop.record.Utils.compareBytes(b1,s1,i1,b2,s2,i2);\n"); + cb.append("if (r1 != 0) { return (r1<0)?-1:0; }\n"); + cb.append("s1+=i1; s2+=i2; l1-=i1; l1-=i2;\n"); + cb.append("}\n"); + } + + void genClone(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "other."+fname+" = this."+fname+";\n"); + } + } + + class CppString extends CppCompType { + + CppString() { + super("::std::string"); + } + + String getTypeIDObjectString() { + return "new ::hadoop::TypeID(::hadoop::RIOTYPE_STRING)"; + } + } + + /** Creates a new instance of JString */ + public JString() { + setJavaType(new JavaString()); + setCppType(new CppString()); + setCType(new CCompType()); + } + + String getSignature() { + return "s"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JType.java b/src/java/org/apache/hadoop/record/compiler/JType.java new file mode 100644 index 00000000000..6f1ff67ccb4 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JType.java @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler; + +import java.util.Map; + + +/** + * Abstract Base class for all types supported by Hadoop Record I/O. 
+ */ +abstract public class JType { + + static String toCamelCase(String name) { + char firstChar = name.charAt(0); + if (Character.isLowerCase(firstChar)) { + return ""+Character.toUpperCase(firstChar) + name.substring(1); + } + return name; + } + + JavaType javaType; + CppType cppType; + CType cType; + + abstract class JavaType { + private String name; + private String methodSuffix; + private String wrapper; + private String typeIDByteString; // points to TypeID.RIOType + + JavaType(String javaname, + String suffix, + String wrapper, + String typeIDByteString) { + this.name = javaname; + this.methodSuffix = suffix; + this.wrapper = wrapper; + this.typeIDByteString = typeIDByteString; + } + + void genDecl(CodeBuffer cb, String fname) { + cb.append("private "+name+" "+fname+";\n"); + } + + void genStaticTypeInfo(CodeBuffer cb, String fname) { + cb.append(Consts.RTI_VAR + ".addField(\"" + fname + "\", " + + getTypeIDObjectString() + ");\n"); + } + + abstract String getTypeIDObjectString(); + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + // do nothing by default + return; + } + + /*void genRtiFieldCondition(CodeBuffer cb, String fname, int ct) { + cb.append("if ((tInfo.fieldID.equals(\"" + fname + "\")) && (typeVal ==" + + " org.apache.hadoop.record.meta." + getTypeIDByteString() + ")) {\n"); + cb.append("rtiFilterFields[i] = " + ct + ";\n"); + cb.append("}\n"); + } + + void genRtiNestedFieldCondition(CodeBuffer cb, String varName, int ct) { + cb.append("if (" + varName + ".getElementTypeID().getTypeVal() == " + + "org.apache.hadoop.record.meta." + getTypeIDByteString() + + ") {\n"); + cb.append("rtiFilterFields[i] = " + ct + ";\n"); + cb.append("}\n"); + }*/ + + void genConstructorParam(CodeBuffer cb, String fname) { + cb.append("final "+name+" "+fname); + } + + void genGetSet(CodeBuffer cb, String fname) { + cb.append("public "+name+" get"+toCamelCase(fname)+"() {\n"); + cb.append("return "+fname+";\n"); + cb.append("}\n"); + cb.append("public void set"+toCamelCase(fname)+"(final "+name+" "+fname+") {\n"); + cb.append("this."+fname+"="+fname+";\n"); + cb.append("}\n"); + } + + String getType() { + return name; + } + + String getWrapperType() { + return wrapper; + } + + String getMethodSuffix() { + return methodSuffix; + } + + String getTypeIDByteString() { + return typeIDByteString; + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + cb.append(Consts.RECORD_OUTPUT + ".write"+methodSuffix + + "("+fname+",\""+tag+"\");\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(name+" "+fname+";\n"); + } + cb.append(fname+"=" + Consts.RECORD_INPUT + ".read" + + methodSuffix+"(\""+tag+"\");\n"); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+" == "+other+")? 
0 :(("+ + fname+"<"+other+")?-1:1);\n"); + } + + abstract void genCompareBytes(CodeBuffer cb); + + abstract void genSlurpBytes(CodeBuffer cb, String b, String s, String l); + + void genEquals(CodeBuffer cb, String fname, String peer) { + cb.append(Consts.RIO_PREFIX + "ret = ("+fname+"=="+peer+");\n"); + } + + void genHashCode(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "ret = (int)"+fname+";\n"); + } + + void genConstructorSet(CodeBuffer cb, String fname) { + cb.append("this."+fname+" = "+fname+";\n"); + } + + void genClone(CodeBuffer cb, String fname) { + cb.append(Consts.RIO_PREFIX + "other."+fname+" = this."+fname+";\n"); + } + } + + abstract class CppType { + private String name; + + CppType(String cppname) { + name = cppname; + } + + void genDecl(CodeBuffer cb, String fname) { + cb.append(name+" "+fname+";\n"); + } + + void genStaticTypeInfo(CodeBuffer cb, String fname) { + cb.append("p->addField(new ::std::string(\"" + + fname + "\"), " + getTypeIDObjectString() + ");\n"); + } + + void genGetSet(CodeBuffer cb, String fname) { + cb.append("virtual "+name+" get"+toCamelCase(fname)+"() const {\n"); + cb.append("return "+fname+";\n"); + cb.append("}\n"); + cb.append("virtual void set"+toCamelCase(fname)+"("+name+" m_) {\n"); + cb.append(fname+"=m_;\n"); + cb.append("}\n"); + } + + abstract String getTypeIDObjectString(); + + void genSetRTIFilter(CodeBuffer cb) { + // do nothing by default + return; + } + + String getType() { + return name; + } + } + + class CType { + + } + + abstract String getSignature(); + + void setJavaType(JavaType jType) { + this.javaType = jType; + } + + JavaType getJavaType() { + return javaType; + } + + void setCppType(CppType cppType) { + this.cppType = cppType; + } + + CppType getCppType() { + return cppType; + } + + void setCType(CType cType) { + this.cType = cType; + } + + CType getCType() { + return cType; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JVector.java b/src/java/org/apache/hadoop/record/compiler/JVector.java new file mode 100644 index 00000000000..f87442ad716 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JVector.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + +import java.util.Map; + +/** + */ +public class JVector extends JCompType { + + static private int level = 0; + + static private String getId(String id) { return id+getLevel(); } + + static private String getLevel() { return Integer.toString(level); } + + static private void incrLevel() { level++; } + + static private void decrLevel() { level--; } + + private JType type; + + class JavaVector extends JavaCompType { + + private JType.JavaType element; + + JavaVector(JType.JavaType t) { + super("java.util.ArrayList<"+t.getWrapperType()+">", + "Vector", "java.util.ArrayList<"+t.getWrapperType()+">", + "TypeID.RIOType.VECTOR"); + element = t; + } + + String getTypeIDObjectString() { + return "new org.apache.hadoop.record.meta.VectorTypeID(" + + element.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb, Map nestedStructMap) { + element.genSetRTIFilter(cb, nestedStructMap); + } + + void genCompareTo(CodeBuffer cb, String fname, String other) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId(Consts.RIO_PREFIX + "len1")+" = "+fname+ + ".size();\n"); + cb.append("int "+getId(Consts.RIO_PREFIX + "len2")+" = "+other+ + ".size();\n"); + cb.append("for(int "+getId(Consts.RIO_PREFIX + "vidx")+" = 0; "+ + getId(Consts.RIO_PREFIX + "vidx")+"<"+getId(Consts.RIO_PREFIX + "len1")+ + " && "+getId(Consts.RIO_PREFIX + "vidx")+"<"+ + getId(Consts.RIO_PREFIX + "len2")+"; "+ + getId(Consts.RIO_PREFIX + "vidx")+"++) {\n"); + cb.append(element.getType()+" "+getId(Consts.RIO_PREFIX + "e1")+ + " = "+fname+ + ".get("+getId(Consts.RIO_PREFIX + "vidx")+");\n"); + cb.append(element.getType()+" "+getId(Consts.RIO_PREFIX + "e2")+ + " = "+other+ + ".get("+getId(Consts.RIO_PREFIX + "vidx")+");\n"); + element.genCompareTo(cb, getId(Consts.RIO_PREFIX + "e1"), + getId(Consts.RIO_PREFIX + "e2")); + cb.append("if (" + Consts.RIO_PREFIX + "ret != 0) { return " + + Consts.RIO_PREFIX + "ret; }\n"); + cb.append("}\n"); + cb.append(Consts.RIO_PREFIX + "ret = ("+getId(Consts.RIO_PREFIX + "len1")+ + " - "+getId(Consts.RIO_PREFIX + "len2")+");\n"); + decrLevel(); + cb.append("}\n"); + } + + void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) { + if (decl) { + cb.append(getType()+" "+fname+";\n"); + } + cb.append("{\n"); + incrLevel(); + cb.append("org.apache.hadoop.record.Index "+ + getId(Consts.RIO_PREFIX + "vidx")+" = " + + Consts.RECORD_INPUT + ".startVector(\""+tag+"\");\n"); + cb.append(fname+"=new "+getType()+"();\n"); + cb.append("for (; !"+getId(Consts.RIO_PREFIX + "vidx")+".done(); " + + getId(Consts.RIO_PREFIX + "vidx")+".incr()) {\n"); + element.genReadMethod(cb, getId(Consts.RIO_PREFIX + "e"), + getId(Consts.RIO_PREFIX + "e"), true); + cb.append(fname+".add("+getId(Consts.RIO_PREFIX + "e")+");\n"); + cb.append("}\n"); + cb.append(Consts.RECORD_INPUT + ".endVector(\""+tag+"\");\n"); + decrLevel(); + cb.append("}\n"); + } + + void genWriteMethod(CodeBuffer cb, String fname, String tag) { + cb.append("{\n"); + incrLevel(); + cb.append(Consts.RECORD_OUTPUT + ".startVector("+fname+",\""+tag+"\");\n"); + cb.append("int "+getId(Consts.RIO_PREFIX + "len")+" = "+fname+".size();\n"); + cb.append("for(int "+getId(Consts.RIO_PREFIX + "vidx")+" = 0; " + + getId(Consts.RIO_PREFIX + "vidx")+"<"+getId(Consts.RIO_PREFIX + "len")+ + "; "+getId(Consts.RIO_PREFIX + "vidx")+"++) {\n"); + cb.append(element.getType()+" "+getId(Consts.RIO_PREFIX + "e")+" = "+ + fname+".get("+getId(Consts.RIO_PREFIX + "vidx")+");\n"); + 
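For reference, the write loop being assembled in this genWriteMethod expands, for a hypothetical field "values" declared as vector<int>, to roughly the sketch below; the numeric suffixes on real generated identifiers come from getId() and Consts.RIO_PREFIX.

    // Rough expansion of the generated vector serialization for "values".
    void writeValues(java.util.ArrayList<Integer> values,
                     org.apache.hadoop.record.RecordOutput out)
        throws java.io.IOException {
      out.startVector(values, "values");
      int len1 = values.size();
      for (int vidx1 = 0; vidx1 < len1; vidx1++) {
        int e1 = values.get(vidx1);
        out.writeInt(e1, "e1");
      }
      out.endVector(values, "values");
    }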
element.genWriteMethod(cb, getId(Consts.RIO_PREFIX + "e"), + getId(Consts.RIO_PREFIX + "e")); + cb.append("}\n"); + cb.append(Consts.RECORD_OUTPUT + ".endVector("+fname+",\""+tag+"\");\n"); + cb.append("}\n"); + decrLevel(); + } + + void genSlurpBytes(CodeBuffer cb, String b, String s, String l) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("vi")+ + " = org.apache.hadoop.record.Utils.readVInt("+b+", "+s+");\n"); + cb.append("int "+getId("vz")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("vi")+");\n"); + cb.append(s+"+="+getId("vz")+"; "+l+"-="+getId("vz")+";\n"); + cb.append("for (int "+getId("vidx")+" = 0; "+getId("vidx")+ + " < "+getId("vi")+"; "+getId("vidx")+"++)"); + element.genSlurpBytes(cb, b, s, l); + decrLevel(); + cb.append("}\n"); + } + + void genCompareBytes(CodeBuffer cb) { + cb.append("{\n"); + incrLevel(); + cb.append("int "+getId("vi1")+ + " = org.apache.hadoop.record.Utils.readVInt(b1, s1);\n"); + cb.append("int "+getId("vi2")+ + " = org.apache.hadoop.record.Utils.readVInt(b2, s2);\n"); + cb.append("int "+getId("vz1")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("vi1")+");\n"); + cb.append("int "+getId("vz2")+ + " = org.apache.hadoop.record.Utils.getVIntSize("+getId("vi2")+");\n"); + cb.append("s1+="+getId("vz1")+"; s2+="+getId("vz2")+ + "; l1-="+getId("vz1")+"; l2-="+getId("vz2")+";\n"); + cb.append("for (int "+getId("vidx")+" = 0; "+getId("vidx")+ + " < "+getId("vi1")+" && "+getId("vidx")+" < "+getId("vi2")+ + "; "+getId("vidx")+"++)"); + element.genCompareBytes(cb); + cb.append("if ("+getId("vi1")+" != "+getId("vi2")+ + ") { return ("+getId("vi1")+"<"+getId("vi2")+")?-1:0; }\n"); + decrLevel(); + cb.append("}\n"); + } + } + + class CppVector extends CppCompType { + + private JType.CppType element; + + CppVector(JType.CppType t) { + super("::std::vector< "+t.getType()+" >"); + element = t; + } + + String getTypeIDObjectString() { + return "new ::hadoop::VectorTypeID(" + + element.getTypeIDObjectString() + ")"; + } + + void genSetRTIFilter(CodeBuffer cb) { + element.genSetRTIFilter(cb); + } + + } + + /** Creates a new instance of JVector */ + public JVector(JType t) { + type = t; + setJavaType(new JavaVector(t.getJavaType())); + setCppType(new CppVector(t.getCppType())); + setCType(new CCompType()); + } + + String getSignature() { + return "[" + type.getSignature() + "]"; + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java b/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java new file mode 100644 index 00000000000..04c4bd84733 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler; + +import java.util.ArrayList; +import java.io.IOException; +import java.util.Iterator; + +/** + * Java Code generator front-end for Hadoop record I/O. + */ +class JavaGenerator extends CodeGenerator { + + JavaGenerator() { + } + + /** + * Generate Java code for records. This method is only a front-end to + * JRecord, since one file is generated for each record. + * + * @param name possibly full pathname to the file + * @param ilist included files (as JFile) + * @param rlist List of records defined within this file + * @param destDir output directory + */ + void genCode(String name, ArrayList ilist, + ArrayList rlist, String destDir, ArrayList options) + throws IOException { + for (Iterator iter = rlist.iterator(); iter.hasNext();) { + JRecord rec = iter.next(); + rec.genJavaCode(destDir, options); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/ant/RccTask.java b/src/java/org/apache/hadoop/record/compiler/ant/RccTask.java new file mode 100644 index 00000000000..ce1bc2cbf9d --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/ant/RccTask.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.record.compiler.ant; + +import java.io.File; +import java.util.ArrayList; +import org.apache.hadoop.record.compiler.generated.Rcc; +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.DirectoryScanner; +import org.apache.tools.ant.Project; +import org.apache.tools.ant.Task; +import org.apache.tools.ant.types.FileSet; + +/** + * Hadoop record compiler ant Task + *

This task takes the given record definition files and compiles them into
+ * java or c++
+ * files. It is then up to the user to compile the generated files.
+ *
+ * The task requires the file or the nested fileset element to be
+ * specified. Optional attributes are language (set the output
+ * language, default is "java"),
+ * destdir (name of the destination directory for generated java/c++
+ * code, default is ".") and failonerror (specifies error handling
+ * behavior. default is true).
+ *
+ * Usage:
+ *
+ * <recordcc
+ *       destdir="${basedir}/gensrc"
+ *       language="java">
+ *   <fileset include="**\/*.jr" />
+ * </recordcc>
+ *
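+ * The same compile can also be driven from Java code; a rough, illustrative
+ * equivalent using the file attribute instead of a fileset (paths are made
+ * up) is:
+ *
+ *   RccTask rcc = new RccTask();
+ *   rcc.setLanguage("java");
+ *   rcc.setDestdir(new File("gensrc"));
+ *   rcc.setFile(new File("inclrec.jr"));
+ *   rcc.execute();
+ *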
+ */ +public class RccTask extends Task { + + private String language = "java"; + private File src; + private File dest = new File("."); + private final ArrayList filesets = new ArrayList(); + private boolean failOnError = true; + + /** Creates a new instance of RccTask */ + public RccTask() { + } + + /** + * Sets the output language option + * @param language "java"/"c++" + */ + public void setLanguage(String language) { + this.language = language; + } + + /** + * Sets the record definition file attribute + * @param file record definition file + */ + public void setFile(File file) { + this.src = file; + } + + /** + * Given multiple files (via fileset), set the error handling behavior + * @param flag true will throw build exception in case of failure (default) + */ + public void setFailonerror(boolean flag) { + this.failOnError = flag; + } + + /** + * Sets directory where output files will be generated + * @param dir output directory + */ + public void setDestdir(File dir) { + this.dest = dir; + } + + /** + * Adds a fileset that can consist of one or more files + * @param set Set of record definition files + */ + public void addFileset(FileSet set) { + filesets.add(set); + } + + /** + * Invoke the Hadoop record compiler on each record definition file + */ + public void execute() throws BuildException { + if (src == null && filesets.size()==0) { + throw new BuildException("There must be a file attribute or a fileset child element"); + } + if (src != null) { + doCompile(src); + } + Project myProject = getProject(); + for (int i = 0; i < filesets.size(); i++) { + FileSet fs = filesets.get(i); + DirectoryScanner ds = fs.getDirectoryScanner(myProject); + File dir = fs.getDir(myProject); + String[] srcs = ds.getIncludedFiles(); + for (int j = 0; j < srcs.length; j++) { + doCompile(new File(dir, srcs[j])); + } + } + } + + private void doCompile(File file) throws BuildException { + String[] args = new String[5]; + args[0] = "--language"; + args[1] = this.language; + args[2] = "--destdir"; + args[3] = this.dest.getPath(); + args[4] = file.getPath(); + int retVal = Rcc.driver(args); + if (retVal != 0 && failOnError) { + throw new BuildException("Hadoop record compiler returned error code "+retVal); + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java b/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java new file mode 100644 index 00000000000..59d2e467623 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java @@ -0,0 +1,210 @@ +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; + +/** + * This exception is thrown when parse errors are encountered. + * You can explicitly create objects of this exception type by + * calling the method generateParseException in the generated + * parser. + * + * You can modify this class to customize your error reporting + * mechanisms so long as you retain the public fields. + */ +public class ParseException extends Exception { + + /** + * This constructor is used by the method "generateParseException" + * in the generated parser. Calling this constructor generates + * a new object of this type with the fields "currentToken", + * "expectedTokenSequences", and "tokenImage" set. The boolean + * flag "specialConstructor" is also set to true to indicate that + * this constructor was used to create this object. + * This constructor calls its super class with the empty string + * to force the "toString" method of parent class "Throwable" to + * print the error message in the form: + * ParseException: + */ + public ParseException(Token currentTokenVal, + int[][] expectedTokenSequencesVal, + String[] tokenImageVal + ) + { + super(""); + specialConstructor = true; + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /** + * The following constructors are for use by you for whatever + * purpose you can think of. Constructing the exception in this + * manner makes the exception behave in the normal way - i.e., as + * documented in the class "Throwable". The fields "errorToken", + * "expectedTokenSequences", and "tokenImage" do not contain + * relevant information. The JavaCC generated code does not use + * these constructors. + */ + + public ParseException() { + super(); + specialConstructor = false; + } + + public ParseException(String message) { + super(message); + specialConstructor = false; + } + + /** + * This variable determines which constructor was used to create + * this object and thereby affects the semantics of the + * "getMessage" method (see below). + */ + protected boolean specialConstructor; + + /** + * This is the last token that has been consumed successfully. If + * this object has been created due to a parse error, the token + * followng this token will (therefore) be the first error token. + */ + public Token currentToken; + + /** + * Each entry in this array is an array of integers. Each array + * of integers represents a sequence of tokens (by their ordinal + * values) that is expected at this point of the parse. + */ + public int[][] expectedTokenSequences; + + /** + * This is a reference to the "tokenImage" array of the generated + * parser within which the parse error occurred. This array is + * defined in the generated ...Constants interface. + */ + public String[] tokenImage; + + /** + * This method has the standard behavior when this object has been + * created using the standard constructors. Otherwise, it uses + * "currentToken" and "expectedTokenSequences" to generate a parse + * error message and returns it. If this object has been created + * due to a parse error, and you do not catch it (it gets thrown + * from the parser), then this method is called during the printing + * of the final stack trace, and hence the correct error message + * gets displayed. 
+ */ + public String getMessage() { + if (!specialConstructor) { + return super.getMessage(); + } + StringBuffer expected = new StringBuffer(); + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.length; i++) { + if (maxSize < expectedTokenSequences[i].length) { + maxSize = expectedTokenSequences[i].length; + } + for (int j = 0; j < expectedTokenSequences[i].length; j++) { + expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" "); + } + if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { + expected.append("..."); + } + expected.append(eol).append(" "); + } + String retval = "Encountered \""; + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) { + if (i != 0) retval += " "; + if (tok.kind == 0) { + retval += tokenImage[0]; + break; + } + retval += add_escapes(tok.image); + tok = tok.next; + } + retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; + retval += "." + eol; + if (expectedTokenSequences.length == 1) { + retval += "Was expecting:" + eol + " "; + } else { + retval += "Was expecting one of:" + eol + " "; + } + retval += expected.toString(); + return retval; + } + + /** + * The end of line string for this machine. + */ + protected String eol = System.getProperty("line.separator", "\n"); + + /** + * Used to convert raw characters to their escaped version + * when these raw version cannot be used as part of an ASCII + * string literal. + */ + protected String add_escapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java b/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java new file mode 100644 index 00000000000..933710a683b --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java @@ -0,0 +1,535 @@ +/* Generated By:JavaCC: Do not edit this line. Rcc.java */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; + +import org.apache.hadoop.record.compiler.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Iterator; +import java.io.File; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +public class Rcc implements RccConstants { + private static String language = "java"; + private static String destDir = "."; + private static ArrayList recFiles = new ArrayList(); + private static ArrayList cmdargs = new ArrayList(); + private static JFile curFile; + private static Hashtable recTab; + private static String curDir = "."; + private static String curFileName; + private static String curModuleName; + + public static void main(String[] args) { + System.exit(driver(args)); + } + + public static void usage() { + System.err.println("Usage: rcc --language [java|c++] ddl-files"); + } + + public static int driver(String[] args) { + for (int i=0; i(); + curFile = parser.Input(); + } catch (ParseException e) { + System.err.println(e.toString()); + return 1; + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.err.println("File " + recFiles.get(i) + + " Not found."); + return 1; + } + try { + int retCode = curFile.genCode(language, destDir, cmdargs); + if (retCode != 0) { return retCode; } + } catch (IOException e) { + System.err.println(e.toString()); + return 1; + } + } + return 0; + } + + final public JFile Input() throws ParseException { + ArrayList ilist = new ArrayList(); + ArrayList rlist = new ArrayList(); + JFile i; + ArrayList l; + label_1: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case INCLUDE_TKN: + i = Include(); + ilist.add(i); + break; + case MODULE_TKN: + l = Module(); + rlist.addAll(l); + break; + default: + jj_la1[0] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case MODULE_TKN: + case INCLUDE_TKN: + ; + break; + default: + jj_la1[1] = jj_gen; + break label_1; + } + } + jj_consume_token(0); + {if (true) return new JFile(curFileName, ilist, rlist);} + throw new Error("Missing return statement in function"); + } + + final public JFile Include() throws ParseException { + String fname; + Token t; + jj_consume_token(INCLUDE_TKN); + t = jj_consume_token(CSTRING_TKN); + JFile ret = null; + fname = t.image.replaceAll("^\"", "").replaceAll("\"$",""); + File file = new File(curDir, fname); + String tmpDir = curDir; + String tmpFile = curFileName; + curDir = file.getParent(); + curFileName = file.getName(); + try { + FileReader reader = new FileReader(file); + Rcc parser = new Rcc(reader); + try { + ret = parser.Input(); + System.out.println(fname + " Parsed Successfully"); + } catch (ParseException e) { + System.out.println(e.toString()); + System.exit(1); + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.out.println("File " + fname + + " Not found."); + System.exit(1); + } + curDir = tmpDir; + curFileName = tmpFile; + {if (true) return ret;} + throw new Error("Missing return statement in function"); + } + + final public ArrayList Module() throws ParseException { + String mName; + ArrayList rlist; + jj_consume_token(MODULE_TKN); + mName = ModuleName(); + curModuleName = mName; + jj_consume_token(LBRACE_TKN); + rlist = RecordList(); + jj_consume_token(RBRACE_TKN); + {if (true) return rlist;} + throw new Error("Missing return statement in function"); + } + + final public 
String ModuleName() throws ParseException { + String name = ""; + Token t; + t = jj_consume_token(IDENT_TKN); + name += t.image; + label_2: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DOT_TKN: + ; + break; + default: + jj_la1[2] = jj_gen; + break label_2; + } + jj_consume_token(DOT_TKN); + t = jj_consume_token(IDENT_TKN); + name += "." + t.image; + } + {if (true) return name;} + throw new Error("Missing return statement in function"); + } + + final public ArrayList RecordList() throws ParseException { + ArrayList rlist = new ArrayList(); + JRecord r; + label_3: + while (true) { + r = Record(); + rlist.add(r); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RECORD_TKN: + ; + break; + default: + jj_la1[3] = jj_gen; + break label_3; + } + } + {if (true) return rlist;} + throw new Error("Missing return statement in function"); + } + + final public JRecord Record() throws ParseException { + String rname; + ArrayList> flist = new ArrayList>(); + Token t; + JField f; + jj_consume_token(RECORD_TKN); + t = jj_consume_token(IDENT_TKN); + rname = t.image; + jj_consume_token(LBRACE_TKN); + label_4: + while (true) { + f = Field(); + flist.add(f); + jj_consume_token(SEMICOLON_TKN); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case BYTE_TKN: + case BOOLEAN_TKN: + case INT_TKN: + case LONG_TKN: + case FLOAT_TKN: + case DOUBLE_TKN: + case USTRING_TKN: + case BUFFER_TKN: + case VECTOR_TKN: + case MAP_TKN: + case IDENT_TKN: + ; + break; + default: + jj_la1[4] = jj_gen; + break label_4; + } + } + jj_consume_token(RBRACE_TKN); + String fqn = curModuleName + "." + rname; + JRecord r = new JRecord(fqn, flist); + recTab.put(fqn, r); + {if (true) return r;} + throw new Error("Missing return statement in function"); + } + + final public JField Field() throws ParseException { + JType jt; + Token t; + jt = Type(); + t = jj_consume_token(IDENT_TKN); + {if (true) return new JField(t.image, jt);} + throw new Error("Missing return statement in function"); + } + + final public JType Type() throws ParseException { + JType jt; + Token t; + String rname; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case MAP_TKN: + jt = Map(); + {if (true) return jt;} + break; + case VECTOR_TKN: + jt = Vector(); + {if (true) return jt;} + break; + case BYTE_TKN: + jj_consume_token(BYTE_TKN); + {if (true) return new JByte();} + break; + case BOOLEAN_TKN: + jj_consume_token(BOOLEAN_TKN); + {if (true) return new JBoolean();} + break; + case INT_TKN: + jj_consume_token(INT_TKN); + {if (true) return new JInt();} + break; + case LONG_TKN: + jj_consume_token(LONG_TKN); + {if (true) return new JLong();} + break; + case FLOAT_TKN: + jj_consume_token(FLOAT_TKN); + {if (true) return new JFloat();} + break; + case DOUBLE_TKN: + jj_consume_token(DOUBLE_TKN); + {if (true) return new JDouble();} + break; + case USTRING_TKN: + jj_consume_token(USTRING_TKN); + {if (true) return new JString();} + break; + case BUFFER_TKN: + jj_consume_token(BUFFER_TKN); + {if (true) return new JBuffer();} + break; + case IDENT_TKN: + rname = ModuleName(); + if (rname.indexOf('.', 0) < 0) { + rname = curModuleName + "." + rname; + } + JRecord r = recTab.get(rname); + if (r == null) { + System.out.println("Type " + rname + " not known. 
Exiting."); + System.exit(1); + } + {if (true) return r;} + break; + default: + jj_la1[5] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + throw new Error("Missing return statement in function"); + } + + final public JMap Map() throws ParseException { + JType jt1; + JType jt2; + jj_consume_token(MAP_TKN); + jj_consume_token(LT_TKN); + jt1 = Type(); + jj_consume_token(COMMA_TKN); + jt2 = Type(); + jj_consume_token(GT_TKN); + {if (true) return new JMap(jt1, jt2);} + throw new Error("Missing return statement in function"); + } + + final public JVector Vector() throws ParseException { + JType jt; + jj_consume_token(VECTOR_TKN); + jj_consume_token(LT_TKN); + jt = Type(); + jj_consume_token(GT_TKN); + {if (true) return new JVector(jt);} + throw new Error("Missing return statement in function"); + } + + public RccTokenManager token_source; + SimpleCharStream jj_input_stream; + public Token token, jj_nt; + private int jj_ntk; + private int jj_gen; + final private int[] jj_la1 = new int[6]; + static private int[] jj_la1_0; + static private int[] jj_la1_1; + static { + jj_la1_0(); + jj_la1_1(); + } + private static void jj_la1_0() { + jj_la1_0 = new int[] {0x2800, 0x2800, 0x40000000, 0x1000, 0xffc000, 0xffc000,}; + } + private static void jj_la1_1() { + jj_la1_1 = new int[] {0x0, 0x0, 0x0, 0x0, 0x1, 0x1,}; + } + + public Rcc(java.io.InputStream stream) { + this(stream, null); + } + public Rcc(java.io.InputStream stream, String encoding) { + try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source = new RccTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public void ReInit(java.io.InputStream stream) { + ReInit(stream, null); + } + public void ReInit(java.io.InputStream stream, String encoding) { + try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public Rcc(java.io.Reader stream) { + jj_input_stream = new SimpleCharStream(stream, 1, 1); + token_source = new RccTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public void ReInit(java.io.Reader stream) { + jj_input_stream.ReInit(stream, 1, 1); + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public Rcc(RccTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + public void ReInit(RccTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 6; i++) jj_la1[i] = -1; + } + + final private Token jj_consume_token(int kind) throws ParseException { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + if (token.kind == kind) { + jj_gen++; + return token; + } + token = oldToken; + jj_kind = kind; + throw generateParseException(); + } + + final public Token getNextToken() { + if (token.next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + 
jj_gen++; + return token; + } + + final public Token getToken(int index) { + Token t = token; + for (int i = 0; i < index; i++) { + if (t.next != null) t = t.next; + else t = t.next = token_source.getNextToken(); + } + return t; + } + + final private int jj_ntk() { + if ((jj_nt=token.next) == null) + return (jj_ntk = (token.next=token_source.getNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private java.util.Vector jj_expentries = new java.util.Vector(); + private int[] jj_expentry; + private int jj_kind = -1; + + public ParseException generateParseException() { + jj_expentries.removeAllElements(); + boolean[] la1tokens = new boolean[33]; + for (int i = 0; i < 33; i++) { + la1tokens[i] = false; + } + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 6; i++) { + if (jj_la1[i] == jj_gen) { + for (int j = 0; j < 32; j++) { + if ((jj_la1_0[i] & (1<", + "\" \"", + "\"\\t\"", + "\"\\n\"", + "\"\\r\"", + "\"//\"", + "", + "", + "\"/*\"", + "\"*/\"", + "", + "\"module\"", + "\"class\"", + "\"include\"", + "\"byte\"", + "\"boolean\"", + "\"int\"", + "\"long\"", + "\"float\"", + "\"double\"", + "\"ustring\"", + "\"buffer\"", + "\"vector\"", + "\"map\"", + "\"{\"", + "\"}\"", + "\"<\"", + "\">\"", + "\";\"", + "\",\"", + "\".\"", + "", + "", + }; + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java b/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java new file mode 100644 index 00000000000..42b04733ebf --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java @@ -0,0 +1,833 @@ +/* Generated By:JavaCC: Do not edit this line. RccTokenManager.java */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; +import org.apache.hadoop.record.compiler.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Iterator; +import java.io.File; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +public class RccTokenManager implements RccConstants +{ + public java.io.PrintStream debugStream = System.out; + public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } + private final int jjMoveStringLiteralDfa0_1() + { + return jjMoveNfa_1(0, 0); + } + private final void jjCheckNAdd(int state) + { + if (jjrounds[state] != jjround) + { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } + } + private final void jjAddStates(int start, int end) + { + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); + } + private final void jjCheckNAddTwoStates(int state1, int state2) + { + jjCheckNAdd(state1); + jjCheckNAdd(state2); + } + private final void jjCheckNAddStates(int start, int end) + { + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); + } + private final void jjCheckNAddStates(int start) + { + jjCheckNAdd(jjnextStates[start]); + jjCheckNAdd(jjnextStates[start + 1]); + } + private final int jjMoveNfa_1(int startState, int curPos) + { + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x2400L & l) != 0L) + { + if (kind > 6) + kind = 6; + } + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 1; + break; + case 1: + if (curChar == 10 && kind > 6) + kind = 6; + break; + case 2: + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 1; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + else + { + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } + } + private final int jjStopStringLiteralDfa_0(int pos, long active0) + { + switch (pos) + { + case 0: + if ((active0 & 0xfff800L) != 0L) + { + jjmatchedKind = 32; + return 4; + } + return -1; + case 1: + if ((active0 & 0xfff800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 1; + return 4; + } + return -1; + case 2: + if ((active0 & 0x7ef800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 2; + return 4; + } + if ((active0 & 0x810000L) != 0L) + return 4; + return -1; + case 3: + if ((active0 & 0x24000L) != 0L) + return 4; + if ((active0 & 0x7cb800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 3; + return 4; + } + return -1; + case 4: + if ((active0 & 0x41000L) != 0L) + return 4; + if ((active0 & 0x78a800L) != 0L) + { + jjmatchedKind = 32; + jjmatchedPos = 4; + return 4; + } + return -1; + case 5: + if ((active0 & 0x680800L) != 0L) + return 4; + if ((active0 & 0x10a000L) != 0L) + { + 
jjmatchedKind = 32; + jjmatchedPos = 5; + return 4; + } + return -1; + default : + return -1; + } + } + private final int jjStartNfa_0(int pos, long active0) + { + return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1); + } + private final int jjStopAtPos(int pos, int kind) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; + } + private final int jjStartNfaWithStates_0(int pos, int kind, int state) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_0(state, pos + 1); + } + private final int jjMoveStringLiteralDfa0_0() + { + switch(curChar) + { + case 44: + return jjStopAtPos(0, 29); + case 46: + return jjStopAtPos(0, 30); + case 47: + return jjMoveStringLiteralDfa1_0(0x120L); + case 59: + return jjStopAtPos(0, 28); + case 60: + return jjStopAtPos(0, 26); + case 62: + return jjStopAtPos(0, 27); + case 98: + return jjMoveStringLiteralDfa1_0(0x20c000L); + case 99: + return jjMoveStringLiteralDfa1_0(0x1000L); + case 100: + return jjMoveStringLiteralDfa1_0(0x80000L); + case 102: + return jjMoveStringLiteralDfa1_0(0x40000L); + case 105: + return jjMoveStringLiteralDfa1_0(0x12000L); + case 108: + return jjMoveStringLiteralDfa1_0(0x20000L); + case 109: + return jjMoveStringLiteralDfa1_0(0x800800L); + case 117: + return jjMoveStringLiteralDfa1_0(0x100000L); + case 118: + return jjMoveStringLiteralDfa1_0(0x400000L); + case 123: + return jjStopAtPos(0, 24); + case 125: + return jjStopAtPos(0, 25); + default : + return jjMoveNfa_0(0, 0); + } + } + private final int jjMoveStringLiteralDfa1_0(long active0) + { + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(0, active0); + return 1; + } + switch(curChar) + { + case 42: + if ((active0 & 0x100L) != 0L) + return jjStopAtPos(1, 8); + break; + case 47: + if ((active0 & 0x20L) != 0L) + return jjStopAtPos(1, 5); + break; + case 97: + return jjMoveStringLiteralDfa2_0(active0, 0x800000L); + case 101: + return jjMoveStringLiteralDfa2_0(active0, 0x400000L); + case 108: + return jjMoveStringLiteralDfa2_0(active0, 0x41000L); + case 110: + return jjMoveStringLiteralDfa2_0(active0, 0x12000L); + case 111: + return jjMoveStringLiteralDfa2_0(active0, 0xa8800L); + case 115: + return jjMoveStringLiteralDfa2_0(active0, 0x100000L); + case 117: + return jjMoveStringLiteralDfa2_0(active0, 0x200000L); + case 121: + return jjMoveStringLiteralDfa2_0(active0, 0x4000L); + default : + break; + } + return jjStartNfa_0(0, active0); + } + private final int jjMoveStringLiteralDfa2_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(0, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(1, active0); + return 2; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa3_0(active0, 0x1000L); + case 99: + return jjMoveStringLiteralDfa3_0(active0, 0x402000L); + case 100: + return jjMoveStringLiteralDfa3_0(active0, 0x800L); + case 102: + return jjMoveStringLiteralDfa3_0(active0, 0x200000L); + case 110: + return jjMoveStringLiteralDfa3_0(active0, 0x20000L); + case 111: + return jjMoveStringLiteralDfa3_0(active0, 0x48000L); + case 112: + if ((active0 & 0x800000L) != 0L) + return jjStartNfaWithStates_0(2, 23, 4); + break; + case 116: + if ((active0 & 0x10000L) != 0L) + return jjStartNfaWithStates_0(2, 16, 4); + return jjMoveStringLiteralDfa3_0(active0, 0x104000L); + case 117: + return 
jjMoveStringLiteralDfa3_0(active0, 0x80000L); + default : + break; + } + return jjStartNfa_0(1, active0); + } + private final int jjMoveStringLiteralDfa3_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(1, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(2, active0); + return 3; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa4_0(active0, 0x40000L); + case 98: + return jjMoveStringLiteralDfa4_0(active0, 0x80000L); + case 101: + if ((active0 & 0x4000L) != 0L) + return jjStartNfaWithStates_0(3, 14, 4); + break; + case 102: + return jjMoveStringLiteralDfa4_0(active0, 0x200000L); + case 103: + if ((active0 & 0x20000L) != 0L) + return jjStartNfaWithStates_0(3, 17, 4); + break; + case 108: + return jjMoveStringLiteralDfa4_0(active0, 0xa000L); + case 114: + return jjMoveStringLiteralDfa4_0(active0, 0x100000L); + case 115: + return jjMoveStringLiteralDfa4_0(active0, 0x1000L); + case 116: + return jjMoveStringLiteralDfa4_0(active0, 0x400000L); + case 117: + return jjMoveStringLiteralDfa4_0(active0, 0x800L); + default : + break; + } + return jjStartNfa_0(2, active0); + } + private final int jjMoveStringLiteralDfa4_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(2, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(3, active0); + return 4; + } + switch(curChar) + { + case 101: + return jjMoveStringLiteralDfa5_0(active0, 0x208000L); + case 105: + return jjMoveStringLiteralDfa5_0(active0, 0x100000L); + case 108: + return jjMoveStringLiteralDfa5_0(active0, 0x80800L); + case 111: + return jjMoveStringLiteralDfa5_0(active0, 0x400000L); + case 115: + if ((active0 & 0x1000L) != 0L) + return jjStartNfaWithStates_0(4, 12, 4); + break; + case 116: + if ((active0 & 0x40000L) != 0L) + return jjStartNfaWithStates_0(4, 18, 4); + break; + case 117: + return jjMoveStringLiteralDfa5_0(active0, 0x2000L); + default : + break; + } + return jjStartNfa_0(3, active0); + } + private final int jjMoveStringLiteralDfa5_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(3, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(4, active0); + return 5; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa6_0(active0, 0x8000L); + case 100: + return jjMoveStringLiteralDfa6_0(active0, 0x2000L); + case 101: + if ((active0 & 0x800L) != 0L) + return jjStartNfaWithStates_0(5, 11, 4); + else if ((active0 & 0x80000L) != 0L) + return jjStartNfaWithStates_0(5, 19, 4); + break; + case 110: + return jjMoveStringLiteralDfa6_0(active0, 0x100000L); + case 114: + if ((active0 & 0x200000L) != 0L) + return jjStartNfaWithStates_0(5, 21, 4); + else if ((active0 & 0x400000L) != 0L) + return jjStartNfaWithStates_0(5, 22, 4); + break; + default : + break; + } + return jjStartNfa_0(4, active0); + } + private final int jjMoveStringLiteralDfa6_0(long old0, long active0) + { + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(4, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(5, active0); + return 6; + } + switch(curChar) + { + case 101: + if ((active0 & 0x2000L) != 0L) + return jjStartNfaWithStates_0(6, 13, 4); + break; + case 103: + if ((active0 & 0x100000L) != 0L) + return jjStartNfaWithStates_0(6, 20, 4); + break; + case 110: + if ((active0 & 0x8000L) != 0L) + return 
jjStartNfaWithStates_0(6, 15, 4); + break; + default : + break; + } + return jjStartNfa_0(5, active0); + } + static final long[] jjbitVec0 = { + 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL + }; + private final int jjMoveNfa_0(int startState, int curPos) + { + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 5; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if (curChar == 34) + jjCheckNAdd(1); + break; + case 1: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(1, 2); + break; + case 2: + if (curChar == 34 && kind > 31) + kind = 31; + break; + case 4: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 32) + kind = 32; + jjstateSet[jjnewStateCnt++] = 4; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 32) + kind = 32; + jjCheckNAdd(4); + break; + case 1: + jjAddStates(0, 1); + break; + case 4: + if ((0x7fffffe87fffffeL & l) == 0L) + break; + if (kind > 32) + kind = 32; + jjCheckNAdd(4); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 1: + if ((jjbitVec0[i2] & l2) != 0L) + jjAddStates(0, 1); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } + } + private final int jjMoveStringLiteralDfa0_2() + { + switch(curChar) + { + case 42: + return jjMoveStringLiteralDfa1_2(0x200L); + default : + return 1; + } + } + private final int jjMoveStringLiteralDfa1_2(long active0) + { + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + return 1; + } + switch(curChar) + { + case 47: + if ((active0 & 0x200L) != 0L) + return jjStopAtPos(1, 9); + break; + default : + return 2; + } + return 2; + } + static final int[] jjnextStates = { + 1, 2, + }; + public static final String[] jjstrLiteralImages = { + "", null, null, null, null, null, null, null, null, null, null, + "\155\157\144\165\154\145", "\143\154\141\163\163", "\151\156\143\154\165\144\145", "\142\171\164\145", + "\142\157\157\154\145\141\156", "\151\156\164", "\154\157\156\147", "\146\154\157\141\164", + "\144\157\165\142\154\145", "\165\163\164\162\151\156\147", "\142\165\146\146\145\162", + "\166\145\143\164\157\162", "\155\141\160", "\173", "\175", "\74", "\76", "\73", "\54", "\56", null, null, }; + public static final String[] lexStateNames = { + "DEFAULT", + "WithinOneLineComment", + "WithinMultiLineComment", + }; + public static final int[] jjnewLexState = { + -1, -1, -1, -1, -1, 1, 0, -1, 2, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + }; + static final long[] jjtoToken = { + 0x1fffff801L, + }; + static final long[] jjtoSkip = { + 0x37eL, + }; + static final long[] jjtoSpecial = { + 0x360L, + }; + static final long[] jjtoMore = { + 0x480L, + }; + protected SimpleCharStream 
input_stream; + private final int[] jjrounds = new int[5]; + private final int[] jjstateSet = new int[10]; + StringBuffer image; + int jjimageLen; + int lengthOfMatch; + protected char curChar; + public RccTokenManager(SimpleCharStream stream){ + if (SimpleCharStream.staticFlag) + throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); + input_stream = stream; + } + public RccTokenManager(SimpleCharStream stream, int lexState){ + this(stream); + SwitchTo(lexState); + } + public void ReInit(SimpleCharStream stream) + { + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); + } + private final void ReInitRounds() + { + int i; + jjround = 0x80000001; + for (i = 5; i-- > 0;) + jjrounds[i] = 0x80000000; + } + public void ReInit(SimpleCharStream stream, int lexState) + { + ReInit(stream); + SwitchTo(lexState); + } + public void SwitchTo(int lexState) + { + if (lexState >= 3 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); + else + curLexState = lexState; + } + + protected Token jjFillToken() + { + Token t = Token.newToken(jjmatchedKind); + t.kind = jjmatchedKind; + String im = jjstrLiteralImages[jjmatchedKind]; + t.image = (im == null) ? input_stream.GetImage() : im; + t.beginLine = input_stream.getBeginLine(); + t.beginColumn = input_stream.getBeginColumn(); + t.endLine = input_stream.getEndLine(); + t.endColumn = input_stream.getEndColumn(); + return t; + } + + int curLexState = 0; + int defaultLexState = 0; + int jjnewStateCnt; + int jjround; + int jjmatchedPos; + int jjmatchedKind; + + public Token getNextToken() + { + int kind; + Token specialToken = null; + Token matchedToken; + int curPos = 0; + + EOFLoop : + for (;;) + { + try + { + curChar = input_stream.BeginToken(); + } + catch(java.io.IOException e) + { + jjmatchedKind = 0; + matchedToken = jjFillToken(); + matchedToken.specialToken = specialToken; + return matchedToken; + } + image = null; + jjimageLen = 0; + + for (;;) + { + switch(curLexState) + { + case 0: + try { input_stream.backup(0); + while (curChar <= 32 && (0x100002600L & (1L << curChar)) != 0L) + curChar = input_stream.BeginToken(); + } + catch (java.io.IOException e1) { continue EOFLoop; } + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_0(); + break; + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_1(); + if (jjmatchedPos == 0 && jjmatchedKind > 7) + { + jjmatchedKind = 7; + } + break; + case 2: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_2(); + if (jjmatchedPos == 0 && jjmatchedKind > 10) + { + jjmatchedKind = 10; + } + break; + } + if (jjmatchedKind != 0x7fffffff) + { + if (jjmatchedPos + 1 < curPos) + input_stream.backup(curPos - jjmatchedPos - 1); + if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + matchedToken.specialToken = specialToken; + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + return matchedToken; + } + else if ((jjtoSkip[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + if ((jjtoSpecial[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + if (specialToken == null) + specialToken = matchedToken; + else + { + matchedToken.specialToken = specialToken; + specialToken = 
(specialToken.next = matchedToken); + } + SkipLexicalActions(matchedToken); + } + else + SkipLexicalActions(null); + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + continue EOFLoop; + } + jjimageLen += jjmatchedPos + 1; + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + curPos = 0; + jjmatchedKind = 0x7fffffff; + try { + curChar = input_stream.readChar(); + continue; + } + catch (java.io.IOException e1) { } + } + int error_line = input_stream.getEndLine(); + int error_column = input_stream.getEndColumn(); + String error_after = null; + boolean EOFSeen = false; + try { input_stream.readChar(); input_stream.backup(1); } + catch (java.io.IOException e1) { + EOFSeen = true; + error_after = curPos <= 1 ? "" : input_stream.GetImage(); + if (curChar == '\n' || curChar == '\r') { + error_line++; + error_column = 0; + } + else + error_column++; + } + if (!EOFSeen) { + input_stream.backup(1); + error_after = curPos <= 1 ? "" : input_stream.GetImage(); + } + throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); + } + } + } + + void SkipLexicalActions(Token matchedToken) + { + switch(jjmatchedKind) + { + default : + break; + } + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java b/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java new file mode 100644 index 00000000000..364d708e462 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java @@ -0,0 +1,439 @@ +/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler.generated; + +/** + * An implementation of interface CharStream, where the stream is assumed to + * contain only ASCII characters (without unicode processing). 
+ */ + +public class SimpleCharStream +{ + public static final boolean staticFlag = false; + int bufsize; + int available; + int tokenBegin; + public int bufpos = -1; + protected int bufline[]; + protected int bufcolumn[]; + + protected int column = 0; + protected int line = 1; + + protected boolean prevCharIsCR = false; + protected boolean prevCharIsLF = false; + + protected java.io.Reader inputStream; + + protected char[] buffer; + protected int maxNextCharInd = 0; + protected int inBuf = 0; + protected int tabSize = 8; + + protected void setTabSize(int i) { tabSize = i; } + protected int getTabSize(int i) { return tabSize; } + + + protected void ExpandBuff(boolean wrapAround) + { + char[] newbuffer = new char[bufsize + 2048]; + int newbufline[] = new int[bufsize + 2048]; + int newbufcolumn[] = new int[bufsize + 2048]; + + try + { + if (wrapAround) + { + System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); + System.arraycopy(buffer, 0, newbuffer, + bufsize - tokenBegin, bufpos); + buffer = newbuffer; + + System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); + System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos); + bufline = newbufline; + + System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); + System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos); + bufcolumn = newbufcolumn; + + maxNextCharInd = (bufpos += (bufsize - tokenBegin)); + } + else + { + System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); + buffer = newbuffer; + + System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); + bufline = newbufline; + + System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); + bufcolumn = newbufcolumn; + + maxNextCharInd = (bufpos -= tokenBegin); + } + } + catch (Throwable t) + { + throw new Error(t.getMessage()); + } + + + bufsize += 2048; + available = bufsize; + tokenBegin = 0; + } + + protected void FillBuff() throws java.io.IOException + { + if (maxNextCharInd == available) + { + if (available == bufsize) + { + if (tokenBegin > 2048) + { + bufpos = maxNextCharInd = 0; + available = tokenBegin; + } + else if (tokenBegin < 0) + bufpos = maxNextCharInd = 0; + else + ExpandBuff(false); + } + else if (available > tokenBegin) + available = bufsize; + else if ((tokenBegin - available) < 2048) + ExpandBuff(true); + else + available = tokenBegin; + } + + int i; + try { + if ((i = inputStream.read(buffer, maxNextCharInd, + available - maxNextCharInd)) == -1) + { + inputStream.close(); + throw new java.io.IOException(); + } + else + maxNextCharInd += i; + return; + } + catch(java.io.IOException e) { + --bufpos; + backup(0); + if (tokenBegin == -1) + tokenBegin = bufpos; + throw e; + } + } + + public char BeginToken() throws java.io.IOException + { + tokenBegin = -1; + char c = readChar(); + tokenBegin = bufpos; + + return c; + } + + protected void UpdateLineColumn(char c) + { + column++; + + if (prevCharIsLF) + { + prevCharIsLF = false; + line += (column = 1); + } + else if (prevCharIsCR) + { + prevCharIsCR = false; + if (c == '\n') + { + prevCharIsLF = true; + } + else + line += (column = 1); + } + + switch (c) + { + case '\r' : + prevCharIsCR = true; + break; + case '\n' : + prevCharIsLF = true; + break; + case '\t' : + column--; + column += (tabSize - (column % tabSize)); + break; + default : + break; + } + + bufline[bufpos] = line; + bufcolumn[bufpos] = column; + } + + public char readChar() throws java.io.IOException + { + if 
(inBuf > 0) + { + --inBuf; + + if (++bufpos == bufsize) + bufpos = 0; + + return buffer[bufpos]; + } + + if (++bufpos >= maxNextCharInd) + FillBuff(); + + char c = buffer[bufpos]; + + UpdateLineColumn(c); + return (c); + } + + public int getEndColumn() { + return bufcolumn[bufpos]; + } + + public int getEndLine() { + return bufline[bufpos]; + } + + public int getBeginColumn() { + return bufcolumn[tokenBegin]; + } + + public int getBeginLine() { + return bufline[tokenBegin]; + } + + public void backup(int amount) { + + inBuf += amount; + if ((bufpos -= amount) < 0) + bufpos += bufsize; + } + + public SimpleCharStream(java.io.Reader dstream, int startline, + int startcolumn, int buffersize) + { + inputStream = dstream; + line = startline; + column = startcolumn - 1; + + available = bufsize = buffersize; + buffer = new char[buffersize]; + bufline = new int[buffersize]; + bufcolumn = new int[buffersize]; + } + + public SimpleCharStream(java.io.Reader dstream, int startline, + int startcolumn) + { + this(dstream, startline, startcolumn, 4096); + } + + public SimpleCharStream(java.io.Reader dstream) + { + this(dstream, 1, 1, 4096); + } + public void ReInit(java.io.Reader dstream, int startline, + int startcolumn, int buffersize) + { + inputStream = dstream; + line = startline; + column = startcolumn - 1; + + if (buffer == null || buffersize != buffer.length) + { + available = bufsize = buffersize; + buffer = new char[buffersize]; + bufline = new int[buffersize]; + bufcolumn = new int[buffersize]; + } + prevCharIsLF = prevCharIsCR = false; + tokenBegin = inBuf = maxNextCharInd = 0; + bufpos = -1; + } + + public void ReInit(java.io.Reader dstream, int startline, + int startcolumn) + { + ReInit(dstream, startline, startcolumn, 4096); + } + + public void ReInit(java.io.Reader dstream) + { + ReInit(dstream, 1, 1, 4096); + } + public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + + public SimpleCharStream(java.io.InputStream dstream, int startline, + int startcolumn, int buffersize) + { + this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } + + public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, startline, startcolumn, 4096); + } + + public SimpleCharStream(java.io.InputStream dstream, int startline, + int startcolumn) + { + this(dstream, startline, startcolumn, 4096); + } + + public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, 1, 1, 4096); + } + + public SimpleCharStream(java.io.InputStream dstream) + { + this(dstream, 1, 1, 4096); + } + + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + ReInit(encoding == null ? 
new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + + public void ReInit(java.io.InputStream dstream, int startline, + int startcolumn, int buffersize) + { + ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } + + public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, 1, 1, 4096); + } + + public void ReInit(java.io.InputStream dstream) + { + ReInit(dstream, 1, 1, 4096); + } + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, startline, startcolumn, 4096); + } + public void ReInit(java.io.InputStream dstream, int startline, + int startcolumn) + { + ReInit(dstream, startline, startcolumn, 4096); + } + public String GetImage() + { + if (bufpos >= tokenBegin) + return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); + else + return new String(buffer, tokenBegin, bufsize - tokenBegin) + + new String(buffer, 0, bufpos + 1); + } + + public char[] GetSuffix(int len) + { + char[] ret = new char[len]; + + if ((bufpos + 1) >= len) + System.arraycopy(buffer, bufpos - len + 1, ret, 0, len); + else + { + System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0, + len - bufpos - 1); + System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1); + } + + return ret; + } + + public void Done() + { + buffer = null; + bufline = null; + bufcolumn = null; + } + + /** + * Method to adjust line and column numbers for the start of a token. + */ + public void adjustBeginLineColumn(int newLine, int newCol) + { + int start = tokenBegin; + int len; + + if (bufpos >= tokenBegin) + { + len = bufpos - tokenBegin + inBuf + 1; + } + else + { + len = bufsize - tokenBegin + bufpos + 1 + inBuf; + } + + int i = 0, j = 0, k = 0; + int nextColDiff = 0, columnDiff = 0; + + while (i < len && + bufline[j = start % bufsize] == bufline[k = ++start % bufsize]) + { + bufline[j] = newLine; + nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j]; + bufcolumn[j] = newCol + columnDiff; + columnDiff = nextColDiff; + i++; + } + + if (i < len) + { + bufline[j] = newLine++; + bufcolumn[j] = newCol + columnDiff; + + while (i++ < len) + { + if (bufline[j = start % bufsize] != bufline[++start % bufsize]) + bufline[j] = newLine++; + else + bufline[j] = newLine; + } + } + + line = bufline[j]; + column = bufcolumn[j]; + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/Token.java b/src/java/org/apache/hadoop/record/compiler/generated/Token.java new file mode 100644 index 00000000000..29f36ab1e07 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/Token.java @@ -0,0 +1,99 @@ +/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler.generated; + +/** + * Describes the input token stream. + */ + +public class Token { + + /** + * An integer that describes the kind of this token. This numbering + * system is determined by JavaCCParser, and a table of these numbers is + * stored in the file ...Constants.java. + */ + public int kind; + + /** + * beginLine and beginColumn describe the position of the first character + * of this token; endLine and endColumn describe the position of the + * last character of this token. + */ + public int beginLine, beginColumn, endLine, endColumn; + + /** + * The string image of the token. + */ + public String image; + + /** + * A reference to the next regular (non-special) token from the input + * stream. If this is the last token from the input stream, or if the + * token manager has not read tokens beyond this one, this field is + * set to null. This is true only if this token is also a regular + * token. Otherwise, see below for a description of the contents of + * this field. + */ + public Token next; + + /** + * This field is used to access special tokens that occur prior to this + * token, but after the immediately preceding regular (non-special) token. + * If there are no such special tokens, this field is set to null. + * When there are more than one such special token, this field refers + * to the last of these special tokens, which in turn refers to the next + * previous special token through its specialToken field, and so on + * until the first special token (whose specialToken field is null). + * The next fields of special tokens refer to other special tokens that + * immediately follow it (without an intervening regular token). If there + * is no such token, this field is null. + */ + public Token specialToken; + + /** + * Returns the image. + */ + public String toString() + { + return image; + } + + /** + * Returns a new Token object, by default. However, if you want, you + * can create and return subclass objects based on the value of ofKind. + * Simply add the cases to the switch for all those special cases. + * For example, if you have a subclass of Token called IDToken that + * you want to create if ofKind is ID, simlpy add something like : + * + * case MyParserConstants.ID : return new IDToken(); + * + * to the following switch statement. Then you can cast matchedToken + * variable to the appropriate type and use it in your lexical actions. + */ + public static final Token newToken(int ofKind) + { + switch(ofKind) + { + default : return new Token(); + } + } + +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java b/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java new file mode 100644 index 00000000000..14f3ae34805 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java @@ -0,0 +1,151 @@ +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.compiler.generated; + +public class TokenMgrError extends Error +{ + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ + + /** + * Lexical error occured. + */ + static final int LEXICAL_ERROR = 0; + + /** + * An attempt wass made to create a second instance of a static token manager. + */ + static final int STATIC_LEXER_ERROR = 1; + + /** + * Tried to change to an invalid lexical state. + */ + static final int INVALID_LEXICAL_STATE = 2; + + /** + * Detected (and bailed out of) an infinite loop in the token manager. + */ + static final int LOOP_DETECTED = 3; + + /** + * Indicates the reason why the exception is thrown. It will have + * one of the above 4 values. + */ + int errorCode; + + /** + * Replaces unprintable characters by their espaced (or unicode escaped) + * equivalents in the given string + */ + protected static final String addEscapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + + /** + * Returns a detailed message for the Error when it is thrown by the + * token manager to indicate a lexical error. + * Parameters : + * EOFSeen : indicates if EOF caused the lexicl error + * curLexState : lexical state in which this error occured + * errorLine : line number when the error occured + * errorColumn : column number when the error occured + * errorAfter : prefix that was seen before this error occured + * curchar : the offending character + * Note: You can customize the lexical error message by modifying this method. + */ + protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { + return("Lexical error at line " + + errorLine + ", column " + + errorColumn + ". Encountered: " + + (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + + "after : \"" + addEscapes(errorAfter) + "\""); + } + + /** + * You can also modify the body of this method to customize your error messages. 
+ * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + * of end-users concern, so you can return something like : + * + * "Internal Error : Please file a bug report .... " + * + * from this method for such cases in the release version of your parser. + */ + public String getMessage() { + return super.getMessage(); + } + + /* + * Constructors of various flavors follow. + */ + + public TokenMgrError() { + } + + public TokenMgrError(String message, int reason) { + super(message); + errorCode = reason; + } + + public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { + this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); + } +} diff --git a/src/java/org/apache/hadoop/record/compiler/generated/package.html b/src/java/org/apache/hadoop/record/compiler/generated/package.html new file mode 100644 index 00000000000..2fd0f68967d --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/package.html @@ -0,0 +1,29 @@ + + + + + + + Hadoop Record Compiler: Parser + + + This package contains code generated by JavaCC from the + Hadoop record syntax file rcc.jj. For details about the + record file syntax please @see org.apache.hadoop.record. + + diff --git a/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj b/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj new file mode 100644 index 00000000000..4eeae3e47db --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj @@ -0,0 +1,384 @@ +options { +STATIC=false; +} + +PARSER_BEGIN(Rcc) +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.compiler.generated; + +import org.apache.hadoop.record.compiler.*; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Iterator; +import java.io.File; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +public class Rcc { + private static String language = "java"; + private static String destDir = "."; + private static ArrayList recFiles = new ArrayList(); + private static ArrayList cmdargs = new ArrayList(); + private static JFile curFile; + private static Hashtable recTab; + private static String curDir = "."; + private static String curFileName; + private static String curModuleName; + + public static void main(String[] args) { + System.exit(driver(args)); + } + + public static void usage() { + System.err.println("Usage: rcc --language [java|c++] ddl-files"); + } + + public static int driver(String[] args) { + for (int i=0; i(); + curFile = parser.Input(); + } catch (ParseException e) { + System.err.println(e.toString()); + return 1; + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.err.println("File " + (String) recFiles.get(i) + + " Not found."); + return 1; + } + try { + int retCode = curFile.genCode(language, destDir, cmdargs); + if (retCode != 0) { return retCode; } + } catch (IOException e) { + System.err.println(e.toString()); + return 1; + } + } + return 0; + } +} + +PARSER_END(Rcc) + +SKIP : +{ + " " +| "\t" +| "\n" +| "\r" +} + +SPECIAL_TOKEN : +{ + "//" : WithinOneLineComment +} + + SPECIAL_TOKEN : +{ + <("\n" | "\r" | "\r\n" )> : DEFAULT +} + + MORE : +{ + <~[]> +} + +SPECIAL_TOKEN : +{ + "/*" : WithinMultiLineComment +} + + SPECIAL_TOKEN : +{ + "*/" : DEFAULT +} + + MORE : +{ + <~[]> +} + +TOKEN : +{ + +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| "> +| +| +| +| +| +} + +JFile Input() : +{ + ArrayList ilist = new ArrayList(); + ArrayList rlist = new ArrayList(); + JFile i; + ArrayList l; +} +{ + ( + i = Include() + { ilist.add(i); } + | l = Module() + { rlist.addAll(l); } + )+ + + { return new JFile(curFileName, ilist, rlist); } +} + +JFile Include() : +{ + String fname; + Token t; +} +{ + + t = + { + JFile ret = null; + fname = t.image.replaceAll("^\"", "").replaceAll("\"$",""); + File file = new File(curDir, fname); + String tmpDir = curDir; + String tmpFile = curFileName; + curDir = file.getParent(); + curFileName = file.getName(); + try { + FileReader reader = new FileReader(file); + Rcc parser = new Rcc(reader); + try { + ret = parser.Input(); + System.out.println(fname + " Parsed Successfully"); + } catch (ParseException e) { + System.out.println(e.toString()); + System.exit(1); + } + try { + reader.close(); + } catch (IOException e) { + } + } catch (FileNotFoundException e) { + System.out.println("File " + fname + + " Not found."); + System.exit(1); + } + curDir = tmpDir; + curFileName = tmpFile; + return ret; + } +} + +ArrayList Module() : +{ + String mName; + ArrayList rlist; +} +{ + + mName = ModuleName() + { curModuleName = mName; } + + rlist = RecordList() + + { return rlist; } +} + +String ModuleName() : +{ + String name = ""; + Token t; +} +{ + t = + { name += t.image; } + ( + + t = + { name += "." 
+ t.image; } + )* + { return name; } +} + +ArrayList RecordList() : +{ + ArrayList rlist = new ArrayList(); + JRecord r; +} +{ + ( + r = Record() + { rlist.add(r); } + )+ + { return rlist; } +} + +JRecord Record() : +{ + String rname; + ArrayList> flist = new ArrayList>(); + Token t; + JField f; +} +{ + + t = + { rname = t.image; } + + ( + f = Field() + { flist.add(f); } + + )+ + + { + String fqn = curModuleName + "." + rname; + JRecord r = new JRecord(fqn, flist); + recTab.put(fqn, r); + return r; + } +} + +JField Field() : +{ + JType jt; + Token t; +} +{ + jt = Type() + t = + { return new JField(t.image, jt); } +} + +JType Type() : +{ + JType jt; + Token t; + String rname; +} +{ + jt = Map() + { return jt; } +| jt = Vector() + { return jt; } +| + { return new JByte(); } +| + { return new JBoolean(); } +| + { return new JInt(); } +| + { return new JLong(); } +| + { return new JFloat(); } +| + { return new JDouble(); } +| + { return new JString(); } +| + { return new JBuffer(); } +| rname = ModuleName() + { + if (rname.indexOf('.', 0) < 0) { + rname = curModuleName + "." + rname; + } + JRecord r = recTab.get(rname); + if (r == null) { + System.out.println("Type " + rname + " not known. Exiting."); + System.exit(1); + } + return r; + } +} + +JMap Map() : +{ + JType jt1; + JType jt2; +} +{ + + + jt1 = Type() + + jt2 = Type() + + { return new JMap(jt1, jt2); } +} + +JVector Vector() : +{ + JType jt; +} +{ + + + jt = Type() + + { return new JVector(jt); } +} diff --git a/src/java/org/apache/hadoop/record/compiler/package.html b/src/java/org/apache/hadoop/record/compiler/package.html new file mode 100644 index 00000000000..23cac15ffb4 --- /dev/null +++ b/src/java/org/apache/hadoop/record/compiler/package.html @@ -0,0 +1,31 @@ + + + + + + + Hadoop Record Compiler + + + This package contains classes needed for code generation + from the hadoop record compiler. CppGenerator and JavaGenerator + are the main entry points from the parser. There are classes + corrsponding to every primitive type and compound type + included in Hadoop record I/O syntax. + + diff --git a/src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java b/src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java new file mode 100644 index 00000000000..82d4c8affc4 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/FieldTypeInfo.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; + +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents a type information for a field, which is made up of its + * ID (name) and its type (a TypeID object). 
+ */ +public class FieldTypeInfo +{ + + private String fieldID; + private TypeID typeID; + + /** + * Construct a FieldTypeInfo with the given field name and type + */ + FieldTypeInfo(String fieldID, TypeID typeID) { + this.fieldID = fieldID; + this.typeID = typeID; + } + + /** + * get the field's TypeID object + */ + public TypeID getTypeID() { + return typeID; + } + + /** + * get the field's id (name) + */ + public String getFieldID() { + return fieldID; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeString(fieldID, tag); + typeID.write(rout, tag); + } + + /** + * Two FieldTypeInfos are equal if each of their fields matches + */ + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof FieldTypeInfo)) + return false; + FieldTypeInfo fti = (FieldTypeInfo) o; + // first check if fieldID matches + if (!this.fieldID.equals(fti.fieldID)) { + return false; + } + // now see if typeID matches + return (this.typeID.equals(fti.typeID)); + } + + /** + * We use a basic hashcode implementation, since this class will likely not + * be used as a hashmap key + */ + public int hashCode() { + return 37*17+typeID.hashCode() + 37*17+fieldID.hashCode(); + } + + + public boolean equals(FieldTypeInfo ti) { + // first check if fieldID matches + if (!this.fieldID.equals(ti.fieldID)) { + return false; + } + // now see if typeID matches + return (this.typeID.equals(ti.typeID)); + } + +} + diff --git a/src/java/org/apache/hadoop/record/meta/MapTypeID.java b/src/java/org/apache/hadoop/record/meta/MapTypeID.java new file mode 100644 index 00000000000..2180d94adc1 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/MapTypeID.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents typeID for a Map + */ +public class MapTypeID extends TypeID { + + private TypeID typeIDKey; + private TypeID typeIDValue; + + public MapTypeID(TypeID typeIDKey, TypeID typeIDValue) { + super(RIOType.MAP); + this.typeIDKey = typeIDKey; + this.typeIDValue = typeIDValue; + } + + /** + * get the TypeID of the map's key element + */ + public TypeID getKeyTypeID() { + return this.typeIDKey; + } + + /** + * get the TypeID of the map's value element + */ + public TypeID getValueTypeID() { + return this.typeIDValue; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeByte(typeVal, tag); + typeIDKey.write(rout, tag); + typeIDValue.write(rout, tag); + } + + /** + * Two map typeIDs are equal if their constituent elements have the + * same type + */ + public boolean equals(Object o) { + if (!super.equals(o)) + return false; + + MapTypeID mti = (MapTypeID) o; + + return this.typeIDKey.equals(mti.typeIDKey) && + this.typeIDValue.equals(mti.typeIDValue); + } + + /** + * We use a basic hashcode implementation, since this class will likely not + * be used as a hashmap key + */ + public int hashCode() { + return 37*17+typeIDKey.hashCode() + 37*17+typeIDValue.hashCode(); + } + +} diff --git a/src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java b/src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java new file mode 100644 index 00000000000..2e24d7861c7 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/RecordTypeInfo.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.record.RecordInput; +import org.apache.hadoop.record.RecordOutput; + + +/** + * A record's Type Information object which can read/write itself. + * + * Type information for a record comprises metadata about the record, + * as well as a collection of type information for each field in the record. + */ +public class RecordTypeInfo extends org.apache.hadoop.record.Record +{ + + private String name; + // A RecordTypeInfo is really just a wrapper around StructTypeID + StructTypeID sTid; + // A RecordTypeInfo object is just a collection of TypeInfo objects for each of its fields. + //private ArrayList typeInfos = new ArrayList(); + // we keep a hashmap of struct/record names and their type information, as we need it to + // set filters when reading nested structs. This map is used during deserialization. + //private Map structRTIs = new HashMap(); + + /** + * Create an empty RecordTypeInfo object. 
+ */ + public RecordTypeInfo() { + sTid = new StructTypeID(); + } + + /** + * Create a RecordTypeInfo object representing a record with the given name + * @param name Name of the record + */ + public RecordTypeInfo(String name) { + this.name = name; + sTid = new StructTypeID(); + } + + /* + * private constructor + */ + private RecordTypeInfo(String name, StructTypeID stid) { + this.sTid = stid; + this.name = name; + } + + /** + * return the name of the record + */ + public String getName() { + return name; + } + + /** + * set the name of the record + */ + public void setName(String name) { + this.name = name; + } + + /** + * Add a field. + * @param fieldName Name of the field + * @param tid Type ID of the field + */ + public void addField(String fieldName, TypeID tid) { + sTid.getFieldTypeInfos().add(new FieldTypeInfo(fieldName, tid)); + } + + private void addAll(Collection tis) { + sTid.getFieldTypeInfos().addAll(tis); + } + + /** + * Return a collection of field type infos + */ + public Collection getFieldTypeInfos() { + return sTid.getFieldTypeInfos(); + } + + /** + * Return the type info of a nested record. We only consider nesting + * to one level. + * @param name Name of the nested record + */ + public RecordTypeInfo getNestedStructTypeInfo(String name) { + StructTypeID stid = sTid.findStruct(name); + if (null == stid) return null; + return new RecordTypeInfo(name, stid); + } + + /** + * Serialize the type information for a record + */ + public void serialize(RecordOutput rout, String tag) throws IOException { + // write out any header, version info, here + rout.startRecord(this, tag); + rout.writeString(name, tag); + sTid.writeRest(rout, tag); + rout.endRecord(this, tag); + } + + /** + * Deserialize the type information for a record + */ + public void deserialize(RecordInput rin, String tag) throws IOException { + // read in any header, version info + rin.startRecord(tag); + // name + this.name = rin.readString(tag); + sTid.read(rin, tag); + rin.endRecord(tag); + } + + /** + * This class doesn't implement Comparable as it's not meant to be used + * for anything besides de/serializing. + * So we always throw an exception. + * Not implemented. Always returns 0 if another RecordTypeInfo is passed in. + */ + public int compareTo (final Object peer_) throws ClassCastException { + if (!(peer_ instanceof RecordTypeInfo)) { + throw new ClassCastException("Comparing different types of records."); + } + throw new UnsupportedOperationException("compareTo() is not supported"); + } +} + diff --git a/src/java/org/apache/hadoop/record/meta/StructTypeID.java b/src/java/org/apache/hadoop/record/meta/StructTypeID.java new file mode 100644 index 00000000000..e18ed27a3ac --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/StructTypeID.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; +import java.util.*; + +import org.apache.hadoop.record.RecordInput; +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents typeID for a struct + */ +public class StructTypeID extends TypeID { + private ArrayList typeInfos = new ArrayList(); + + StructTypeID() { + super(RIOType.STRUCT); + } + + /** + * Create a StructTypeID based on the RecordTypeInfo of some record + */ + public StructTypeID(RecordTypeInfo rti) { + super(RIOType.STRUCT); + typeInfos.addAll(rti.getFieldTypeInfos()); + } + + void add (FieldTypeInfo ti) { + typeInfos.add(ti); + } + + public Collection getFieldTypeInfos() { + return typeInfos; + } + + /* + * return the StructTypeiD, if any, of the given field + */ + StructTypeID findStruct(String name) { + // walk through the list, searching. Not the most efficient way, but this + // in intended to be used rarely, so we keep it simple. + // As an optimization, we can keep a hashmap of record name to its RTI, for later. + for (FieldTypeInfo ti : typeInfos) { + if ((0 == ti.getFieldID().compareTo(name)) && (ti.getTypeID().getTypeVal() == RIOType.STRUCT)) { + return (StructTypeID) ti.getTypeID(); + } + } + return null; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeByte(typeVal, tag); + writeRest(rout, tag); + } + + /* + * Writes rest of the struct (excluding type value). + * As an optimization, this method is directly called by RTI + * for the top level record so that we don't write out the byte + * indicating that this is a struct (since top level records are + * always structs). + */ + void writeRest(RecordOutput rout, String tag) throws IOException { + rout.writeInt(typeInfos.size(), tag); + for (FieldTypeInfo ti : typeInfos) { + ti.write(rout, tag); + } + } + + /* + * deserialize ourselves. Called by RTI. + */ + void read(RecordInput rin, String tag) throws IOException { + // number of elements + int numElems = rin.readInt(tag); + for (int i=0; i it = stID.getFieldTypeInfos().iterator(); + while (it.hasNext()) { + FieldTypeInfo tInfo = it.next(); + skip(rin, tag, tInfo.getTypeID()); + } + rin.endRecord(tag); + break; + case TypeID.RIOType.VECTOR: + org.apache.hadoop.record.Index vidx1 = rin.startVector(tag); + VectorTypeID vtID = (VectorTypeID) typeID; + for (; !vidx1.done(); vidx1.incr()) { + skip(rin, tag, vtID.getElementTypeID()); + } + rin.endVector(tag); + break; + default: + // shouldn't be here + throw new IOException("Unknown typeID when skipping bytes"); + } + } +} diff --git a/src/java/org/apache/hadoop/record/meta/VectorTypeID.java b/src/java/org/apache/hadoop/record/meta/VectorTypeID.java new file mode 100644 index 00000000000..e4a2b3f0bd7 --- /dev/null +++ b/src/java/org/apache/hadoop/record/meta/VectorTypeID.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.record.meta; + +import java.io.IOException; + +import org.apache.hadoop.record.RecordOutput; + +/** + * Represents typeID for vector. + */ +public class VectorTypeID extends TypeID { + private TypeID typeIDElement; + + public VectorTypeID(TypeID typeIDElement) { + super(RIOType.VECTOR); + this.typeIDElement = typeIDElement; + } + + public TypeID getElementTypeID() { + return this.typeIDElement; + } + + void write(RecordOutput rout, String tag) throws IOException { + rout.writeByte(typeVal, tag); + typeIDElement.write(rout, tag); + } + + /** + * Two vector typeIDs are equal if their constituent elements have the + * same type + */ + public boolean equals(Object o) { + if (!super.equals (o)) + return false; + + VectorTypeID vti = (VectorTypeID) o; + return this.typeIDElement.equals(vti.typeIDElement); + } + + /** + * We use a basic hashcode implementation, since this class will likely not + * be used as a hashmap key + */ + public int hashCode() { + return 37*17+typeIDElement.hashCode(); + } + +} diff --git a/src/java/org/apache/hadoop/record/package.html b/src/java/org/apache/hadoop/record/package.html new file mode 100644 index 00000000000..d736f4a38f7 --- /dev/null +++ b/src/java/org/apache/hadoop/record/package.html @@ -0,0 +1,800 @@ + + + + + + + Hadoop Record I/O + + + Hadoop record I/O contains classes and a record description language + translator for simplifying serialization and deserialization of records in a + language-neutral manner. + +

Introduction

+ + Software systems of any significant complexity require mechanisms for data +interchange with the outside world. These interchanges typically involve the +marshaling and unmarshaling of logical units of data to and from data streams +(files, network connections, memory buffers etc.). Applications usually have +some code for serializing and deserializing the data types that they manipulate +embedded in them. The work of serialization has several features that make +automatic code generation for it worthwhile. Given a particular output encoding +(binary, XML, etc.), serialization of primitive types and simple compositions +of primitives (structs, vectors etc.) is a very mechanical task. Manually +written serialization code can be susceptible to bugs especially when records +have a large number of fields or a record definition changes between software +versions. Lastly, it can be very useful for applications written in different +programming languages to be able to share and interchange data. This can be +made a lot easier by describing the data records manipulated by these +applications in a language agnostic manner and using the descriptions to derive +implementations of serialization in multiple target languages. + +This document describes Hadoop Record I/O, a mechanism that is aimed +at +
    +
  • enabling the specification of simple serializable data types (records) +
  • enabling the generation of code in multiple target languages for +marshaling and unmarshaling such types +
  • providing target language specific support that will enable application +programmers to incorporate generated code into their applications +
+ +The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR, +ASN.1, PADS and ICE. While these systems all include a DDL that enables +the specification of most record types, they differ widely in what else they +focus on. The focus in Hadoop Record I/O is on data marshaling and +multi-lingual support. We take a translator-based approach to serialization. +Hadoop users have to describe their data in a simple data description +language. The Hadoop DDL translator rcc generates code that users +can invoke in order to read/write their data from/to simple stream +abstractions. Next we list explicitly some of the goals and non-goals of +Hadoop Record I/O. + + +

Goals

+ +
    +
  • Support for commonly used primitive types. Hadoop should include as +primitives commonly used builtin types from programming languages we intend to +support. + +
  • Support for common data compositions (including recursive compositions). +Hadoop should support widely used composite types such as structs and +vectors. + +
  • Code generation in multiple target languages. Hadoop should be capable of +generating serialization code in multiple target languages and should be +easily extensible to new target languages. The initial target languages are +C++ and Java. + +
  • Support for generated target languages. Hadoop should include support +in the form of headers, libraries, and packages for supported target languages +that enable easy inclusion and use of generated code in applications. +
  • Support for multiple output encodings. Candidates include +packed binary, comma-separated text, XML etc. + +
  • Support for specifying record types in a backwards/forwards compatible +manner. This will probably be in the form of support for optional fields in +records. This version of the document does not include a description of the +planned mechanism; we intend to include it in the next iteration. +
+ +

Non-Goals

+ +
    +
  • Serializing existing arbitrary C++ classes. +
  • Serializing complex data structures such as trees, linked lists etc. +
  • Built-in indexing schemes, compression, or check-sums. +
  • Dynamic construction of objects from an XML schema. +
+ +The remainder of this document describes the features of Hadoop record I/O +in more detail. Section 2 describes the data types supported by the system. +Section 3 lays out the DDL syntax with some examples of simple records. +Section 4 describes the process of code generation with rcc. Section 5 +describes target language mappings and support for Hadoop types. We include a +fairly complete description of C++ mappings with intent to include Java and +others in upcoming iterations of this document. The last section talks about +supported output encodings. + + +

Data Types and Streams

+ +This section describes the primitive and composite types supported by Hadoop. +We aim to support a set of types that can be used to simply and efficiently +express a wide range of record types in different programming languages. + +

Primitive Types

+ +For the most part, the primitive types of Hadoop map directly to primitive +types in high level programming languages. Special cases are the +ustring (a Unicode string) and buffer types, which we believe +find wide use and which are usually implemented in library code and not +available as language built-ins. Hadoop also supplies these via library code +when a target language built-in is not present and there is no widely +adopted "standard" implementation. The complete list of primitive types is: + +
    +
  • byte: An 8-bit unsigned integer. +
  • boolean: A boolean value. +
  • int: A 32-bit signed integer. +
  • long: A 64-bit signed integer. +
  • float: A single precision floating point number as described by + IEEE-754. +
  • double: A double precision floating point number as described by + IEEE-754. +
  • ustring: A string consisting of Unicode characters. +
  • buffer: An arbitrary sequence of bytes. +
+ + +

Composite Types

+Hadoop supports a small set of composite types that enable the description +of simple aggregate types and containers. A composite type is serialized +by sequentially serializing its constituent elements. The supported +composite types are: +
    + +
  • record: An aggregate type like a C-struct. This is a list of +typed fields that are together considered a single unit of data. A record +is serialized by sequentially serializing its constituent fields. In addition +to serialization, a record has comparison operations (equality and less-than) +implemented for it; these are defined as memberwise comparisons. +
  • vector: A sequence of entries of the same data type, primitive +or composite. + +
  • map: An associative container mapping instances of a key type to +instances of a value type. The key and value types may themselves be primitive +or composite types. + +
+ +

Streams

+ +Hadoop generates code for serializing and deserializing record types to +abstract streams. For each target language Hadoop defines very simple input +and output stream interfaces. Application writers can usually develop +concrete implementations of these by putting a one-method wrapper around +an existing stream implementation. + +
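+For illustration, here is a minimal Java sketch of such a one-method wrapper.
+The InStream interface shown is an assumption made for this sketch (mirroring
+the C++ stream interface described later in this document), not the library's
+actual Java API:
+
+import java.io.IOException;
+import java.io.InputStream;
+
+// Hypothetical one-method stream abstraction used only for this sketch.
+interface InStream {
+  // Read up to len bytes into buf; returns the number of bytes read, or -1 at end of stream.
+  int read(byte[] buf, int off, int len) throws IOException;
+}
+
+// The "one-method wrapper" around an existing stream implementation.
+class InputStreamAdapter implements InStream {
+  private final InputStream in;
+
+  InputStreamAdapter(InputStream in) {
+    this.in = in;
+  }
+
+  public int read(byte[] buf, int off, int len) throws IOException {
+    return in.read(buf, off, len); // delegate directly to the wrapped stream
+  }
+}
+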

DDL Syntax and Examples

+ +We now describe the syntax of the Hadoop data description language. This is +followed by a few examples of DDL usage. + +

Hadoop DDL Syntax

+ +

+recfile = *include module *record
+include = "include" path
+path = (relative-path / absolute-path)
+module = "module" module-name
+module-name = name *("." name)
+record := "class" name "{" 1*(field) "}"
+field := type name ";"
+name :=  ALPHA (ALPHA / DIGIT / "_" )*
+type := (ptype / ctype)
+ptype := ("byte" / "boolean" / "int" /
+          "long" / "float" / "double" /
+          "ustring" / "buffer")
+ctype := ("vector" "<" type ">") /
+         ("map" "<" type "," type ">") / name
+
+ +A DDL file describes one or more record types. It begins with zero or +more include declarations, a single mandatory module declaration +followed by zero or more class declarations. The semantics of each of +these declarations are described below: + +
    + +
  • include: An include declaration specifies a DDL file to be +referenced when generating code for types in the current DDL file. Record types +in the current compilation unit may refer to types in all included files. +File inclusion is recursive. An include does not trigger code +generation for the referenced file. + +
  • module: Every Hadoop DDL file must have a single module +declaration that follows the list of includes and precedes all record +declarations. A module declaration identifies a scope within which +the names of all types in the current file are visible. Module names are +mapped to C++ namespaces, Java packages etc. in generated code. + +
  • class: Record types are specified through class +declarations. A class declaration is like a Java class declaration. +It specifies a named record type and a list of fields that constitute records +of the type. Usage is illustrated in the following examples. +
+ +

Examples

+ +
    +
  • A simple DDL file links.jr with just one record declaration. +
    
    +module links {
    +    class Link {
    +        ustring URL;
    +        boolean isRelative;
    +        ustring anchorText;
    +    };
    +}
    +
    + +
  • A DDL file outlinks.jr which includes another +
    
    +include "links.jr"
    +
    +module outlinks {
    +    class OutLinks {
    +        ustring baseURL;
    +        vector<links.Link> outLinks;
    +    };
    +}
    +
    +
+ +

Code Generation

+ +The Hadoop translator is written in Java. Invocation is done by executing a +wrapper shell script named rcc. It takes a mandatory list of +record description files and an optional target language argument, +--language or -l (the default is Java). Thus a typical invocation would look like: +

+$ rcc -l C++ <filename> ...
+
+ + +

Target Language Mappings and Support

+ +For all target languages, the unit of code generation is a record type. +For each record type, Hadoop generates code for serialization and +deserialization, record comparison and access to record members. + +

C++

+ +Support for including Hadoop-generated C++ code in applications comes in the +form of a header file, recordio.hh, which needs to be included in source +that uses Hadoop types, and a library, librecordio.a, which applications need +to link against. The header declares the hadoop C++ namespace, which defines +appropriate types for the various primitives, declares the basic interfaces for +records and streams, and enumerates the supported serialization encodings. +Declarations of these interfaces and a description of their semantics follow: +

+namespace hadoop {
+
+  enum RecFormat { kBinary, kXML, kCSV };
+
+  class InStream {
+  public:
+    virtual ssize_t read(void *buf, size_t n) = 0;
+  };
+
+  class OutStream {
+  public:
+    virtual ssize_t write(const void *buf, size_t n) = 0;
+  };
+
+  class IOError : public runtime_error {
+  public:
+    explicit IOError(const std::string& msg);
+  };
+
+  class IArchive;
+  class OArchive;
+
+  class RecordReader {
+  public:
+    RecordReader(InStream& in, RecFormat fmt);
+    virtual ~RecordReader(void);
+
+    virtual void read(Record& rec);
+  };
+
+  class RecordWriter {
+  public:
+    RecordWriter(OutStream& out, RecFormat fmt);
+    virtual ~RecordWriter(void);
+
+    virtual void write(Record& rec);
+  };
+
+
+  class Record {
+  public:
+    virtual std::string type(void) const = 0;
+    virtual std::string signature(void) const = 0;
+  protected:
+    virtual bool validate(void) const = 0;
+
+    virtual void
+    serialize(OArchive& oa, const std::string& tag) const = 0;
+
+    virtual void
+    deserialize(IArchive& ia, const std::string& tag) = 0;
+  };
+}
+
+ +
    + +
  • RecFormat: An enumeration of the serialization encodings supported +by this implementation of Hadoop. + +
  • InStream: A simple abstraction for an input stream. This has a +single public read method that reads n bytes from the stream into +the buffer buf. Has the same semantics as a blocking read system +call. Returns the number of bytes read or -1 if an error occurs. + +
  • OutStream: A simple abstraction for an output stream. This has a +single write method that writes n bytes to the stream from the +buffer buf. Has the same semantics as a blocking write system +call. Returns the number of bytes written or -1 if an error occurs. + +
  • RecordReader: A RecordReader reads records one at a time from +an underlying stream in a specified record format. The reader is instantiated +with a stream and a serialization format. It has a read method that +takes an instance of a record and deserializes the record from the stream. + +
  • RecordWriter: A RecordWriter writes records one at a +time to an underlying stream in a specified record format. The writer is +instantiated with a stream and a serialization format. It has a +write method that takes an instance of a record and serializes the +record to the stream. + +
  • Record: The base class for all generated record types. This has two +public methods type and signature that return the typename and the +type signature of the record. + +
+ +Two files are generated for each record file (note: not for each record). If a +record file is named "name.jr", the generated files are +"name.jr.cc" and "name.jr.hh", containing serialization +implementations and record type declarations respectively. + +For each record in the DDL file, the generated header file will contain a +class definition corresponding to the record type; method definitions for the +generated type will be present in the '.cc' file. The generated class will +inherit from the abstract class hadoop::Record. The DDL file's +module declaration determines the namespace the record belongs to. +Each '.'-delimited token in the module declaration results in the +creation of a namespace. For instance, the declaration module docs.links +results in the creation of a docs namespace and a nested +docs::links namespace. In the preceding examples, the Link class +is placed in the links namespace. The header file corresponding to +the links.jr file will contain: +

+namespace links {
+  class Link : public hadoop::Record {
+    // ....
+  };
+};
+
+ +Each field within the record will cause the generation of a private member +declaration of the appropriate type in the class declaration, and one or more +accessor methods. The generated class will implement the serialize and +deserialize methods defined in hadoop::Record. It will also +implement the inspection methods type and signature from +hadoop::Record. A default constructor and virtual destructor will also +be generated. Serialization code will read/write records into streams that +implement the hadoop::InStream and the hadoop::OutStream interfaces. + +For each member of a record, an accessor method is generated that returns +either the member or a reference to the member. For members that are returned +by value, a setter method is also generated. This is true for primitive +data members of the types byte, int, long, boolean, float and +double. For example, for an int field called MyField the following +code is generated: +

+...
+private:
+  int32_t mMyField;
+  ...
+public:
+  int32_t getMyField(void) const {
+    return mMyField;
+  };
+
+  void setMyField(int32_t m) {
+    mMyField = m;
+  };
+  ...
+
+ +For a ustring, buffer, or composite field, the generated code +contains only accessors that return a reference to the field. A const +and a non-const accessor are generated. For example: +

+...
+private:
+  std::string mMyBuf;
+  ...
+public:
+
+  std::string& getMyBuf() {
+    return mMyBuf;
+  };
+
+  const std::string& getMyBuf() const {
+    return mMyBuf;
+  };
+  ...
+
+ +

Examples

+ +Suppose the inclrec.jr file contains: +

+module inclrec {
+    class RI {
+        int      I32;
+        double   D;
+        ustring  S;
+    };
+}
+
+ +and the testrec.jr file contains: + +

+include "inclrec.jr"
+module testrec {
+    class R {
+        vector<float> VF;
+        RI            Rec;
+        buffer        Buf;
+    };
+}
+
+ +Then the invocation of rcc such as: +

+$ rcc -l c++ inclrec.jr testrec.jr
+
+will result in the generation of four files: +inclrec.jr.{cc,hh} and testrec.jr.{cc,hh}. + +The inclrec.jr.hh file will contain: +

+#ifndef _INCLREC_JR_HH_
+#define _INCLREC_JR_HH_
+
+#include "recordio.hh"
+
+namespace inclrec {
+  
+  class RI : public hadoop::Record {
+
+  private:
+
+    int32_t      I32;
+    double       D;
+    std::string  S;
+
+  public:
+
+    RI(void);
+    virtual ~RI(void);
+
+    virtual bool operator==(const RI& peer) const;
+    virtual bool operator<(const RI& peer) const;
+
+    virtual int32_t getI32(void) const { return I32; }
+    virtual void setI32(int32_t v) { I32 = v; }
+
+    virtual double getD(void) const { return D; }
+    virtual void setD(double v) { D = v; }
+
+    virtual std::string& getS(void) { return S; }
+    virtual const std::string& getS(void) const { return S; }
+
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+
+  protected:
+
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
+  };
+} // end namespace inclrec
+
+#endif /* _INCLREC_JR_HH_ */
+
+
+ +The testrec.jr.hh file will contain: + + +

+
+#ifndef _TESTREC_JR_HH_
+#define _TESTREC_JR_HH_
+
+#include "inclrec.jr.hh"
+
+namespace testrec {
+  class R : public hadoop::Record {
+
+  private:
+
+    std::vector<float> VF;
+    inclrec::RI        Rec;
+    std::string        Buf;
+
+  public:
+
+    R(void);
+    virtual ~R(void);
+
+    virtual bool operator==(const R& peer) const;
+    virtual bool operator<(const R& peer) const;
+
+    virtual std::vector<float>& getVF(void);
+    virtual const std::vector<float>& getVF(void) const;
+
+    virtual std::string& getBuf(void);
+    virtual const std::string& getBuf(void) const;
+
+    virtual inclrec::RI& getRec(void);
+    virtual const inclrec::RI& getRec(void) const;
+    
+    virtual void serialize(hadoop::OArchive& a) const;
+    virtual void deserialize(hadoop::IArchive& a);
+    
+    virtual std::string type(void) const;
+    virtual std::string signature(void) const;
+  };
+}; // end namespace testrec
+#endif /* _TESTREC_JR_HH_ */
+
+
+ +

Java

+ +Code generation for Java is similar to that for C++. A Java class is generated +for each record type with private members corresponding to the fields. Getters +and setters for fields are also generated. Some differences arise in the +way comparison is expressed and in the mapping of modules to packages and +classes to files. For equality testing, an equals method is generated +for each record type. As per Java requirements a hashCode method is also +generated. For comparison a compareTo method is generated for each +record type. This has the semantics as defined by the Java Comparable +interface, that is, the method returns a negative integer, zero, or a positive +integer as the invoked object is less than, equal to, or greater than the +comparison parameter. + +A .java file is generated per record type as opposed to per DDL +file as in C++. The module declaration translates to a Java +package declaration. The module name maps to an identical Java package +name. In addition to this mapping, the DDL compiler creates the appropriate +directory hierarchy for the package and places the generated .java +files in the correct directories. + +
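+As a rough illustration, the class generated for the Link record from the
+earlier DDL example might have approximately the following shape. This is a
+hand-written sketch based on the description above, not actual rcc output;
+method names and details of the generated code will differ:
+
+package links;
+
+public class Link implements Comparable {
+  // one private member per DDL field (ustring maps to java.lang.String)
+  private String URL;
+  private boolean isRelative;
+  private String anchorText;
+
+  public String getURL() { return URL; }
+  public void setURL(String v) { URL = v; }
+  public boolean getIsRelative() { return isRelative; }
+  public void setIsRelative(boolean v) { isRelative = v; }
+  public String getAnchorText() { return anchorText; }
+  public void setAnchorText(String v) { anchorText = v; }
+
+  public boolean equals(Object o) {
+    if (!(o instanceof Link)) return false;
+    Link p = (Link) o;
+    return URL.equals(p.URL) && isRelative == p.isRelative
+        && anchorText.equals(p.anchorText);
+  }
+
+  public int hashCode() {
+    int h = URL.hashCode();
+    h = 37 * h + (isRelative ? 1 : 0);
+    return 37 * h + anchorText.hashCode();
+  }
+
+  // Comparable semantics: negative, zero or positive as this record is
+  // less than, equal to or greater than the peer (memberwise comparison).
+  public int compareTo(Object o) {
+    Link p = (Link) o;
+    int c = URL.compareTo(p.URL);
+    if (c != 0) return c;
+    c = Boolean.valueOf(isRelative).compareTo(Boolean.valueOf(p.isRelative));
+    if (c != 0) return c;
+    return anchorText.compareTo(p.anchorText);
+  }
+
+  // serialize/deserialize methods against the record I/O streams are also
+  // generated; they are omitted from this sketch.
+}
+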

Mapping Summary

+ +

+DDL Type        C++ Type            Java Type 
+
+boolean         bool                boolean
+byte            int8_t              byte
+int             int32_t             int
+long            int64_t             long
+float           float               float
+double          double              double
+ustring         std::string         java.lang.String
+buffer          std::string         org.apache.hadoop.record.Buffer
+class type      class type          class type
+vector<type>    std::vector<type>   java.util.ArrayList<type>
+map<type,type>  std::map<type,type> java.util.TreeMap<type,type>
+
+ +

Data encodings

+ +This section describes the format of the data encodings supported by Hadoop. +Currently, three data encodings are supported, namely binary, CSV and XML. + +

Binary Serialization Format

+ +The binary data encoding format is fairly dense. Serialization of composite +types is simply defined as a concatenation of serializations of the constituent +elements (lengths are included in vectors and maps). + +Composite types are serialized as follows: +
    +
  • class: Sequence of serialized members. +
  • vector: The number of elements serialized as an int. Followed by a +sequence of serialized elements. +
  • map: The number of key value pairs serialized as an int. Followed +by a sequence of serialized (key,value) pairs. +
+ +Serialization of primitives is more interesting, with a zero compression +optimization for integral types and normalization to UTF-8 for strings. +Primitive types are serialized as follows: + +
    +
  • byte: Represented by 1 byte, as is. +
  • boolean: Represented by 1-byte (0 or 1) +
  • int/long: Integers and longs are serialized zero-compressed. +Represented as 1 byte if -120 <= value < 128. Otherwise, serialized as a +sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents +the number of trailing bytes, N, as the negative number (-120-N). For example, +the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'. +This doesn't help much for 4-byte integers but does a reasonably good job with +longs without bit twiddling. A Java sketch of this encoding is given after this list.
  • float/double: Serialized in IEEE 754 single and double precision +format in network byte order. This is the format used by Java. +
  • ustring: Serialized as 4-byte zero compressed length followed by +data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native +language representation. +
  • buffer: Serialized as a 4-byte zero compressed length followed by the +raw bytes in the buffer. +
+ + +
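+To make the zero-compression scheme concrete, here is a small Java sketch of
+the integer encoding as described above. This is an illustration written for
+this document, not the library's own serialization code, and it only covers
+the cases spelled out in the text:
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+class ZeroCompressSketch {
+  static void writeLong(DataOutputStream out, long v) throws IOException {
+    if (v >= -120 && v < 128) {
+      out.writeByte((int) v);             // small values fit in a single byte
+      return;
+    }
+    if (v < 0) {                          // the text above only defines the
+      throw new IllegalArgumentException( // multi-byte form for non-negative values
+          "multi-byte encoding of negative values is not covered by this sketch");
+    }
+    int n = 0;                            // number of significant trailing bytes
+    for (long t = v; t != 0; t >>>= 8) {
+      n++;
+    }
+    out.writeByte(-120 - n);              // header byte encodes N as -(120 + N)
+    for (int shift = (n - 1) * 8; shift >= 0; shift -= 8) {
+      out.writeByte((int) (v >>> shift)); // payload, most significant byte first
+    }
+  }
+
+  public static void main(String[] args) throws IOException {
+    ByteArrayOutputStream buf = new ByteArrayOutputStream();
+    writeLong(new DataOutputStream(buf), 1024);
+    for (byte b : buf.toByteArray()) {
+      System.out.printf("%02x ", b & 0xff); // prints "86 04 00", as in the text
+    }
+    System.out.println();
+  }
+}
+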

CSV Serialization Format

+ +The CSV serialization format has a lot more structure than the "standard" +Excel CSV format, but we believe the additional structure is useful because + +
    +
  • it makes parsing a lot easier without detracting too much from legibility +
  • the delimiters around composites make it obvious when one is reading a +sequence of Hadoop records +
+ +Serialization formats for the various types are detailed in the grammar that +follows. The notable feature of the formats is the use of delimiters to +indicate certain field types. A worked example is given after the grammar. +
    +
  • A string field begins with a single quote ('). +
  • A buffer field begins with a sharp (#). +
  • A class, vector or map begins with 's{', 'v{' or 'm{' respectively and +ends with '}'. +
+ +The CSV format can be described by the following grammar: + +

+record = primitive / struct / vector / map
+primitive = boolean / int / long / float / double / ustring / buffer
+
+boolean = "T" / "F"
+int = ["-"] 1*DIGIT
+long = ";" ["-"] 1*DIGIT
+float = ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+double = ";" ["-"] 1*DIGIT "." 1*DIGIT ["E" / "e" ["-"] 1*DIGIT]
+
+ustring = "'" *(UTF8 char except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+buffer = "#" *(BYTE except NULL, LF, % and , / "%00" / "%0a" / "%25" / "%2c" )
+
+struct = "s{" record *("," record) "}"
+vector = "v{" [record *("," record)] "}"
+map = "m{" [*(record "," record)] "}"
+
+ +
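+As an illustration (constructed from the grammar above rather than taken from
+actual serializer output), a struct containing the int 5, the long 1024, the
+ustring "has,comma" and a vector of two floats would be written as:
+
+s{5,;1024,'has%2ccomma,v{0.1,-0.89}}
+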

XML Serialization Format

+ +The XML serialization format is the same as that used by Apache XML-RPC +(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original +XML-RPC format and adds some additional data types. Not all record I/O types are +directly expressible in this format, and access to the DDL is required in +order to convert them to valid types. All types, primitive or composite, are +represented by <value> elements. The particular XML-RPC type is +indicated by a nested element in the <value> element. The encoding for +records is always UTF-8. Primitive types are serialized as follows: +
    +
  • byte: XML tag <ex:i1>. Values: 1-byte unsigned +integers represented in US-ASCII +
  • boolean: XML tag <boolean>. Values: "0" or "1" +
  • int: XML tags <i4> or <int>. Values: 4-byte +signed integers represented in US-ASCII. +
  • long: XML tag <ex:i8>. Values: 8-byte signed integers +represented in US-ASCII. +
  • float: XML tag <ex:float>. Values: Single precision +floating point numbers represented in US-ASCII. +
  • double: XML tag <double>. Values: Double precision +floating point numbers represented in US-ASCII. +
  • ustring: XML tag <string>. Values: String values +represented as UTF-8. XML does not permit all Unicode characters in literal +data. In particular, NULLs and control chars are not allowed. Additionally, +XML processors are required to replace carriage returns with line feeds and to +replace CRLF sequences with line feeds. Programming languages that we work +with do not impose these restrictions on string types. To work around these +restrictions, disallowed characters and CRs are percent escaped in strings. +The '%' character is also percent escaped.
  • buffer: XML tag <string>. Values: Arbitrary binary +data. Represented as hexBinary, each byte is replaced by its 2-byte +hexadecimal representation.
+ +Composite types are serialized as follows: + +
    +
  • class: XML tag <struct>. A struct is a sequence of +<member> elements. Each <member> element has a <name> +element and a <value> element. The <name> is a string that must +match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented +by a <value> element. + +
  • vector: XML tag <array>. An <array> contains a +single <data> element. The <data> element is a sequence of +<value> elements, each of which represents an element of the vector. +
  • map: XML tag <array>. Same as vector. + +
+ +For example: + +

+class {
+  int           MY_INT;            // value 5
+  vector<float>  MY_VEC;           // values 0.1, -0.89, 2.45e4
+  buffer        MY_BUF;            // value '\00\n\tabc%'
+}
+
+ +is serialized as + +

+<value>
+  <struct>
+    <member>
+      <name>MY_INT</name>
+      <value><i4>5</i4></value>
+    </member>
+    <member>
+      <name>MY_VEC</name>
+      <value>
+        <array>
+          <data>
+            <value><ex:float>0.1</ex:float></value>
+            <value><ex:float>-0.89</ex:float></value>
+            <value><ex:float>2.45e4</ex:float></value>
+          </data>
+        </array>
+      </value>
+    </member>
+    <member>
+      <name>MY_BUF</name>
+      <value><string>%00\n\tabc%25</string></value>
+    </member>
+  </struct>
+</value> 
+
+ + + diff --git a/src/java/org/apache/hadoop/security/AccessControlException.java b/src/java/org/apache/hadoop/security/AccessControlException.java new file mode 100644 index 00000000000..d04c52948c8 --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessControlException.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +/** + * An exception class for access control related issues. + */ +public class AccessControlException + extends org.apache.hadoop.fs.permission.AccessControlException { + + //Required by {@link java.io.Serializable}. + private static final long serialVersionUID = 1L; + + /** + * Default constructor is needed for unwrapping from + * {@link org.apache.hadoop.ipc.RemoteException}. + */ + public AccessControlException() { + super("Permission denied."); + } + + /** + * Constructs an {@link AccessControlException} + * with the specified detail message. + * @param s the detail message. + */ + public AccessControlException(String s) {super(s);} + + /** + * Constructs a new exception with the specified cause and a detail + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public AccessControlException(Throwable cause) { + super(cause); + } +} diff --git a/src/java/org/apache/hadoop/security/AccessKey.java b/src/java/org/apache/hadoop/security/AccessKey.java new file mode 100644 index 00000000000..81b6383381e --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessKey.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import javax.crypto.Mac; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; + +/** + * Key used for generating and verifying access tokens + */ +public class AccessKey implements Writable { + private long keyID; + private Text key; + private long expiryDate; + private Mac mac; + + public AccessKey() { + this(0L, new Text(), 0L); + } + + public AccessKey(long keyID, Text key, long expiryDate) { + this.keyID = keyID; + this.key = key; + this.expiryDate = expiryDate; + } + + public long getKeyID() { + return keyID; + } + + public Text getKey() { + return key; + } + + public long getExpiryDate() { + return expiryDate; + } + + public Mac getMac() { + return mac; + } + + public void setMac(Mac mac) { + this.mac = mac; + } + + static boolean isEqual(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj instanceof AccessKey) { + AccessKey that = (AccessKey) obj; + return this.keyID == that.keyID && isEqual(this.key, that.key) + && this.expiryDate == that.expiryDate; + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() { + return key == null ? 0 : key.hashCode(); + } + + // /////////////////////////////////////////////// + // Writable + // /////////////////////////////////////////////// + /** + */ + public void write(DataOutput out) throws IOException { + WritableUtils.writeVLong(out, keyID); + key.write(out); + WritableUtils.writeVLong(out, expiryDate); + } + + /** + */ + public void readFields(DataInput in) throws IOException { + keyID = WritableUtils.readVLong(in); + key.readFields(in); + expiryDate = WritableUtils.readVLong(in); + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/AccessToken.java b/src/java/org/apache/hadoop/security/AccessToken.java new file mode 100644 index 00000000000..5a5d9a72f46 --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessToken.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +public class AccessToken implements Writable { + public static final AccessToken DUMMY_TOKEN = new AccessToken(); + private Text tokenID; + private Text tokenAuthenticator; + + public AccessToken() { + this(new Text(), new Text()); + } + + public AccessToken(Text tokenID, Text tokenAuthenticator) { + this.tokenID = tokenID; + this.tokenAuthenticator = tokenAuthenticator; + } + + public Text getTokenID() { + return tokenID; + } + + public Text getTokenAuthenticator() { + return tokenAuthenticator; + } + + static boolean isEqual(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj instanceof AccessToken) { + AccessToken that = (AccessToken) obj; + return isEqual(this.tokenID, that.tokenID) + && isEqual(this.tokenAuthenticator, that.tokenAuthenticator); + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() { + return tokenAuthenticator == null ? 0 : tokenAuthenticator.hashCode(); + } + + // /////////////////////////////////////////////// + // Writable + // /////////////////////////////////////////////// + /** + */ + public void write(DataOutput out) throws IOException { + tokenID.write(out); + tokenAuthenticator.write(out); + } + + /** + */ + public void readFields(DataInput in) throws IOException { + tokenID.readFields(in); + tokenAuthenticator.readFields(in); + } + +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/AccessTokenHandler.java b/src/java/org/apache/hadoop/security/AccessTokenHandler.java new file mode 100644 index 00000000000..8ede2bb3104 --- /dev/null +++ b/src/java/org/apache/hadoop/security/AccessTokenHandler.java @@ -0,0 +1,289 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.security.GeneralSecurityException; +import java.security.SecureRandom; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import javax.crypto.KeyGenerator; +import javax.crypto.Mac; +import javax.crypto.spec.SecretKeySpec; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; + +/** + * AccessTokenHandler can be instantiated in 2 modes, master mode and slave + * mode. Master can generate new access keys and export access keys to slaves, + * while slaves can only import and use access keys received from master. Both + * master and slave can generate and verify access tokens. Typically, master + * mode is used by NN and slave mode is used by DN. + */ +public class AccessTokenHandler { + private static final Log LOG = LogFactory.getLog(AccessTokenHandler.class); + public static final String STRING_ENABLE_ACCESS_TOKEN = "dfs.access.token.enable"; + public static final String STRING_ACCESS_KEY_UPDATE_INTERVAL = "dfs.access.key.update.interval"; + public static final String STRING_ACCESS_TOKEN_LIFETIME = "dfs.access.token.lifetime"; + + private final boolean isMaster; + /* + * keyUpdateInterval is the interval that NN updates its access keys. It + * should be set long enough so that all live DN's and Balancer should have + * sync'ed their access keys with NN at least once during each interval. + */ + private final long keyUpdateInterval; + private final long tokenLifetime; + private long serialNo = new SecureRandom().nextLong(); + private KeyGenerator keyGen; + private AccessKey currentKey; + private AccessKey nextKey; + private Map allKeys; + + public static enum AccessMode { + READ, WRITE, COPY, REPLACE + }; + + /** + * Constructor + * + * @param isMaster + * @param keyUpdateInterval + * @param tokenLifetime + * @throws IOException + */ + public AccessTokenHandler(boolean isMaster, long keyUpdateInterval, + long tokenLifetime) throws IOException { + this.isMaster = isMaster; + this.keyUpdateInterval = keyUpdateInterval; + this.tokenLifetime = tokenLifetime; + this.allKeys = new HashMap(); + if (isMaster) { + try { + generateKeys(); + initMac(currentKey); + } catch (GeneralSecurityException e) { + throw (IOException) new IOException( + "Failed to create AccessTokenHandler").initCause(e); + } + } + } + + /** Initialize access keys */ + private synchronized void generateKeys() throws NoSuchAlgorithmException { + keyGen = KeyGenerator.getInstance("HmacSHA1"); + /* + * Need to set estimated expiry dates for currentKey and nextKey so that if + * NN crashes, DN can still expire those keys. NN will stop using the newly + * generated currentKey after the first keyUpdateInterval, however it may + * still be used by DN and Balancer to generate new tokens before they get a + * chance to sync their keys with NN. Since we require keyUpdInterval to be + * long enough so that all live DN's and Balancer will sync their keys with + * NN at least once during the period, the estimated expiry date for + * currentKey is set to now() + 2 * keyUpdateInterval + tokenLifetime. + * Similarly, the estimated expiry date for nextKey is one keyUpdateInterval + * more. 
+ */ + serialNo++; + currentKey = new AccessKey(serialNo, new Text(keyGen.generateKey() + .getEncoded()), System.currentTimeMillis() + 2 * keyUpdateInterval + + tokenLifetime); + serialNo++; + nextKey = new AccessKey(serialNo, new Text(keyGen.generateKey() + .getEncoded()), System.currentTimeMillis() + 3 * keyUpdateInterval + + tokenLifetime); + allKeys.put(currentKey.getKeyID(), currentKey); + allKeys.put(nextKey.getKeyID(), nextKey); + } + + /** Initialize Mac function */ + private synchronized void initMac(AccessKey key) throws IOException { + try { + Mac mac = Mac.getInstance("HmacSHA1"); + mac.init(new SecretKeySpec(key.getKey().getBytes(), "HmacSHA1")); + key.setMac(mac); + } catch (GeneralSecurityException e) { + throw (IOException) new IOException( + "Failed to initialize Mac for access key, keyID=" + key.getKeyID()) + .initCause(e); + } + } + + /** Export access keys, only to be used in master mode */ + public synchronized ExportedAccessKeys exportKeys() { + if (!isMaster) + return null; + if (LOG.isDebugEnabled()) + LOG.debug("Exporting access keys"); + return new ExportedAccessKeys(true, keyUpdateInterval, tokenLifetime, + currentKey, allKeys.values().toArray(new AccessKey[0])); + } + + private synchronized void removeExpiredKeys() { + long now = System.currentTimeMillis(); + for (Iterator> it = allKeys.entrySet() + .iterator(); it.hasNext();) { + Map.Entry e = it.next(); + if (e.getValue().getExpiryDate() < now) { + it.remove(); + } + } + } + + /** + * Set access keys, only to be used in slave mode + */ + public synchronized void setKeys(ExportedAccessKeys exportedKeys) + throws IOException { + if (isMaster || exportedKeys == null) + return; + LOG.info("Setting access keys"); + removeExpiredKeys(); + this.currentKey = exportedKeys.getCurrentKey(); + initMac(currentKey); + AccessKey[] receivedKeys = exportedKeys.getAllKeys(); + for (int i = 0; i < receivedKeys.length; i++) { + if (receivedKeys[i] == null) + continue; + this.allKeys.put(receivedKeys[i].getKeyID(), receivedKeys[i]); + } + } + + /** + * Update access keys, only to be used in master mode + */ + public synchronized void updateKeys() throws IOException { + if (!isMaster) + return; + LOG.info("Updating access keys"); + removeExpiredKeys(); + // set final expiry date of retiring currentKey + allKeys.put(currentKey.getKeyID(), new AccessKey(currentKey.getKeyID(), + currentKey.getKey(), System.currentTimeMillis() + keyUpdateInterval + + tokenLifetime)); + // update the estimated expiry date of new currentKey + currentKey = new AccessKey(nextKey.getKeyID(), nextKey.getKey(), System + .currentTimeMillis() + + 2 * keyUpdateInterval + tokenLifetime); + initMac(currentKey); + allKeys.put(currentKey.getKeyID(), currentKey); + // generate a new nextKey + serialNo++; + nextKey = new AccessKey(serialNo, new Text(keyGen.generateKey() + .getEncoded()), System.currentTimeMillis() + 3 * keyUpdateInterval + + tokenLifetime); + allKeys.put(nextKey.getKeyID(), nextKey); + } + + /** Check if token is well formed */ + private synchronized Boolean verifyToken(long keyID, AccessToken token) + throws IOException { + AccessKey key = allKeys.get(keyID); + if (key == null) { + LOG.warn("Access key for keyID=" + keyID + " doesn't exist."); + return false; + } + if (key.getMac() == null) { + initMac(key); + } + Text tokenID = token.getTokenID(); + Text authenticator = new Text(key.getMac().doFinal(tokenID.getBytes())); + return authenticator.equals(token.getTokenAuthenticator()); + } + + /** Generate an access token for current user */ + public 
AccessToken generateToken(long blockID, EnumSet modes) + throws IOException { + UserGroupInformation ugi = UserGroupInformation.getCurrentUGI(); + String userID = (ugi == null ? null : ugi.getUserName()); + return generateToken(userID, blockID, modes); + } + + /** Generate an access token for a specified user */ + public synchronized AccessToken generateToken(String userID, long blockID, + EnumSet modes) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Generating access token for user=" + userID + ", blockID=" + + blockID + ", access modes=" + modes + ", keyID=" + + currentKey.getKeyID()); + } + if (modes == null || modes.isEmpty()) + throw new IOException("access modes can't be null or empty"); + ByteArrayOutputStream buf = new ByteArrayOutputStream(4096); + DataOutputStream out = new DataOutputStream(buf); + WritableUtils.writeVLong(out, System.currentTimeMillis() + tokenLifetime); + WritableUtils.writeVLong(out, currentKey.getKeyID()); + WritableUtils.writeString(out, userID); + WritableUtils.writeVLong(out, blockID); + WritableUtils.writeVInt(out, modes.size()); + for (AccessMode aMode : modes) { + WritableUtils.writeEnum(out, aMode); + } + Text tokenID = new Text(buf.toByteArray()); + return new AccessToken(tokenID, new Text(currentKey.getMac().doFinal( + tokenID.getBytes()))); + } + + /** Check if access should be allowed. userID is not checked if null */ + public Boolean checkAccess(AccessToken token, String userID, long blockID, + AccessMode mode) throws IOException { + long oExpiry = 0; + long oKeyID = 0; + String oUserID = null; + long oBlockID = 0; + EnumSet oModes = EnumSet.noneOf(AccessMode.class); + + try { + ByteArrayInputStream buf = new ByteArrayInputStream(token.getTokenID() + .getBytes()); + DataInputStream in = new DataInputStream(buf); + oExpiry = WritableUtils.readVLong(in); + oKeyID = WritableUtils.readVLong(in); + oUserID = WritableUtils.readString(in); + oBlockID = WritableUtils.readVLong(in); + int length = WritableUtils.readVInt(in); + for (int i = 0; i < length; ++i) { + oModes.add(WritableUtils.readEnum(in, AccessMode.class)); + } + } catch (IOException e) { + throw (IOException) new IOException( + "Unable to parse access token for user=" + userID + ", blockID=" + + blockID + ", access mode=" + mode).initCause(e); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Verifying access token for user=" + userID + ", blockID=" + + blockID + ", access mode=" + mode + ", keyID=" + oKeyID); + } + return (userID == null || userID.equals(oUserID)) && oBlockID == blockID + && System.currentTimeMillis() < oExpiry && oModes.contains(mode) + && verifyToken(oKeyID, token); + } + +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/ExportedAccessKeys.java b/src/java/org/apache/hadoop/security/ExportedAccessKeys.java new file mode 100644 index 00000000000..e5ab2934b4b --- /dev/null +++ b/src/java/org/apache/hadoop/security/ExportedAccessKeys.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableFactories; +import org.apache.hadoop.io.WritableFactory; + +/** + * Object for passing access keys + */ +public class ExportedAccessKeys implements Writable { + public static final ExportedAccessKeys DUMMY_KEYS = new ExportedAccessKeys(); + private boolean isAccessTokenEnabled; + private long keyUpdateInterval; + private long tokenLifetime; + private AccessKey currentKey; + private AccessKey[] allKeys; + + public ExportedAccessKeys() { + this(false, 0, 0, new AccessKey(), new AccessKey[0]); + } + + ExportedAccessKeys(boolean isAccessTokenEnabled, long keyUpdateInterval, + long tokenLifetime, AccessKey currentKey, AccessKey[] allKeys) { + this.isAccessTokenEnabled = isAccessTokenEnabled; + this.keyUpdateInterval = keyUpdateInterval; + this.tokenLifetime = tokenLifetime; + this.currentKey = currentKey; + this.allKeys = allKeys; + } + + public boolean isAccessTokenEnabled() { + return isAccessTokenEnabled; + } + + public long getKeyUpdateInterval() { + return keyUpdateInterval; + } + + public long getTokenLifetime() { + return tokenLifetime; + } + + public AccessKey getCurrentKey() { + return currentKey; + } + + public AccessKey[] getAllKeys() { + return allKeys; + } + + static boolean isEqual(Object a, Object b) { + return a == null ? b == null : a.equals(b); + } + + /** {@inheritDoc} */ + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (obj instanceof ExportedAccessKeys) { + ExportedAccessKeys that = (ExportedAccessKeys) obj; + return this.isAccessTokenEnabled == that.isAccessTokenEnabled + && this.keyUpdateInterval == that.keyUpdateInterval + && this.tokenLifetime == that.tokenLifetime + && isEqual(this.currentKey, that.currentKey) + && Arrays.equals(this.allKeys, that.allKeys); + } + return false; + } + + /** {@inheritDoc} */ + public int hashCode() { + return currentKey == null ? 
0 : currentKey.hashCode(); + } + + // /////////////////////////////////////////////// + // Writable + // /////////////////////////////////////////////// + static { // register a ctor + WritableFactories.setFactory(ExportedAccessKeys.class, + new WritableFactory() { + public Writable newInstance() { + return new ExportedAccessKeys(); + } + }); + } + + /** + */ + public void write(DataOutput out) throws IOException { + out.writeBoolean(isAccessTokenEnabled); + out.writeLong(keyUpdateInterval); + out.writeLong(tokenLifetime); + currentKey.write(out); + out.writeInt(allKeys.length); + for (int i = 0; i < allKeys.length; i++) { + allKeys[i].write(out); + } + } + + /** + */ + public void readFields(DataInput in) throws IOException { + isAccessTokenEnabled = in.readBoolean(); + keyUpdateInterval = in.readLong(); + tokenLifetime = in.readLong(); + currentKey.readFields(in); + this.allKeys = new AccessKey[in.readInt()]; + for (int i = 0; i < allKeys.length; i++) { + allKeys[i] = new AccessKey(); + allKeys[i].readFields(in); + } + } + +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/security/Group.java b/src/java/org/apache/hadoop/security/Group.java new file mode 100644 index 00000000000..2bb8caad8f7 --- /dev/null +++ b/src/java/org/apache/hadoop/security/Group.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.security.Principal; + +/** + * A group to which a user belongs to. + */ +public class Group implements Principal { + final String group; + + /** + * Create a new Group with the given groupname. + * @param group group name + */ + public Group(String group) { + this.group = group; + } + + @Override + public String getName() { + return group; + } + + @Override + public String toString() { + return group; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((group == null) ? 0 : group.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Group other = (Group) obj; + if (group == null) { + if (other.group != null) + return false; + } else if (!group.equals(other.group)) + return false; + return true; + } +} diff --git a/src/java/org/apache/hadoop/security/InvalidAccessTokenException.java b/src/java/org/apache/hadoop/security/InvalidAccessTokenException.java new file mode 100644 index 00000000000..eabce15ea3b --- /dev/null +++ b/src/java/org/apache/hadoop/security/InvalidAccessTokenException.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security; + +import java.io.IOException; + +/** + * Access token verification failed. + */ +public class InvalidAccessTokenException extends IOException { + private static final long serialVersionUID = 168L; + + public InvalidAccessTokenException() { + super(); + } + + public InvalidAccessTokenException(String msg) { + super(msg); + } +} diff --git a/src/java/org/apache/hadoop/security/PermissionChecker.java b/src/java/org/apache/hadoop/security/PermissionChecker.java new file mode 100644 index 00000000000..ea8246f5132 --- /dev/null +++ b/src/java/org/apache/hadoop/security/PermissionChecker.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.io.IOException; +import java.util.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; + +/** Perform permission checking. */ +public class PermissionChecker { + static final Log LOG = LogFactory.getLog(UserGroupInformation.class); + + public final String user; + protected final Set groups = new HashSet(); + public final boolean isSuper; + + /** + * Checks if the caller has the required permission. + * @param owner username of the owner + * @param supergroup supergroup that the owner belongs to + */ + public PermissionChecker(String owner, String supergroup + ) throws AccessControlException{ + UserGroupInformation ugi = UserGroupInformation.getCurrentUGI(); + if (LOG.isDebugEnabled()) { + LOG.debug("ugi=" + ugi); + } + + if (ugi != null) { + user = ugi.getUserName(); + groups.addAll(Arrays.asList(ugi.getGroupNames())); + isSuper = user.equals(owner) || groups.contains(supergroup); + } + else { + throw new AccessControlException("ugi = null"); + } + } + + /** + * Check if the callers group contains the required values. 
+ * @param group group to check + */ + public boolean containsGroup(String group) {return groups.contains(group);} + + /** + * Verify if the caller has the required permission. This will result into + * an exception if the caller is not allowed to access the resource. + * @param owner owner of the system + * @param supergroup supergroup of the system + */ + public static void checkSuperuserPrivilege(UserGroupInformation owner, + String supergroup) + throws AccessControlException { + PermissionChecker checker = + new PermissionChecker(owner.getUserName(), supergroup); + if (!checker.isSuper) { + throw new AccessControlException("Access denied for user " + + checker.user + ". Superuser privilege is required"); + } + } +} diff --git a/src/java/org/apache/hadoop/security/SecurityUtil.java b/src/java/org/apache/hadoop/security/SecurityUtil.java new file mode 100644 index 00000000000..94b68254c71 --- /dev/null +++ b/src/java/org/apache/hadoop/security/SecurityUtil.java @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.security.Policy; +import java.security.Principal; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeSet; + +import javax.security.auth.Subject; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.authorize.ConfiguredPolicy; +import org.apache.hadoop.security.authorize.PolicyProvider; + +public class SecurityUtil { + + private static final Log LOG = LogFactory.getLog(SecurityUtil.class); + + static { + // Set an empty default policy + setPolicy(new ConfiguredPolicy(new Configuration(), + PolicyProvider.DEFAULT_POLICY_PROVIDER)); + } + + /** + * Set the global security policy for Hadoop. + * + * @param policy {@link Policy} used for authorization. + */ + public static void setPolicy(Policy policy) { + if (LOG.isDebugEnabled()) { + LOG.debug("Setting Hadoop security policy"); + } + Policy.setPolicy(policy); + } + + /** + * Get the current global security policy for Hadoop. + * @return the current {@link Policy} + */ + public static Policy getPolicy() { + return Policy.getPolicy(); + } + + /** + * Get the {@link Subject} for the user identified by ugi. 
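// Illustrative usage sketch (not part of the patched sources): checking the
// calling user's privileges with PermissionChecker. The owner name "hdfs",
// supergroup "supergroup" and group "operators" are hypothetical placeholders.
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.PermissionChecker;
import org.apache.hadoop.security.UserGroupInformation;

class PermissionCheckDemo {
  static void inspectCaller() throws AccessControlException {
    // Resolves the calling user via UserGroupInformation.getCurrentUGI().
    PermissionChecker checker = new PermissionChecker("hdfs", "supergroup");
    System.out.println("user=" + checker.user
        + " superuser=" + checker.isSuper
        + " operator=" + checker.containsGroup("operators"));
  }

  // 'serviceOwner' would typically be the UGI the daemon was started under;
  // the call throws AccessControlException if the current caller is neither
  // that owner nor a member of the supergroup.
  static void requireSuperuser(UserGroupInformation serviceOwner)
      throws AccessControlException {
    PermissionChecker.checkSuperuserPrivilege(serviceOwner, "supergroup");
  }
}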
+ * @param ugi user + * @return the {@link Subject} for the user identified by ugi + */ + public static Subject getSubject(UserGroupInformation ugi) { + if (ugi == null) { + return null; + } + + Set principals = // Number of principals = username + #groups + new HashSet(ugi.getGroupNames().length+1); + User userPrincipal = new User(ugi.getUserName()); + principals.add(userPrincipal); + for (String group : ugi.getGroupNames()) { + Group groupPrincipal = new Group(group); + principals.add(groupPrincipal); + } + principals.add(ugi); + Subject user = + new Subject(false, principals, new HashSet(), new HashSet()); + + return user; + } + + /** + * Class representing a configured access control list. + */ + public static class AccessControlList { + + // Indicates an ACL string that represents access to all users + public static final String WILDCARD_ACL_VALUE = "*"; + + // Set of users who are granted access. + private Set users; + // Set of groups which are granted access + private Set groups; + // Whether all users are granted access. + private boolean allAllowed; + + /** + * Construct a new ACL from a String representation of the same. + * + * The String is a a comma separated list of users and groups. + * The user list comes first and is separated by a space followed + * by the group list. For e.g. "user1,user2 group1,group2" + * + * @param aclString String representation of the ACL + */ + public AccessControlList(String aclString) { + users = new TreeSet(); + groups = new TreeSet(); + if (aclString.contains(WILDCARD_ACL_VALUE) && + aclString.trim().equals(WILDCARD_ACL_VALUE)) { + allAllowed = true; + } else { + String[] userGroupStrings = aclString.split(" ", 2); + + if (userGroupStrings.length >= 1) { + String[] usersStr = userGroupStrings[0].split(","); + if (usersStr.length >= 1) { + addToSet(users, usersStr); + } + } + + if (userGroupStrings.length == 2) { + String[] groupsStr = userGroupStrings[1].split(","); + if (groupsStr.length >= 1) { + addToSet(groups, groupsStr); + } + } + } + } + + public boolean allAllowed() { + return allAllowed; + } + + public Set getUsers() { + return users; + } + + public Set getGroups() { + return groups; + } + + private static final void addToSet(Set set, String[] strings) { + for (String s : strings) { + s = s.trim(); + if (s.length() > 0) { + set.add(s); + } + } + } + } +} diff --git a/src/java/org/apache/hadoop/security/UnixUserGroupInformation.java b/src/java/org/apache/hadoop/security/UnixUserGroupInformation.java new file mode 100644 index 00000000000..62cbb659869 --- /dev/null +++ b/src/java/org/apache/hadoop/security/UnixUserGroupInformation.java @@ -0,0 +1,432 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
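// Illustrative usage sketch (not part of the patched sources): parsing the
// "users groups" ACL syntax accepted by SecurityUtil.AccessControlList.
// The user and group names are made-up examples.
import org.apache.hadoop.security.SecurityUtil.AccessControlList;

class AclDemo {
  public static void main(String[] args) {
    AccessControlList acl = new AccessControlList("alice,bob admins,operators");
    System.out.println(acl.getUsers());    // [alice, bob]
    System.out.println(acl.getGroups());   // [admins, operators]
    System.out.println(acl.allAllowed());  // false

    // The wildcard "*" grants access to everyone.
    AccessControlList open = new AccessControlList("*");
    System.out.println(open.allAllowed()); // true
  }
}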
+ */ + +package org.apache.hadoop.security; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.StringTokenizer; +import java.util.TreeSet; + +import javax.security.auth.login.LoginException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; + +/** An implementation of UserGroupInformation in the Unix system */ +public class UnixUserGroupInformation extends UserGroupInformation { + public static final String DEFAULT_USERNAME = "DrWho"; + public static final String DEFAULT_GROUP = "Tardis"; + + final static public String UGI_PROPERTY_NAME = "hadoop.job.ugi"; + final static private HashMap user2UGIMap = + new HashMap(); + + /** Create an immutable {@link UnixUserGroupInformation} object. */ + public static UnixUserGroupInformation createImmutable(String[] ugi) { + return new UnixUserGroupInformation(ugi) { + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException(); + } + }; + } + + private String userName; + private String[] groupNames; + + /** Default constructor + */ + public UnixUserGroupInformation() { + } + + /** Constructor with parameters user name and its group names. + * The first entry in the groups list is the default group. + * + * @param userName a user's name + * @param groupNames groups list, first of which is the default group + * @exception IllegalArgumentException if any argument is null + */ + public UnixUserGroupInformation(String userName, String[] groupNames) { + setUserGroupNames(userName, groupNames); + } + + /** Constructor with parameter user/group names + * + * @param ugi an array containing user/group names, the first + * element of which is the user name, the second of + * which is the default group name. + * @exception IllegalArgumentException if the array size is less than 2 + * or any element is null. + */ + public UnixUserGroupInformation(String[] ugi) { + if (ugi==null || ugi.length < 2) { + throw new IllegalArgumentException( "Parameter does contain at least "+ + "one user name and one group name"); + } + String[] groupNames = new String[ugi.length-1]; + System.arraycopy(ugi, 1, groupNames, 0, groupNames.length); + setUserGroupNames(ugi[0], groupNames); + } + + /* Set this object's user name and group names + * + * @param userName a user's name + * @param groupNames groups list, the first of which is the default group + * @exception IllegalArgumentException if any argument is null + */ + private void setUserGroupNames(String userName, String[] groupNames) { + if (userName==null || userName.length()==0 || + groupNames== null || groupNames.length==0) { + throw new IllegalArgumentException( + "Parameters should not be null or an empty string/array"); + } + for (int i=0; iugi as a comma separated string in + * conf as a property attr + * + * The String starts with the user name followed by the default group names, + * and other group names. + * + * @param conf configuration + * @param attr property name + * @param ugi a UnixUserGroupInformation + */ + public static void saveToConf(Configuration conf, String attr, + UnixUserGroupInformation ugi ) { + conf.set(attr, ugi.toString()); + } + + /** Read a UGI from the given conf + * + * The object is expected to store with the property name attr + * as a comma separated string that starts + * with the user name followed by group names. 
+ * If the property name is not defined, return null. + * It's assumed that there is only one UGI per user. If this user already + * has a UGI in the ugi map, return the ugi in the map. + * Otherwise, construct a UGI from the configuration, store it in the + * ugi map and return it. + * + * @param conf configuration + * @param attr property name + * @return a UnixUGI + * @throws LoginException if the stored string is ill-formatted. + */ + public static UnixUserGroupInformation readFromConf( + Configuration conf, String attr) throws LoginException { + String[] ugi = conf.getStrings(attr); + if(ugi == null) { + return null; + } + UnixUserGroupInformation currentUGI = null; + if (ugi.length>0 ){ + currentUGI = user2UGIMap.get(ugi[0]); + } + if (currentUGI == null) { + try { + currentUGI = new UnixUserGroupInformation(ugi); + user2UGIMap.put(currentUGI.getUserName(), currentUGI); + } catch (IllegalArgumentException e) { + throw new LoginException("Login failed: "+e.getMessage()); + } + } + + return currentUGI; + } + + /** + * Get current user's name and the names of all its groups from Unix. + * It's assumed that there is only one UGI per user. If this user already + * has a UGI in the ugi map, return the ugi in the map. + * Otherwise get the current user's information from Unix, store it + * in the map, and return it. + * + * If the current user's UNIX username or groups are configured in such a way + * to throw an Exception, for example if the user uses LDAP, then this method + * will use a the {@link #DEFAULT_USERNAME} and {@link #DEFAULT_GROUP} + * constants. + */ + public static UnixUserGroupInformation login() throws LoginException { + try { + String userName; + + // if an exception occurs, then uses the + // default user + try { + userName = getUnixUserName(); + } catch (Exception e) { + userName = DEFAULT_USERNAME; + } + + // check if this user already has a UGI object in the ugi map + UnixUserGroupInformation ugi = user2UGIMap.get(userName); + if (ugi != null) { + return ugi; + } + + /* get groups list from UNIX. + * It's assumed that the first group is the default group. + */ + String[] groupNames; + + // if an exception occurs, then uses the + // default group + try { + groupNames = getUnixGroups(); + } catch (Exception e) { + groupNames = new String[1]; + groupNames[0] = DEFAULT_GROUP; + } + + // construct a Unix UGI + ugi = new UnixUserGroupInformation(userName, groupNames); + user2UGIMap.put(ugi.getUserName(), ugi); + return ugi; + } catch (Exception e) { + throw new LoginException("Login failed: "+e.getMessage()); + } + } + + /** Equivalent to login(conf, false). */ + public static UnixUserGroupInformation login(Configuration conf) + throws LoginException { + return login(conf, false); + } + + /** Get a user's name & its group names from the given configuration; + * If it is not defined in the configuration, get the current user's + * information from Unix. + * If the user has a UGI in the ugi map, return the one in + * the UGI map. + * + * @param conf either a job configuration or client's configuration + * @param save saving it to conf? 
+ * @return UnixUserGroupInformation a user/group information + * @exception LoginException if not able to get the user/group information + */ + public static UnixUserGroupInformation login(Configuration conf, boolean save + ) throws LoginException { + UnixUserGroupInformation ugi = readFromConf(conf, UGI_PROPERTY_NAME); + if (ugi == null) { + ugi = login(); + LOG.debug("Unix Login: " + ugi); + if (save) { + saveToConf(conf, UGI_PROPERTY_NAME, ugi); + } + } + return ugi; + } + + /* Return a string representation of a string array. + * Two strings are separated by a blank. + */ + private static String toString(String[] strArray) { + if (strArray==null || strArray.length==0) { + return ""; + } + StringBuilder buf = new StringBuilder(strArray[0]); + for (int i=1; i0 && !groupNames[0].equals(otherUGI.groupNames[0])) { + return false; + } + // check all group names, ignoring the order + return new TreeSet(Arrays.asList(groupNames)).equals( + new TreeSet(Arrays.asList(otherUGI.groupNames))); + } + + /** Returns a hash code for this UGI. + * The hash code for a UGI is the hash code of its user name string. + * + * @return a hash code value for this UGI. + */ + public int hashCode() { + return getUserName().hashCode(); + } + + /** Convert this object to a string + * + * @return a comma separated string containing the user name and group names + */ + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append(userName); + for (String groupName : groupNames) { + buf.append(','); + buf.append(groupName); + } + return buf.toString(); + } + + @Override + public String getName() { + return toString(); + } +} diff --git a/src/java/org/apache/hadoop/security/User.java b/src/java/org/apache/hadoop/security/User.java new file mode 100644 index 00000000000..dd62debcf8d --- /dev/null +++ b/src/java/org/apache/hadoop/security/User.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.security.Principal; + +/** + * The username of a user. + */ +public class User implements Principal { + final String user; + + /** + * Create a new User with the given username. + * @param user user name + */ + public User(String user) { + this.user = user; + } + + @Override + public String getName() { + return user; + } + + @Override + public String toString() { + return user; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((user == null) ? 
0 : user.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + User other = (User) obj; + if (user == null) { + if (other.user != null) + return false; + } else if (!user.equals(other.user)) + return false; + return true; + } +} diff --git a/src/java/org/apache/hadoop/security/UserGroupInformation.java b/src/java/org/apache/hadoop/security/UserGroupInformation.java new file mode 100644 index 00000000000..ada9dcf2958 --- /dev/null +++ b/src/java/org/apache/hadoop/security/UserGroupInformation.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security; + +import java.io.IOException; +import java.security.AccessController; +import java.security.Principal; +import java.util.Set; + +import javax.security.auth.Subject; +import javax.security.auth.login.LoginException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; + +/** A {@link Writable} abstract class for storing user and groups information. + */ +public abstract class UserGroupInformation implements Writable, Principal { + public static final Log LOG = LogFactory.getLog(UserGroupInformation.class); + private static UserGroupInformation LOGIN_UGI = null; + + private static final ThreadLocal currentUser = + new ThreadLocal(); + + /** @return the {@link UserGroupInformation} for the current thread */ + public static UserGroupInformation getCurrentUGI() { + Subject user = getCurrentUser(); + + if (user == null) { + user = currentUser.get(); + if (user == null) { + return null; + } + } + + Set ugiPrincipals = + user.getPrincipals(UserGroupInformation.class); + + UserGroupInformation ugi = null; + if (ugiPrincipals != null && ugiPrincipals.size() == 1) { + ugi = ugiPrincipals.iterator().next(); + if (ugi == null) { + throw new RuntimeException("Cannot find _current user_ UGI in the Subject!"); + } + } else { + throw new RuntimeException("Cannot resolve current user from subject, " + + "which had " + ugiPrincipals.size() + + " UGI principals!"); + } + return ugi; + } + + /** + * Set the {@link UserGroupInformation} for the current thread + * @deprecated Use {@link #setCurrentUser(UserGroupInformation)} + */ + @Deprecated + public static void setCurrentUGI(UserGroupInformation ugi) { + setCurrentUser(ugi); + } + + /** + * Return the current user Subject. 
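// Illustrative usage sketch (not part of the patched sources): obtaining the
// Unix login identity, binding it to the current thread, and shipping an
// identity through a Configuration. setCurrentUser() is normally invoked by
// the framework; it is called directly here only for illustration, and the
// user/group names below are hypothetical.
import javax.security.auth.login.LoginException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation;

class UgiDemo {
  public static void main(String[] args) throws LoginException {
    Configuration conf = new Configuration();

    // Resolve the caller's Unix identity and cache it under hadoop.job.ugi.
    UnixUserGroupInformation ugi = UnixUserGroupInformation.login(conf, true);

    // Make it visible to code that calls UserGroupInformation.getCurrentUGI().
    UserGroupInformation.setCurrentUser(ugi);
    System.out.println("current user: "
        + UserGroupInformation.getCurrentUGI().getUserName());

    // A client could also ship an explicit identity in the configuration.
    UnixUserGroupInformation other =
        new UnixUserGroupInformation("alice", new String[] {"users"});
    UnixUserGroupInformation.saveToConf(conf,
        UnixUserGroupInformation.UGI_PROPERTY_NAME, other);
  }
}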
+ * @return the current user Subject + */ + static Subject getCurrentUser() { + return Subject.getSubject(AccessController.getContext()); + } + + /** + * Set the {@link UserGroupInformation} for the current thread + * WARNING - This method should be used only in test cases and other exceptional + * cases! + * @param ugi {@link UserGroupInformation} for the current thread + */ + public static void setCurrentUser(UserGroupInformation ugi) { + Subject user = SecurityUtil.getSubject(ugi); + currentUser.set(user); + } + + /** Get username + * + * @return the user's name + */ + public abstract String getUserName(); + + /** Get the name of the groups that the user belong to + * + * @return an array of group names + */ + public abstract String[] getGroupNames(); + + /** Login and return a UserGroupInformation object. */ + public static UserGroupInformation login(Configuration conf + ) throws LoginException { + if (LOGIN_UGI == null) { + LOGIN_UGI = UnixUserGroupInformation.login(conf); + } + return LOGIN_UGI; + } + + /** Read a {@link UserGroupInformation} from conf */ + public static UserGroupInformation readFrom(Configuration conf + ) throws IOException { + try { + return UnixUserGroupInformation.readFromConf(conf, + UnixUserGroupInformation.UGI_PROPERTY_NAME); + } catch (LoginException e) { + throw (IOException)new IOException().initCause(e); + } + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/AuthorizationException.java b/src/java/org/apache/hadoop/security/authorize/AuthorizationException.java new file mode 100644 index 00000000000..c001a2dd6f5 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/AuthorizationException.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.io.PrintStream; +import java.io.PrintWriter; + +import org.apache.hadoop.security.AccessControlException; + +/** + * An exception class for authorization-related issues. + * + * This class does not provide the stack trace for security purposes. + */ +public class AuthorizationException extends AccessControlException { + private static final long serialVersionUID = 1L; + + public AuthorizationException() { + super(); + } + + public AuthorizationException(String message) { + super(message); + } + + /** + * Constructs a new exception with the specified cause and a detail + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) 
+ */ + public AuthorizationException(Throwable cause) { + super(cause); + } + + private static StackTraceElement[] stackTrace = new StackTraceElement[0]; + @Override + public StackTraceElement[] getStackTrace() { + // Do not provide the stack-trace + return stackTrace; + } + + @Override + public void printStackTrace() { + // Do not provide the stack-trace + } + + @Override + public void printStackTrace(PrintStream s) { + // Do not provide the stack-trace + } + + @Override + public void printStackTrace(PrintWriter s) { + // Do not provide the stack-trace + } + +} diff --git a/src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java b/src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java new file mode 100644 index 00000000000..6b90829aa85 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/ConfiguredPolicy.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.Permission; +import java.security.PermissionCollection; +import java.security.Policy; +import java.security.Principal; +import java.security.ProtectionDomain; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.Group; +import org.apache.hadoop.security.User; +import org.apache.hadoop.security.SecurityUtil.AccessControlList; + +/** + * A {@link Configuration} based security {@link Policy} for Hadoop. + * + * {@link ConfiguredPolicy} works in conjunction with a {@link PolicyProvider} + * for providing service-level authorization for Hadoop. 
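// Illustrative usage sketch (not part of the patched sources): installing a
// ConfiguredPolicy built from a PolicyProvider as the process-wide security
// Policy, and re-reading the policy file after an administrator edits it.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.authorize.ConfiguredPolicy;
import org.apache.hadoop.security.authorize.PolicyProvider;

class PolicySetup {
  static void install(Configuration conf, PolicyProvider provider) {
    // Reads hadoop-policy.xml (or the file named by -Dhadoop.policy.file).
    ConfiguredPolicy policy = new ConfiguredPolicy(conf, provider);
    SecurityUtil.setPolicy(policy);
  }

  static void reload() {
    // Re-parse the policy file so ACL changes take effect.
    java.security.Policy current = SecurityUtil.getPolicy();
    current.refresh();
  }
}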
+ */ +public class ConfiguredPolicy extends Policy implements Configurable { + public static final String HADOOP_POLICY_FILE = "hadoop-policy.xml"; + private static final Log LOG = LogFactory.getLog(ConfiguredPolicy.class); + + private Configuration conf; + private PolicyProvider policyProvider; + private volatile Map> permissions; + private volatile Set allowedPermissions; + + public ConfiguredPolicy(Configuration conf, PolicyProvider policyProvider) { + this.conf = conf; + this.policyProvider = policyProvider; + refresh(); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + refresh(); + } + + @Override + public boolean implies(ProtectionDomain domain, Permission permission) { + // Only make checks for domains having principals + if(domain.getPrincipals().length == 0) { + return true; + } + + return super.implies(domain, permission); + } + + @Override + public PermissionCollection getPermissions(ProtectionDomain domain) { + PermissionCollection permissionCollection = super.getPermissions(domain); + for (Principal principal : domain.getPrincipals()) { + Set principalPermissions = permissions.get(principal); + if (principalPermissions != null) { + for (Permission permission : principalPermissions) { + permissionCollection.add(permission); + } + } + + for (Permission permission : allowedPermissions) { + permissionCollection.add(permission); + } + } + return permissionCollection; + } + + @Override + public void refresh() { + // Get the system property 'hadoop.policy.file' + String policyFile = + System.getProperty("hadoop.policy.file", HADOOP_POLICY_FILE); + + // Make a copy of the original config, and load the policy file + Configuration policyConf = new Configuration(conf); + policyConf.addResource(policyFile); + + Map> newPermissions = + new HashMap>(); + Set newAllowPermissions = new HashSet(); + + // Parse the config file + Service[] services = policyProvider.getServices(); + if (services != null) { + for (Service service : services) { + AccessControlList acl = + new AccessControlList( + policyConf.get(service.getServiceKey(), + AccessControlList.WILDCARD_ACL_VALUE) + ); + + if (acl.allAllowed()) { + newAllowPermissions.add(service.getPermission()); + if (LOG.isDebugEnabled()) { + LOG.debug("Policy - " + service.getPermission() + " * "); + } + } else { + for (String user : acl.getUsers()) { + addPermission(newPermissions, new User(user), service.getPermission()); + } + + for (String group : acl.getGroups()) { + addPermission(newPermissions, new Group(group), service.getPermission()); + } + } + } + } + + // Flip to the newly parsed permissions + allowedPermissions = newAllowPermissions; + permissions = newPermissions; + } + + private void addPermission(Map> permissions, + Principal principal, Permission permission) { + Set principalPermissions = permissions.get(principal); + if (principalPermissions == null) { + principalPermissions = new HashSet(); + permissions.put(principal, principalPermissions); + } + principalPermissions.add(permission); + if (LOG.isDebugEnabled()) { + LOG.debug("Policy - Adding " + permission + " to " + principal); + } + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java b/src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java new file mode 100644 index 00000000000..7099f0ee2c3 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/ConnectionPermission.java @@ -0,0 +1,74 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.Permission; + +import org.apache.hadoop.ipc.VersionedProtocol; + +/** + * {@link Permission} to initiate a connection to a given service. + */ +public class ConnectionPermission extends Permission { + + private static final long serialVersionUID = 1L; + private final Class protocol; + + /** + * {@link ConnectionPermission} for a given service. + * @param protocol service to be accessed + */ + public ConnectionPermission(Class protocol) { + super(protocol.getName()); + this.protocol = protocol; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof ConnectionPermission) { + return protocol == ((ConnectionPermission)obj).protocol; + } + return false; + } + + @Override + public String getActions() { + return "ALLOW"; + } + + @Override + public int hashCode() { + return protocol.hashCode(); + } + + @Override + public boolean implies(Permission permission) { + if (permission instanceof ConnectionPermission) { + ConnectionPermission that = (ConnectionPermission)permission; + if (that.protocol.equals(VersionedProtocol.class)) { + return true; + } + return this.protocol.equals(that.protocol); + } + return false; + } + + public String toString() { + return "ConnectionPermission(" + protocol.getName() + ")"; + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/PolicyProvider.java b/src/java/org/apache/hadoop/security/authorize/PolicyProvider.java new file mode 100644 index 00000000000..fb75b7f5818 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/PolicyProvider.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.Policy; + +/** + * {@link PolicyProvider} provides the {@link Service} definitions to the + * security {@link Policy} in effect for Hadoop. 
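// Illustrative usage sketch (not part of the patched sources): a minimal
// PolicyProvider exposing one service. ClientProtocol and the ACL key
// "security.client.protocol.acl" are hypothetical stand-ins for a real
// protocol interface and its hadoop-policy.xml entry.
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.security.authorize.Service;

class ExamplePolicyProvider extends PolicyProvider {
  // Hypothetical protocol interface protected by this policy.
  interface ClientProtocol extends org.apache.hadoop.ipc.VersionedProtocol {}

  @Override
  public Service[] getServices() {
    return new Service[] {
      new Service("security.client.protocol.acl", ClientProtocol.class)
    };
  }
}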
+ * + */ +public abstract class PolicyProvider { + + /** + * Configuration key for the {@link PolicyProvider} implementation. + */ + public static final String POLICY_PROVIDER_CONFIG = + "hadoop.security.authorization.policyprovider"; + + /** + * A default {@link PolicyProvider} without any defined services. + */ + public static final PolicyProvider DEFAULT_POLICY_PROVIDER = + new PolicyProvider() { + public Service[] getServices() { + return null; + } + }; + + /** + * Get the {@link Service} definitions from the {@link PolicyProvider}. + * @return the {@link Service} definitions + */ + public abstract Service[] getServices(); +} diff --git a/src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java b/src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java new file mode 100644 index 00000000000..7f9b530a37c --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/RefreshAuthorizationPolicyProtocol.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.io.IOException; + +import org.apache.hadoop.ipc.VersionedProtocol; + +/** + * Protocol which is used to refresh the authorization policy in use currently. + */ +public interface RefreshAuthorizationPolicyProtocol extends VersionedProtocol { + + /** + * Version 1: Initial version + */ + public static final long versionID = 1L; + + /** + * Refresh the service-level authorization policy in-effect. + * @throws IOException + */ + void refreshServiceAcl() throws IOException; +} diff --git a/src/java/org/apache/hadoop/security/authorize/Service.java b/src/java/org/apache/hadoop/security/authorize/Service.java new file mode 100644 index 00000000000..cd0f8ed9799 --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/Service.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.security.authorize; + +import java.security.Permission; + +/** + * An abstract definition of service as related to + * Service Level Authorization for Hadoop. + * + * Each service defines it's configuration key and also the necessary + * {@link Permission} required to access the service. + */ +public class Service { + private String key; + private Permission permission; + + public Service(String key, Class protocol) { + this.key = key; + this.permission = new ConnectionPermission(protocol); + } + + /** + * Get the configuration key for the service. + * @return the configuration key for the service + */ + public String getServiceKey() { + return key; + } + + /** + * Get the {@link Permission} required to access the service. + * @return the {@link Permission} required to access the service + */ + public Permission getPermission() { + return permission; + } +} diff --git a/src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java b/src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java new file mode 100644 index 00000000000..3573467afaa --- /dev/null +++ b/src/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.security.authorize; + +import java.security.AccessControlException; +import java.security.AccessController; +import java.security.Permission; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import javax.security.auth.Subject; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * An authorization manager which handles service-level authorization + * for incoming service requests. + */ +public class ServiceAuthorizationManager { + + private static final Log LOG = + LogFactory.getLog(ServiceAuthorizationManager.class); + + /** + * Configuration key for controlling service-level authorization for Hadoop. + */ + public static final String SERVICE_AUTHORIZATION_CONFIG = + "hadoop.security.authorization"; + + private static Map, Permission> protocolToPermissionMap = + Collections.synchronizedMap(new HashMap, Permission>()); + + /** + * Authorize the user to access the protocol being used. 
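// Illustrative usage sketch (not part of the patched sources): how a server
// might perform the service-level check before dispatching a call. The
// protocol class and configuration handling are hypothetical.
import javax.security.auth.Subject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;

class AuthorizeCall {
  static void check(Configuration conf, UserGroupInformation caller,
                    Class<?> protocol) throws AuthorizationException {
    // Only enforce the check when service-level authorization is enabled.
    if (!conf.getBoolean(
        ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
      return;
    }
    Subject subject = SecurityUtil.getSubject(caller);
    ServiceAuthorizationManager.authorize(subject, protocol);
  }
}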
+ * + * @param user user accessing the service + * @param protocol service being accessed + * @throws AuthorizationException on authorization failure + */ + public static void authorize(Subject user, Class protocol) + throws AuthorizationException { + Permission permission = protocolToPermissionMap.get(protocol); + if (permission == null) { + permission = new ConnectionPermission(protocol); + protocolToPermissionMap.put(protocol, permission); + } + + checkPermission(user, permission); + } + + /** + * Check if the given {@link Subject} has all of necessary {@link Permission} + * set. + * + * @param user Subject to be authorized + * @param permissions Permission set + * @throws AuthorizationException if the authorization failed + */ + private static void checkPermission(final Subject user, + final Permission... permissions) + throws AuthorizationException { + try{ + Subject.doAs(user, + new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + try { + for(Permission permission : permissions) { + AccessController.checkPermission(permission); + } + } catch (AccessControlException ace) { + LOG.info("Authorization failed for " + + UserGroupInformation.getCurrentUGI(), ace); + throw new AuthorizationException(ace); + } + return null; + } + } + ); + } catch (PrivilegedActionException e) { + throw new AuthorizationException(e.getException()); + } + } + +} diff --git a/src/java/org/apache/hadoop/util/CyclicIteration.java b/src/java/org/apache/hadoop/util/CyclicIteration.java new file mode 100644 index 00000000000..4bfd96a3296 --- /dev/null +++ b/src/java/org/apache/hadoop/util/CyclicIteration.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.util.Iterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.NoSuchElementException; + +/** Provide an cyclic {@link Iterator} for a {@link NavigableMap}. + * The {@link Iterator} navigates the entries of the map + * according to the map's ordering. + * If the {@link Iterator} hits the last entry of the map, + * it will then continue from the first entry. + */ +public class CyclicIteration implements Iterable> { + private final NavigableMap navigablemap; + private final NavigableMap tailmap; + + /** Construct an {@link Iterable} object, + * so that an {@link Iterator} can be created + * for iterating the given {@link NavigableMap}. + * The iteration begins from the starting key exclusively. 
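// Illustrative usage sketch (not part of the patched sources): cycling over a
// TreeMap starting just after a given key. The map contents are made-up.
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.util.CyclicIteration;

class CyclicDemo {
  public static void main(String[] args) {
    TreeMap<Integer, String> nodes = new TreeMap<Integer, String>();
    nodes.put(1, "a");
    nodes.put(3, "b");
    nodes.put(7, "c");

    // Iteration starts after key 3, so the entries are visited as 7, 1, 3.
    for (Map.Entry<Integer, String> e
        : new CyclicIteration<Integer, String>(nodes, 3)) {
      System.out.println(e.getKey() + " -> " + e.getValue());
    }
  }
}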
+ */ + public CyclicIteration(NavigableMap navigablemap, K startingkey) { + if (navigablemap == null || navigablemap.isEmpty()) { + this.navigablemap = null; + this.tailmap = null; + } + else { + this.navigablemap = navigablemap; + this.tailmap = navigablemap.tailMap(startingkey, false); + } + } + + /** {@inheritDoc} */ + public Iterator> iterator() { + return new CyclicIterator(); + } + + /** An {@link Iterator} for {@link CyclicIteration}. */ + private class CyclicIterator implements Iterator> { + private boolean hasnext; + private Iterator> i; + /** The first entry to begin. */ + private final Map.Entry first; + /** The next entry. */ + private Map.Entry next; + + private CyclicIterator() { + hasnext = navigablemap != null; + if (hasnext) { + i = tailmap.entrySet().iterator(); + first = nextEntry(); + next = first; + } + else { + i = null; + first = null; + next = null; + } + } + + private Map.Entry nextEntry() { + if (!i.hasNext()) { + i = navigablemap.entrySet().iterator(); + } + return i.next(); + } + + /** {@inheritDoc} */ + public boolean hasNext() { + return hasnext; + } + + /** {@inheritDoc} */ + public Map.Entry next() { + if (!hasnext) { + throw new NoSuchElementException(); + } + + final Map.Entry curr = next; + next = nextEntry(); + hasnext = !next.equals(first); + return curr; + } + + /** Not supported */ + public void remove() { + throw new UnsupportedOperationException("Not supported"); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/Daemon.java b/src/java/org/apache/hadoop/util/Daemon.java new file mode 100644 index 00000000000..71928af80d0 --- /dev/null +++ b/src/java/org/apache/hadoop/util/Daemon.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +/** A thread that has called {@link Thread#setDaemon(boolean) } with true.*/ +public class Daemon extends Thread { + + { + setDaemon(true); // always a daemon + } + + Runnable runnable = null; + /** Construct a daemon thread. */ + public Daemon() { + super(); + } + + /** Construct a daemon thread. */ + public Daemon(Runnable runnable) { + super(runnable); + this.runnable = runnable; + this.setName(((Object)runnable).toString()); + } + + /** Construct a daemon thread to be part of a specified thread group. 
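// Illustrative usage sketch (not part of the patched sources): running a
// background task as a Daemon so it does not keep the JVM alive at shutdown.
import org.apache.hadoop.util.Daemon;

class DaemonDemo {
  public static void main(String[] args) {
    Daemon heartbeat = new Daemon(new Runnable() {
      public void run() {
        // Periodic work would go here; the thread name is runnable.toString().
        System.out.println("heartbeat tick");
      }
    });
    heartbeat.start();
  }
}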
*/ + public Daemon(ThreadGroup group, Runnable runnable) { + super(group, runnable); + this.runnable = runnable; + this.setName(((Object)runnable).toString()); + } + + public Runnable getRunnable() { + return runnable; + } +} diff --git a/src/java/org/apache/hadoop/util/DataChecksum.java b/src/java/org/apache/hadoop/util/DataChecksum.java new file mode 100644 index 00000000000..9aa339025b3 --- /dev/null +++ b/src/java/org/apache/hadoop/util/DataChecksum.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.zip.Checksum; +import java.util.zip.CRC32; + +import java.io.*; + +/** + * This class provides inteface and utilities for processing checksums for + * DFS data transfers. + */ + +public class DataChecksum implements Checksum { + + // Misc constants + public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len + + // checksum types + public static final int CHECKSUM_NULL = 0; + public static final int CHECKSUM_CRC32 = 1; + + private static final int CHECKSUM_NULL_SIZE = 0; + private static final int CHECKSUM_CRC32_SIZE = 4; + + + public static DataChecksum newDataChecksum( int type, int bytesPerChecksum ) { + if ( bytesPerChecksum <= 0 ) { + return null; + } + + switch ( type ) { + case CHECKSUM_NULL : + return new DataChecksum( CHECKSUM_NULL, new ChecksumNull(), + CHECKSUM_NULL_SIZE, bytesPerChecksum ); + case CHECKSUM_CRC32 : + return new DataChecksum( CHECKSUM_CRC32, new CRC32(), + CHECKSUM_CRC32_SIZE, bytesPerChecksum ); + default: + return null; + } + } + + /** + * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. + * @return DataChecksum of the type in the array or null in case of an error. + */ + public static DataChecksum newDataChecksum( byte bytes[], int offset ) { + if ( offset < 0 || bytes.length < offset + HEADER_LEN ) { + return null; + } + + // like readInt(): + int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | + ( (bytes[offset+2] & 0xff) << 16 ) | + ( (bytes[offset+3] & 0xff) << 8 ) | + ( (bytes[offset+4] & 0xff) ); + return newDataChecksum( bytes[0], bytesPerChecksum ); + } + + /** + * This constructucts a DataChecksum by reading HEADER_LEN bytes from + * input stream in + */ + public static DataChecksum newDataChecksum( DataInputStream in ) + throws IOException { + int type = in.readByte(); + int bpc = in.readInt(); + DataChecksum summer = newDataChecksum( type, bpc ); + if ( summer == null ) { + throw new IOException( "Could not create DataChecksum of type " + + type + " with bytesPerChecksum " + bpc ); + } + return summer; + } + + /** + * Writes the checksum header to the output stream out. 
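// Illustrative usage sketch (not part of the patched sources): computing and
// verifying a CRC32 chunk checksum with DataChecksum. The input data and the
// 512-byte chunk size are arbitrary example values.
import java.io.IOException;
import org.apache.hadoop.util.DataChecksum;

class ChecksumDemo {
  public static void main(String[] args) throws IOException {
    byte[] data = "hello, hadoop".getBytes();

    DataChecksum sum = DataChecksum.newDataChecksum(
        DataChecksum.CHECKSUM_CRC32, 512 /* bytesPerChecksum */);
    sum.update(data, 0, data.length);

    // Serialize the 4-byte CRC, then verify it against the same data.
    byte[] crc = new byte[sum.getChecksumSize()];
    sum.writeValue(crc, 0, true);   // also resets the checksum

    sum.update(data, 0, data.length);
    System.out.println("matches: " + sum.compare(crc, 0));  // true
  }
}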
+ */ + public void writeHeader( DataOutputStream out ) + throws IOException { + out.writeByte( type ); + out.writeInt( bytesPerChecksum ); + } + + public byte[] getHeader() { + byte[] header = new byte[DataChecksum.HEADER_LEN]; + header[0] = (byte) (type & 0xff); + // Writing in buffer just like DataOutput.WriteInt() + header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); + header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); + header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); + header[1+3] = (byte) (bytesPerChecksum & 0xff); + return header; + } + + /** + * Writes the current checksum to the stream. + * If reset is true, then resets the checksum. + * @return number of bytes written. Will be equal to getChecksumSize(); + */ + public int writeValue( DataOutputStream out, boolean reset ) + throws IOException { + if ( size <= 0 ) { + return 0; + } + + if ( type == CHECKSUM_CRC32 ) { + out.writeInt( (int) summer.getValue() ); + } else { + throw new IOException( "Unknown Checksum " + type ); + } + + if ( reset ) { + reset(); + } + + return size; + } + + /** + * Writes the current checksum to a buffer. + * If reset is true, then resets the checksum. + * @return number of bytes written. Will be equal to getChecksumSize(); + */ + public int writeValue( byte[] buf, int offset, boolean reset ) + throws IOException { + if ( size <= 0 ) { + return 0; + } + + if ( type == CHECKSUM_CRC32 ) { + int checksum = (int) summer.getValue(); + buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); + buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); + buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); + buf[offset+3] = (byte) (checksum & 0xff); + } else { + throw new IOException( "Unknown Checksum " + type ); + } + + if ( reset ) { + reset(); + } + + return size; + } + + /** + * Compares the checksum located at buf[offset] with the current checksum. + * @return true if the checksum matches and false otherwise. + */ + public boolean compare( byte buf[], int offset ) { + if ( size > 0 && type == CHECKSUM_CRC32 ) { + int checksum = ( (buf[offset+0] & 0xff) << 24 ) | + ( (buf[offset+1] & 0xff) << 16 ) | + ( (buf[offset+2] & 0xff) << 8 ) | + ( (buf[offset+3] & 0xff) ); + return checksum == (int) summer.getValue(); + } + return size == 0; + } + + private final int type; + private final int size; + private final Checksum summer; + private final int bytesPerChecksum; + private int inSum = 0; + + private DataChecksum( int checksumType, Checksum checksum, + int sumSize, int chunkSize ) { + type = checksumType; + summer = checksum; + size = sumSize; + bytesPerChecksum = chunkSize; + } + + // Accessors + public int getChecksumType() { + return type; + } + public int getChecksumSize() { + return size; + } + public int getBytesPerChecksum() { + return bytesPerChecksum; + } + public int getNumBytesInSum() { + return inSum; + } + + public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; + static public int getChecksumHeaderSize() { + return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int + } + //Checksum Interface. Just a wrapper around member summer. 
+ public long getValue() { + return summer.getValue(); + } + public void reset() { + summer.reset(); + inSum = 0; + } + public void update( byte[] b, int off, int len ) { + if ( len > 0 ) { + summer.update( b, off, len ); + inSum += len; + } + } + public void update( int b ) { + summer.update( b ); + inSum += 1; + } + + /** + * This just provides a dummy implimentation for Checksum class + * This is used when there is no checksum available or required for + * data + */ + static class ChecksumNull implements Checksum { + + public ChecksumNull() {} + + //Dummy interface + public long getValue() { return 0; } + public void reset() {} + public void update(byte[] b, int off, int len) {} + public void update(int b) {} + }; +} diff --git a/src/java/org/apache/hadoop/util/DiskChecker.java b/src/java/org/apache/hadoop/util/DiskChecker.java new file mode 100644 index 00000000000..4c471dbce83 --- /dev/null +++ b/src/java/org/apache/hadoop/util/DiskChecker.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.File; +import java.io.IOException; + +/** + * Class that provides utility functions for checking disk problem + */ + +public class DiskChecker { + + public static class DiskErrorException extends IOException { + public DiskErrorException(String msg) { + super(msg); + } + } + + public static class DiskOutOfSpaceException extends IOException { + public DiskOutOfSpaceException(String msg) { + super(msg); + } + } + + /** + * The semantics of mkdirsWithExistsCheck method is different from the mkdirs + * method provided in the Sun's java.io.File class in the following way: + * While creating the non-existent parent directories, this method checks for + * the existence of those directories if the mkdir fails at any point (since + * that directory might have just been created by some other process). + * If both mkdir() and the exists() check fails for any seemingly + * non-existent directory, then we signal an error; Sun's mkdir would signal + * an error (return false) if a directory it is attempting to create already + * exists or the mkdir fails. 
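// Illustrative usage sketch (not part of the patched sources): validating a
// local directory before using it for storage. The path is a made-up example.
import java.io.File;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

class DiskCheckDemo {
  public static void main(String[] args) {
    try {
      // Creates the directory (and parents) if needed, then checks that it is
      // a readable, writable directory.
      DiskChecker.checkDir(new File("/tmp/hadoop-demo/data"));
      System.out.println("directory is usable");
    } catch (DiskErrorException e) {
      System.err.println("unusable directory: " + e.getMessage());
    }
  }
}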
+ * @param dir + * @return true on success, false on failure + */ + public static boolean mkdirsWithExistsCheck(File dir) { + if (dir.mkdir() || dir.exists()) { + return true; + } + File canonDir = null; + try { + canonDir = dir.getCanonicalFile(); + } catch (IOException e) { + return false; + } + String parent = canonDir.getParent(); + return (parent != null) && + (mkdirsWithExistsCheck(new File(parent)) && + (canonDir.mkdir() || canonDir.exists())); + } + + public static void checkDir(File dir) throws DiskErrorException { + if (!mkdirsWithExistsCheck(dir)) + throw new DiskErrorException("can not create directory: " + + dir.toString()); + + if (!dir.isDirectory()) + throw new DiskErrorException("not a directory: " + + dir.toString()); + + if (!dir.canRead()) + throw new DiskErrorException("directory is not readable: " + + dir.toString()); + + if (!dir.canWrite()) + throw new DiskErrorException("directory is not writable: " + + dir.toString()); + } + +} diff --git a/src/java/org/apache/hadoop/util/GenericOptionsParser.java b/src/java/org/apache/hadoop/util/GenericOptionsParser.java new file mode 100644 index 00000000000..28323787551 --- /dev/null +++ b/src/java/org/apache/hadoop/util/GenericOptionsParser.java @@ -0,0 +1,408 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.net.URL; +import java.net.URLClassLoader; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * GenericOptionsParser is a utility to parse command line + * arguments generic to the Hadoop framework. + * + * GenericOptionsParser recognizes several standarad command + * line arguments, enabling applications to easily specify a namenode, a + * jobtracker, additional configuration resources etc. + * + *
+ * Generic Options
+ *
+ * The supported generic options are:
+ *
+ *     -conf <configuration file>     specify a configuration file
+ *     -D <property=value>            use value for given property
+ *     -fs <local|namenode:port>      specify a namenode
+ *     -jt <local|jobtracker:port>    specify a job tracker
+ *     -files <comma separated list of files>    specify comma separated
+ *                            files to be copied to the map reduce cluster
+ *     -libjars <comma separated list of jars>   specify comma separated
+ *                            jar files to include in the classpath.
+ *     -archives <comma separated list of archives>    specify comma
+ *             separated archives to be unarchived on the compute machines.
+ *
+ * The general command line syntax is:
+ *
+ * bin/hadoop command [genericOptions] [commandOptions]
+ *
+ * Generic command line arguments might modify
+ * Configuration objects, given to constructors.
+ *
+ * The functionality is implemented using Commons CLI.
+ *
+ * Examples:
+ *
+ * $ bin/hadoop dfs -fs darwin:8020 -ls /data
+ * list /data directory in dfs with namenode darwin:8020
+ *
+ * $ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
+ * list /data directory in dfs with namenode darwin:8020
+ *
+ * $ bin/hadoop dfs -conf hadoop-site.xml -ls /data
+ * list /data directory in dfs with conf specified in hadoop-site.xml
+ *
+ * $ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
+ * submit a job to job tracker darwin:50020
+ *
+ * $ bin/hadoop job -jt darwin:50020 -submit job.xml
+ * submit a job to job tracker darwin:50020
+ *
+ * $ bin/hadoop job -jt local -submit job.xml
+ * submit a job to local runner
+ *
+ * $ bin/hadoop jar -libjars testlib.jar
+ * -archives test.tgz -files file.txt inputjar args
+ * job submission with libjars, files and archives
+ * + * @see Tool + * @see ToolRunner + */ +public class GenericOptionsParser { + + private static final Log LOG = LogFactory.getLog(GenericOptionsParser.class); + private Configuration conf; + private CommandLine commandLine; + + /** + * Create an options parser with the given options to parse the args. + * @param opts the options + * @param args the command line arguments + */ + public GenericOptionsParser(Options opts, String[] args) { + this(new Configuration(), new Options(), args); + } + + /** + * Create an options parser to parse the args. + * @param args the command line arguments + */ + public GenericOptionsParser(String[] args) { + this(new Configuration(), new Options(), args); + } + + /** + * Create a GenericOptionsParser to parse only the generic Hadoop + * arguments. + * + * The array of string arguments other than the generic arguments can be + * obtained by {@link #getRemainingArgs()}. + * + * @param conf the Configuration to modify. + * @param args command-line arguments. + */ + public GenericOptionsParser(Configuration conf, String[] args) { + this(conf, new Options(), args); + } + + /** + * Create a GenericOptionsParser to parse given options as well + * as generic Hadoop options. + * + * The resulting CommandLine object can be obtained by + * {@link #getCommandLine()}. + * + * @param conf the configuration to modify + * @param options options built by the caller + * @param args User-specified arguments + */ + public GenericOptionsParser(Configuration conf, Options options, String[] args) { + parseGeneralOptions(options, conf, args); + this.conf = conf; + } + + /** + * Returns an array of Strings containing only application-specific arguments. + * + * @return array of Strings containing the un-parsed arguments + * or empty array if commandLine was not defined. + */ + public String[] getRemainingArgs() { + return (commandLine == null) ? new String[]{} : commandLine.getArgs(); + } + + /** + * Get the modified configuration + * @return the configuration that has the modified parameters. + */ + public Configuration getConfiguration() { + return conf; + } + + /** + * Returns the commons-cli CommandLine object + * to process the parsed arguments. + * + * Note: If the object is created with + * {@link #GenericOptionsParser(Configuration, String[])}, then returned + * object will only contain parsed generic options. + * + * @return CommandLine representing list of arguments + * parsed against Options descriptor. 
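+ *
+ * A minimal usage sketch (illustrative only; the "verbose" option and the
+ * args array are made up by way of example):
+ *
+ *   Options opts = new Options();
+ *   opts.addOption("verbose", false, "print more output");
+ *   GenericOptionsParser parser =
+ *       new GenericOptionsParser(new Configuration(), opts, args);
+ *   CommandLine line = parser.getCommandLine();
+ *   boolean verbose = line.hasOption("verbose");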
+ */ + public CommandLine getCommandLine() { + return commandLine; + } + + /** + * Specify properties of each generic option + */ + @SuppressWarnings("static-access") + private static Options buildGeneralOptions(Options opts) { + Option fs = OptionBuilder.withArgName("local|namenode:port") + .hasArg() + .withDescription("specify a namenode") + .create("fs"); + Option jt = OptionBuilder.withArgName("local|jobtracker:port") + .hasArg() + .withDescription("specify a job tracker") + .create("jt"); + Option oconf = OptionBuilder.withArgName("configuration file") + .hasArg() + .withDescription("specify an application configuration file") + .create("conf"); + Option property = OptionBuilder.withArgName("property=value") + .hasArgs() + .withArgPattern("=", 1) + .withDescription("use value for given property") + .create('D'); + Option libjars = OptionBuilder.withArgName("paths") + .hasArg() + .withDescription("comma separated jar files to include in the classpath.") + .create("libjars"); + Option files = OptionBuilder.withArgName("paths") + .hasArg() + .withDescription("comma separated files to be copied to the " + + "map reduce cluster") + .create("files"); + Option archives = OptionBuilder.withArgName("paths") + .hasArg() + .withDescription("comma separated archives to be unarchived" + + " on the compute machines.") + .create("archives"); + + opts.addOption(fs); + opts.addOption(jt); + opts.addOption(oconf); + opts.addOption(property); + opts.addOption(libjars); + opts.addOption(files); + opts.addOption(archives); + + return opts; + } + + /** + * Modify configuration according user-specified generic options + * @param conf Configuration to be modified + * @param line User-specified generic options + */ + private void processGeneralOptions(Configuration conf, + CommandLine line) { + if (line.hasOption("fs")) { + FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); + } + + if (line.hasOption("jt")) { + conf.set("mapred.job.tracker", line.getOptionValue("jt")); + } + if (line.hasOption("conf")) { + String[] values = line.getOptionValues("conf"); + for(String value : values) { + conf.addResource(new Path(value)); + } + } + try { + if (line.hasOption("libjars")) { + conf.set("tmpjars", + validateFiles(line.getOptionValue("libjars"), conf)); + //setting libjars in client classpath + URL[] libjars = getLibJars(conf); + if(libjars!=null && libjars.length>0) { + conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); + Thread.currentThread().setContextClassLoader( + new URLClassLoader(libjars, + Thread.currentThread().getContextClassLoader())); + } + } + if (line.hasOption("files")) { + conf.set("tmpfiles", + validateFiles(line.getOptionValue("files"), conf)); + } + if (line.hasOption("archives")) { + conf.set("tmparchives", + validateFiles(line.getOptionValue("archives"), conf)); + } + } catch (IOException ioe) { + System.err.println(StringUtils.stringifyException(ioe)); + } + if (line.hasOption('D')) { + String[] property = line.getOptionValues('D'); + for(int i=0; i specify an application configuration file"); + out.println("-D use value for given property"); + out.println("-fs specify a namenode"); + out.println("-jt specify a job tracker"); + out.println("-files " + + "specify comma separated files to be copied to the map reduce cluster"); + out.println("-libjars " + + "specify comma separated jar files to include in the classpath."); + out.println("-archives " + + "specify comma separated archives to be unarchived" + + " on the compute machines.\n"); + out.println("The general 
command line syntax is"); + out.println("bin/hadoop command [genericOptions] [commandOptions]\n"); + } + +} diff --git a/src/java/org/apache/hadoop/util/GenericsUtil.java b/src/java/org/apache/hadoop/util/GenericsUtil.java new file mode 100644 index 00000000000..ca4ea7ebb44 --- /dev/null +++ b/src/java/org/apache/hadoop/util/GenericsUtil.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.lang.reflect.Array; +import java.util.List; + +/** + * Contains utility methods for dealing with Java Generics. + */ +public class GenericsUtil { + + /** + * Returns the Class object (of type Class<T>) of the + * argument of type T. + * @param The type of the argument + * @param t the object to get it class + * @return Class<T> + */ + public static Class getClass(T t) { + @SuppressWarnings("unchecked") + Class clazz = (Class)t.getClass(); + return clazz; + } + + /** + * Converts the given List<T> to a an array of + * T[]. + * @param c the Class object of the items in the list + * @param list the list to convert + */ + public static T[] toArray(Class c, List list) + { + @SuppressWarnings("unchecked") + T[] ta= (T[])Array.newInstance(c, list.size()); + + for (int i= 0; iList<T> to a an array of + * T[]. + * @param list the list to convert + * @throws ArrayIndexOutOfBoundsException if the list is empty. + * Use {@link #toArray(Class, List)} if the list may be empty. + */ + public static T[] toArray(List list) { + return toArray(getClass(list.get(0)), list); + } + +} diff --git a/src/java/org/apache/hadoop/util/HeapSort.java b/src/java/org/apache/hadoop/util/HeapSort.java new file mode 100644 index 00000000000..068a72e8778 --- /dev/null +++ b/src/java/org/apache/hadoop/util/HeapSort.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * An implementation of the core algorithm of HeapSort. 
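+ *
+ * A minimal usage sketch (illustrative only), sorting an int[] in place:
+ *
+ *   final int[] a = { 3, 1, 2 };
+ *   new HeapSort().sort(new IndexedSortable() {
+ *     public int compare(int i, int j) { return a[i] - a[j]; }
+ *     public void swap(int i, int j) { int t = a[i]; a[i] = a[j]; a[j] = t; }
+ *   }, 0, a.length);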
+ */ +public final class HeapSort implements IndexedSorter { + + public HeapSort() { } + + private static void downHeap(final IndexedSortable s, final int b, + int i, final int N) { + for (int idx = i << 1; idx < N; idx = i << 1) { + if (idx + 1 < N && s.compare(b + idx, b + idx + 1) < 0) { + if (s.compare(b + i, b + idx + 1) < 0) { + s.swap(b + i, b + idx + 1); + } else return; + i = idx + 1; + } else if (s.compare(b + i, b + idx) < 0) { + s.swap(b + i, b + idx); + i = idx; + } else return; + } + } + + /** + * Sort the given range of items using heap sort. + * {@inheritDoc} + */ + public void sort(IndexedSortable s, int p, int r) { + sort(s, p, r, null); + } + + /** + * {@inheritDoc} + */ + public void sort(final IndexedSortable s, final int p, final int r, + final Progressable rep) { + final int N = r - p; + // build heap w/ reverse comparator, then write in-place from end + final int t = Integer.highestOneBit(N); + for (int i = t; i > 1; i >>>= 1) { + for (int j = i >>> 1; j < i; ++j) { + downHeap(s, p-1, j, N + 1); + } + if (null != rep) { + rep.progress(); + } + } + for (int i = r - 1; i > p; --i) { + s.swap(p, i); + downHeap(s, p - 1, 1, i - p + 1); + } + } +} diff --git a/src/java/org/apache/hadoop/util/HostsFileReader.java b/src/java/org/apache/hadoop/util/HostsFileReader.java new file mode 100644 index 00000000000..89cdb39b0c8 --- /dev/null +++ b/src/java/org/apache/hadoop/util/HostsFileReader.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.*; +import java.util.Set; +import java.util.HashSet; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.Log; + +// Keeps track of which datanodes/tasktrackers are allowed to connect to the +// namenode/jobtracker. 
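+// A minimal usage sketch (file names and hostName are illustrative):
+//   HostsFileReader hosts = new HostsFileReader("conf/hosts.include",
+//                                               "conf/hosts.exclude");
+//   boolean included = hosts.getHosts().contains(hostName);
+//   boolean excluded = hosts.getExcludedHosts().contains(hostName);
+//   hosts.refresh();   // re-read both files after they change on disk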
+public class HostsFileReader { + private Set includes; + private Set excludes; + private String includesFile; + private String excludesFile; + + private static final Log LOG = LogFactory.getLog(HostsFileReader.class); + + public HostsFileReader(String inFile, + String exFile) throws IOException { + includes = new HashSet(); + excludes = new HashSet(); + includesFile = inFile; + excludesFile = exFile; + refresh(); + } + + private void readFileToSet(String filename, Set set) throws IOException { + File file = new File(filename); + if (!file.exists()) { + return; + } + FileInputStream fis = new FileInputStream(file); + BufferedReader reader = null; + try { + reader = new BufferedReader(new InputStreamReader(fis)); + String line; + while ((line = reader.readLine()) != null) { + String[] nodes = line.split("[ \t\n\f\r]+"); + if (nodes != null) { + for (int i = 0; i < nodes.length; i++) { + if (!nodes[i].equals("")) { + set.add(nodes[i]); // might need to add canonical name + } + } + } + } + } finally { + if (reader != null) { + reader.close(); + } + fis.close(); + } + } + + public synchronized void refresh() throws IOException { + LOG.info("Refreshing hosts (include/exclude) list"); + if (!includesFile.equals("")) { + Set newIncludes = new HashSet(); + readFileToSet(includesFile, newIncludes); + // switch the new hosts that are to be included + includes = newIncludes; + } + if (!excludesFile.equals("")) { + Set newExcludes = new HashSet(); + readFileToSet(excludesFile, newExcludes); + // switch the excluded hosts + excludes = newExcludes; + } + } + + public synchronized Set getHosts() { + return includes; + } + + public synchronized Set getExcludedHosts() { + return excludes; + } + + public synchronized void setIncludesFile(String includesFile) { + LOG.info("Setting the includes file to " + includesFile); + this.includesFile = includesFile; + } + + public synchronized void setExcludesFile(String excludesFile) { + LOG.info("Setting the excludes file to " + excludesFile); + this.excludesFile = excludesFile; + } + + public synchronized void updateFileNames(String includesFile, + String excludesFile) + throws IOException { + setIncludesFile(includesFile); + setExcludesFile(excludesFile); + } +} diff --git a/src/java/org/apache/hadoop/util/IndexedSortable.java b/src/java/org/apache/hadoop/util/IndexedSortable.java new file mode 100644 index 00000000000..fdd758c519e --- /dev/null +++ b/src/java/org/apache/hadoop/util/IndexedSortable.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * Interface for collections capable of being sorted by {@link IndexedSorter} + * algorithms. 
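+ *
+ * Implementations typically index into one or more parallel arrays, so that
+ * compare() reads the keys and swap() moves whole records together. A sketch
+ * (illustrative only):
+ *
+ *   class PairSortable implements IndexedSortable {
+ *     private final int[] keys; private final long[] values;
+ *     PairSortable(int[] k, long[] v) { keys = k; values = v; }
+ *     public int compare(int i, int j) { return keys[i] - keys[j]; }
+ *     public void swap(int i, int j) {
+ *       int k = keys[i]; keys[i] = keys[j]; keys[j] = k;
+ *       long v = values[i]; values[i] = values[j]; values[j] = v;
+ *     }
+ *   }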
+ */ +public interface IndexedSortable { + + /** + * Compare items at the given addresses consistent with the semantics of + * {@link java.util.Comparator#compare(Object, Object)}. + */ + int compare(int i, int j); + + /** + * Swap items at the given addresses. + */ + void swap(int i, int j); +} diff --git a/src/java/org/apache/hadoop/util/IndexedSorter.java b/src/java/org/apache/hadoop/util/IndexedSorter.java new file mode 100644 index 00000000000..77c725fb2b7 --- /dev/null +++ b/src/java/org/apache/hadoop/util/IndexedSorter.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * Interface for sort algorithms accepting {@link IndexedSortable} items. + * + * A sort algorithm implementing this interface may only + * {@link IndexedSortable#compare} and {@link IndexedSortable#swap} items + * for a range of indices to effect a sort across that range. + */ +public interface IndexedSorter { + + /** + * Sort the items accessed through the given IndexedSortable over the given + * range of logical indices. From the perspective of the sort algorithm, + * each index between l (inclusive) and r (exclusive) is an addressable + * entry. + * @see IndexedSortable#compare + * @see IndexedSortable#swap + */ + void sort(IndexedSortable s, int l, int r); + + /** + * Same as {@link #sort(IndexedSortable,int,int)}, but indicate progress + * periodically. + * @see #sort(IndexedSortable,int,int) + */ + void sort(IndexedSortable s, int l, int r, Progressable rep); + +} diff --git a/src/java/org/apache/hadoop/util/LineReader.java b/src/java/org/apache/hadoop/util/LineReader.java new file mode 100644 index 00000000000..b5c6e6843dd --- /dev/null +++ b/src/java/org/apache/hadoop/util/LineReader.java @@ -0,0 +1,190 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; + +/** + * A class that provides a line reader from an input stream. + */ +public class LineReader { + private static final int DEFAULT_BUFFER_SIZE = 64 * 1024; + private int bufferSize = DEFAULT_BUFFER_SIZE; + private InputStream in; + private byte[] buffer; + // the number of bytes of real data in the buffer + private int bufferLength = 0; + // the current position in the buffer + private int bufferPosn = 0; + + private static final byte CR = '\r'; + private static final byte LF = '\n'; + + /** + * Create a line reader that reads from the given stream using the + * default buffer-size (64k). + * @param in The input stream + * @throws IOException + */ + public LineReader(InputStream in) { + this(in, DEFAULT_BUFFER_SIZE); + } + + /** + * Create a line reader that reads from the given stream using the + * given buffer-size. + * @param in The input stream + * @param bufferSize Size of the read buffer + * @throws IOException + */ + public LineReader(InputStream in, int bufferSize) { + this.in = in; + this.bufferSize = bufferSize; + this.buffer = new byte[this.bufferSize]; + } + + /** + * Create a line reader that reads from the given stream using the + * io.file.buffer.size specified in the given + * Configuration. + * @param in input stream + * @param conf configuration + * @throws IOException + */ + public LineReader(InputStream in, Configuration conf) throws IOException { + this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE)); + } + + /** + * Close the underlying stream. + * @throws IOException + */ + public void close() throws IOException { + in.close(); + } + + /** + * Read one line from the InputStream into the given Text. A line + * can be terminated by one of the following: '\n' (LF) , '\r' (CR), + * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated + * line. + * + * @param str the object to store the given line (without newline) + * @param maxLineLength the maximum number of bytes to store into str; + * the rest of the line is silently discarded. + * @param maxBytesToConsume the maximum number of bytes to consume + * in this call. This is only a hint, because if the line cross + * this threshold, we allow it to happen. It can overshoot + * potentially by as much as one buffer length. + * + * @return the number of bytes read including the (longest) newline + * found. + * + * @throws IOException if the underlying stream throws + */ + public int readLine(Text str, int maxLineLength, + int maxBytesToConsume) throws IOException { + /* We're reading data from in, but the head of the stream may be + * already buffered in buffer, so we have several cases: + * 1. No newline characters are in the buffer, so we need to copy + * everything and read another buffer from the stream. + * 2. An unambiguously terminated line is in buffer, so we just + * copy to str. + * 3. Ambiguously terminated line is in buffer, i.e. buffer ends + * in CR. In this case we copy everything up to CR to str, but + * we also need to see what follows CR: if it's LF, then we + * need consume LF as well, so next call to readLine will read + * from after that. + * We use a flag prevCharCR to signal if previous character was CR + * and, if it happens to be at the end of the buffer, delay + * consuming it until we have a chance to look at the char that + * follows. 
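+ * Example (illustrative): if one buffer ends with "abc\r" and the next
+ * read starts with "\nxyz", we append "abc" to str, count "\r\n" as a
+ * single two-byte newline, and the next call to readLine starts at 'x'.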
+ */ + str.clear(); + int txtLength = 0; //tracks str.getLength(), as an optimization + int newlineLength = 0; //length of terminating newline + boolean prevCharCR = false; //true of prev char was CR + long bytesConsumed = 0; + do { + int startPosn = bufferPosn; //starting from where we left off the last time + if (bufferPosn >= bufferLength) { + startPosn = bufferPosn = 0; + if (prevCharCR) + ++bytesConsumed; //account for CR from previous read + bufferLength = in.read(buffer); + if (bufferLength <= 0) + break; // EOF + } + for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline + if (buffer[bufferPosn] == LF) { + newlineLength = (prevCharCR) ? 2 : 1; + ++bufferPosn; // at next invocation proceed from following byte + break; + } + if (prevCharCR) { //CR + notLF, we are at notLF + newlineLength = 1; + break; + } + prevCharCR = (buffer[bufferPosn] == CR); + } + int readLength = bufferPosn - startPosn; + if (prevCharCR && newlineLength == 0) + --readLength; //CR at the end of the buffer + bytesConsumed += readLength; + int appendLength = readLength - newlineLength; + if (appendLength > maxLineLength - txtLength) { + appendLength = maxLineLength - txtLength; + } + if (appendLength > 0) { + str.append(buffer, startPosn, appendLength); + txtLength += appendLength; + } + } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); + + if (bytesConsumed > (long)Integer.MAX_VALUE) + throw new IOException("Too many bytes before newline: " + bytesConsumed); + return (int)bytesConsumed; + } + + /** + * Read from the InputStream into the given Text. + * @param str the object to store the given line + * @param maxLineLength the maximum number of bytes to store into str. + * @return the number of bytes read including the newline + * @throws IOException if the underlying stream throws + */ + public int readLine(Text str, int maxLineLength) throws IOException { + return readLine(str, maxLineLength, Integer.MAX_VALUE); +} + + /** + * Read from the InputStream into the given Text. + * @param str the object to store the given line + * @return the number of bytes read including the newline + * @throws IOException if the underlying stream throws + */ + public int readLine(Text str) throws IOException { + return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE); + } + +} diff --git a/src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java b/src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java new file mode 100644 index 00000000000..3870a4715a3 --- /dev/null +++ b/src/java/org/apache/hadoop/util/LinuxMemoryCalculatorPlugin.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Plugin to calculate virtual and physical memories on Linux systems. + */ +public class LinuxMemoryCalculatorPlugin extends MemoryCalculatorPlugin { + private static final Log LOG = + LogFactory.getLog(LinuxMemoryCalculatorPlugin.class); + + /** + * proc's meminfo virtual file has keys-values in the format + * "key:[ \t]*value[ \t]kB". + */ + private static final String PROCFS_MEMFILE = "/proc/meminfo"; + private static final Pattern PROCFS_MEMFILE_FORMAT = + Pattern.compile("^([a-zA-Z]*):[ \t]*([0-9]*)[ \t]kB"); + + // We just need the values for the keys MemTotal and SwapTotal + private static final String MEMTOTAL_STRING = "MemTotal"; + private static final String SWAPTOTAL_STRING = "SwapTotal"; + + private long ramSize = 0; + private long swapSize = 0; + + boolean readMemInfoFile = false; + + private void readProcMemInfoFile() { + + if (readMemInfoFile) { + return; + } + + // Read "/proc/memInfo" file + BufferedReader in = null; + FileReader fReader = null; + try { + fReader = new FileReader(PROCFS_MEMFILE); + in = new BufferedReader(fReader); + } catch (FileNotFoundException f) { + // shouldn't happen.... + return; + } + + Matcher mat = null; + + try { + String str = in.readLine(); + while (str != null) { + mat = PROCFS_MEMFILE_FORMAT.matcher(str); + if (mat.find()) { + if (mat.group(1).equals(MEMTOTAL_STRING)) { + ramSize = Long.parseLong(mat.group(2)); + } else if (mat.group(1).equals(SWAPTOTAL_STRING)) { + swapSize = Long.parseLong(mat.group(2)); + } + } + str = in.readLine(); + } + } catch (IOException io) { + LOG.warn("Error reading the stream " + io); + } finally { + // Close the streams + try { + fReader.close(); + try { + in.close(); + } catch (IOException i) { + LOG.warn("Error closing the stream " + in); + } + } catch (IOException i) { + LOG.warn("Error closing the stream " + fReader); + } + } + + readMemInfoFile = true; + } + + /** {@inheritDoc} */ + @Override + public long getPhysicalMemorySize() { + readProcMemInfoFile(); + return ramSize * 1024; + } + + /** {@inheritDoc} */ + @Override + public long getVirtualMemorySize() { + readProcMemInfoFile(); + return (ramSize + swapSize) * 1024; + } + + /** + * Test the {@link LinuxMemoryCalculatorPlugin} + * + * @param args + */ + public static void main(String[] args) { + LinuxMemoryCalculatorPlugin plugin = new LinuxMemoryCalculatorPlugin(); + System.out.println("Physical memory Size(bytes) : " + + plugin.getPhysicalMemorySize()); + System.out.println("Total Virtual memory Size(bytes) : " + + plugin.getVirtualMemorySize()); + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java b/src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java new file mode 100644 index 00000000000..a767b663d46 --- /dev/null +++ b/src/java/org/apache/hadoop/util/MemoryCalculatorPlugin.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; + +/** + * Plugin to calculate virtual and physical memories on the system. + * + */ +public abstract class MemoryCalculatorPlugin extends Configured { + + /** + * Obtain the total size of the virtual memory present in the system. + * + * @return virtual memory size in bytes. + */ + public abstract long getVirtualMemorySize(); + + /** + * Obtain the total size of the physical memory present in the system. + * + * @return physical memory size bytes. + */ + public abstract long getPhysicalMemorySize(); + + /** + * Get the MemoryCalculatorPlugin from the class name and configure it. If + * class name is null, this method will try and return a memory calculator + * plugin available for this system. + * + * @param clazz class-name + * @param conf configure the plugin with this. + * @return MemoryCalculatorPlugin + */ + public static MemoryCalculatorPlugin getMemoryCalculatorPlugin( + Class clazz, Configuration conf) { + + if (clazz != null) { + return ReflectionUtils.newInstance(clazz, conf); + } + + // No class given, try a os specific class + try { + String osName = System.getProperty("os.name"); + if (osName.startsWith("Linux")) { + return new LinuxMemoryCalculatorPlugin(); + } + } catch (SecurityException se) { + // Failed to get Operating System name. + return null; + } + + // Not supported on this system. + return null; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/MergeSort.java b/src/java/org/apache/hadoop/util/MergeSort.java new file mode 100644 index 00000000000..3c104924563 --- /dev/null +++ b/src/java/org/apache/hadoop/util/MergeSort.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.Comparator; +import org.apache.hadoop.io.IntWritable; + +/** An implementation of the core algorithm of MergeSort. 
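+ *
+ * A minimal usage sketch (illustrative only); src and dest must start out as
+ * copies of the same data, and the sorted result ends up in dest:
+ *
+ *   int[] src  = { 3, 1, 2 };
+ *   int[] dest = src.clone();
+ *   Comparator<IntWritable> cmp = new Comparator<IntWritable>() {
+ *     public int compare(IntWritable a, IntWritable b) { return a.get() - b.get(); }
+ *   };
+ *   new MergeSort(cmp).mergeSort(src, dest, 0, src.length);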
*/ +public class MergeSort { + //Reusable IntWritables + IntWritable I = new IntWritable(0); + IntWritable J = new IntWritable(0); + + //the comparator that the algo should use + private Comparator comparator; + + public MergeSort(Comparator comparator) { + this.comparator = comparator; + } + + public void mergeSort(int src[], int dest[], int low, int high) { + int length = high - low; + + // Insertion sort on smallest arrays + if (length < 7) { + for (int i=low; i low; j--) { + I.set(dest[j-1]); + J.set(dest[j]); + if (comparator.compare(I, J)>0) + swap(dest, j, j-1); + } + } + return; + } + + // Recursively sort halves of dest into src + int mid = (low + high) >>> 1; + mergeSort(dest, src, low, mid); + mergeSort(dest, src, mid, high); + + I.set(src[mid-1]); + J.set(src[mid]); + // If list is already sorted, just copy from src to dest. This is an + // optimization that results in faster sorts for nearly ordered lists. + if (comparator.compare(I, J) <= 0) { + System.arraycopy(src, low, dest, low, length); + return; + } + + // Merge sorted halves (now in src) into dest + for (int i = low, p = low, q = mid; i < high; i++) { + if (q < high && p < mid) { + I.set(src[p]); + J.set(src[q]); + } + if (q>=high || ptrue if native-hadoop is loaded, + * else false + */ + public static boolean isNativeCodeLoaded() { + return nativeCodeLoaded; + } + + /** + * Return if native hadoop libraries, if present, can be used for this job. + * @param conf configuration + * + * @return true if native hadoop libraries, if present, can be + * used for this job; false otherwise. + */ + public boolean getLoadNativeLibraries(Configuration conf) { + return conf.getBoolean("hadoop.native.lib", true); + } + + /** + * Set if native hadoop libraries, if present, can be used for this job. + * + * @param conf configuration + * @param loadNativeLibraries can native hadoop libraries be loaded + */ + public void setLoadNativeLibraries(Configuration conf, + boolean loadNativeLibraries) { + conf.setBoolean("hadoop.native.lib", loadNativeLibraries); + } + +} diff --git a/src/java/org/apache/hadoop/util/PlatformName.java b/src/java/org/apache/hadoop/util/PlatformName.java new file mode 100644 index 00000000000..7c173f8515c --- /dev/null +++ b/src/java/org/apache/hadoop/util/PlatformName.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +/** + * A helper class for getting build-info of the java-vm. + * + */ +public class PlatformName { + /** + * The complete platform 'name' to identify the platform as + * per the java-vm. 
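+ * For example, a 32-bit Sun JVM on Linux/x86 would typically report
+ * "Linux-i386-32".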
+ */ + private static final String platformName = System.getProperty("os.name") + "-" + + System.getProperty("os.arch") + "-" + + System.getProperty("sun.arch.data.model"); + + /** + * Get the complete platform as per the java-vm. + * @return returns the complete platform as per the java-vm. + */ + public static String getPlatformName() { + return platformName; + } + + public static void main(String[] args) { + System.out.println(platformName); + } +} diff --git a/src/java/org/apache/hadoop/util/PrintJarMainClass.java b/src/java/org/apache/hadoop/util/PrintJarMainClass.java new file mode 100644 index 00000000000..d693ebb8525 --- /dev/null +++ b/src/java/org/apache/hadoop/util/PrintJarMainClass.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.jar.*; + +/** + * A micro-application that prints the main class name out of a jar file. + */ +public class PrintJarMainClass { + + /** + * @param args + */ + public static void main(String[] args) { + try { + JarFile jar_file = new JarFile(args[0]); + if (jar_file != null) { + Manifest manifest = jar_file.getManifest(); + if (manifest != null) { + String value = manifest.getMainAttributes().getValue("Main-Class"); + if (value != null) { + System.out.println(value.replaceAll("/", ".")); + return; + } + } + } + } catch (Throwable e) { + // ignore it + } + System.out.println("UNKNOWN"); + System.exit(1); + } + +} diff --git a/src/java/org/apache/hadoop/util/PriorityQueue.java b/src/java/org/apache/hadoop/util/PriorityQueue.java new file mode 100644 index 00000000000..12324103e0d --- /dev/null +++ b/src/java/org/apache/hadoop/util/PriorityQueue.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + + +/** A PriorityQueue maintains a partial ordering of its elements such that the + least element can always be found in constant time. Put()'s and pop()'s + require log(size) time. 
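+ *
+ * A minimal subclass sketch (illustrative only):
+ *
+ *   PriorityQueue<Long> pq = new PriorityQueue<Long>() {
+ *     { initialize(16); }                      // room for 16 elements
+ *     protected boolean lessThan(Object a, Object b) {
+ *       return (Long) a < (Long) b;
+ *     }
+ *   };
+ *   pq.put(3L); pq.put(1L);                    // pq.top() now returns 1L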
*/ +public abstract class PriorityQueue { + private T[] heap; + private int size; + private int maxSize; + + /** Determines the ordering of objects in this priority queue. Subclasses + must define this one method. */ + protected abstract boolean lessThan(Object a, Object b); + + /** Subclass constructors must call this. */ + @SuppressWarnings("unchecked") + protected final void initialize(int maxSize) { + size = 0; + int heapSize = maxSize + 1; + heap = (T[]) new Object[heapSize]; + this.maxSize = maxSize; + } + + /** + * Adds an Object to a PriorityQueue in log(size) time. + * If one tries to add more objects than maxSize from initialize + * a RuntimeException (ArrayIndexOutOfBound) is thrown. + */ + public final void put(T element) { + size++; + heap[size] = element; + upHeap(); + } + + /** + * Adds element to the PriorityQueue in log(size) time if either + * the PriorityQueue is not full, or not lessThan(element, top()). + * @param element + * @return true if element is added, false otherwise. + */ + public boolean insert(T element){ + if (size < maxSize){ + put(element); + return true; + } + else if (size > 0 && !lessThan(element, top())){ + heap[1] = element; + adjustTop(); + return true; + } + else + return false; + } + + /** Returns the least element of the PriorityQueue in constant time. */ + public final T top() { + if (size > 0) + return heap[1]; + else + return null; + } + + /** Removes and returns the least element of the PriorityQueue in log(size) + time. */ + public final T pop() { + if (size > 0) { + T result = heap[1]; // save first value + heap[1] = heap[size]; // move last to first + heap[size] = null; // permit GC of objects + size--; + downHeap(); // adjust heap + return result; + } else + return null; + } + + /** Should be called when the Object at top changes values. Still log(n) + * worst case, but it's at least twice as fast to
+   *  { pq.top().change(); pq.adjustTop(); }
+   * instead of
+   *  { o = pq.pop(); o.change(); pq.push(o); }
+ */ + public final void adjustTop() { + downHeap(); + } + + + /** Returns the number of elements currently stored in the PriorityQueue. */ + public final int size() { + return size; + } + + /** Removes all entries from the PriorityQueue. */ + public final void clear() { + for (int i = 0; i <= size; i++) + heap[i] = null; + size = 0; + } + + private final void upHeap() { + int i = size; + T node = heap[i]; // save bottom node + int j = i >>> 1; + while (j > 0 && lessThan(node, heap[j])) { + heap[i] = heap[j]; // shift parents down + i = j; + j = j >>> 1; + } + heap[i] = node; // install saved node + } + + private final void downHeap() { + int i = 1; + T node = heap[i]; // save top node + int j = i << 1; // find smaller child + int k = j + 1; + if (k <= size && lessThan(heap[k], heap[j])) { + j = k; + } + while (j <= size && lessThan(heap[j], node)) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= size && lessThan(heap[k], heap[j])) { + j = k; + } + } + heap[i] = node; // install saved node + } +} diff --git a/src/java/org/apache/hadoop/util/ProcessTree.java b/src/java/org/apache/hadoop/util/ProcessTree.java new file mode 100644 index 00000000000..62b5c058ee8 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ProcessTree.java @@ -0,0 +1,239 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.util.Shell.ExitCodeException; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; + +/** + * Process tree related operations + */ +public class ProcessTree { + + private static final Log LOG = LogFactory.getLog(ProcessTree.class); + + public static final long DEFAULT_SLEEPTIME_BEFORE_SIGKILL = 5000L; + + public static final boolean isSetsidAvailable = isSetsidSupported(); + private static boolean isSetsidSupported() { + ShellCommandExecutor shexec = null; + boolean setsidSupported = true; + try { + String[] args = {"setsid", "bash", "-c", "echo $$"}; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("setsid is not available on this machine. 
So not using it."); + setsidSupported = false; + } finally { // handle the exit code + LOG.info("setsid exited with exit code " + shexec.getExitCode()); + return setsidSupported; + } + } + + /** + * Kills the process(OR process group) by sending the signal SIGKILL + * in the current thread + * @param pid Process id(OR process group id) of to-be-deleted-process + * @param isProcessGroup Is pid a process group id of to-be-deleted-processes + * @param sleepTimeBeforeSigKill wait time before sending SIGKILL after + * sending SIGTERM + */ + private static void sigKillInCurrentThread(String pid, boolean isProcessGroup, + long sleepTimeBeforeSigKill) { + // Kill the subprocesses of root process(even if the root process is not + // alive) if process group is to be killed. + if (isProcessGroup || ProcessTree.isAlive(pid)) { + try { + // Sleep for some time before sending SIGKILL + Thread.sleep(sleepTimeBeforeSigKill); + } catch (InterruptedException i) { + LOG.warn("Thread sleep is interrupted."); + } + + ShellCommandExecutor shexec = null; + + try { + String pid_pgrpid; + if(isProcessGroup) {//kill the whole process group + pid_pgrpid = "-" + pid; + } + else {//kill single process + pid_pgrpid = pid; + } + + String[] args = { "kill", "-9", pid_pgrpid }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + ioe); + } finally { + if(isProcessGroup) { + LOG.info("Killing process group" + pid + " with SIGKILL. Exit code " + + shexec.getExitCode()); + } + else { + LOG.info("Killing process " + pid + " with SIGKILL. Exit code " + + shexec.getExitCode()); + } + } + } + } + + /** Kills the process(OR process group) by sending the signal SIGKILL + * @param pid Process id(OR process group id) of to-be-deleted-process + * @param isProcessGroup Is pid a process group id of to-be-deleted-processes + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + private static void sigKill(String pid, boolean isProcessGroup, + long sleeptimeBeforeSigkill, boolean inBackground) { + + if(inBackground) { // use a separate thread for killing + SigKillThread sigKillThread = new SigKillThread(pid, isProcessGroup, + sleeptimeBeforeSigkill); + sigKillThread.setDaemon(true); + sigKillThread.start(); + } + else { + sigKillInCurrentThread(pid, isProcessGroup, sleeptimeBeforeSigkill); + } + } + + /** Destroy the process. + * @param pid Process id of to-be-killed-process + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + protected static void destroyProcess(String pid, long sleeptimeBeforeSigkill, + boolean inBackground) { + ShellCommandExecutor shexec = null; + try { + String[] args = { "kill", pid }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + ioe); + } finally { + LOG.info("Killing process " + pid + + " with SIGTERM. Exit code " + shexec.getExitCode()); + } + + sigKill(pid, false, sleeptimeBeforeSigkill, inBackground); + } + + /** Destroy the process group. 
+ * @param pgrpId Process group id of to-be-killed-processes + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process group is to be killed in the back ground with + * a separate thread + */ + protected static void destroyProcessGroup(String pgrpId, + long sleeptimeBeforeSigkill, boolean inBackground) { + ShellCommandExecutor shexec = null; + try { + String[] args = { "kill", "--", "-" + pgrpId }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + ioe); + } finally { + LOG.info("Killing all processes in the process group " + pgrpId + + " with SIGTERM. Exit code " + shexec.getExitCode()); + } + + sigKill(pgrpId, true, sleeptimeBeforeSigkill, inBackground); + } + + /** + * Destroy the process-tree. + * @param pid process id of the root process of the subtree of processes + * to be killed + * @param sleeptimeBeforeSigkill The time to wait before sending SIGKILL + * after sending SIGTERM + * @param isProcessGroup pid is a process group leader or not + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + public static void destroy(String pid, long sleeptimeBeforeSigkill, + boolean isProcessGroup, boolean inBackground) { + if(isProcessGroup) { + destroyProcessGroup(pid, sleeptimeBeforeSigkill, inBackground); + } + else { + //TODO: Destroy all the processes in the subtree in this case also. + // For the time being, killing only the root process. + destroyProcess(pid, sleeptimeBeforeSigkill, inBackground); + } + } + + + /** + * Is the process with PID pid still alive? + * This method assumes that isAlive is called on a pid that was alive not + * too long ago, and hence assumes no chance of pid-wrapping-around. + */ + public static boolean isAlive(String pid) { + ShellCommandExecutor shexec = null; + try { + String[] args = { "kill", "-0", pid }; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (ExitCodeException ee) { + return false; + } catch (IOException ioe) { + LOG.warn("Error executing shell command " + + Arrays.toString(shexec.getExecString()) + ioe); + return false; + } + return (shexec.getExitCode() == 0 ? true : false); + } + + /** + * Helper thread class that kills process-tree with SIGKILL in background + */ + static class SigKillThread extends Thread { + private String pid = null; + private boolean isProcessGroup = false; + + private long sleepTimeBeforeSigKill = DEFAULT_SLEEPTIME_BEFORE_SIGKILL; + + private SigKillThread(String pid, boolean isProcessGroup, long interval) { + this.pid = pid; + this.isProcessGroup = isProcessGroup; + this.setName(this.getClass().getName() + "-" + pid); + sleepTimeBeforeSigKill = interval; + } + + public void run() { + sigKillInCurrentThread(pid, isProcessGroup, sleepTimeBeforeSigKill); + } + } +} diff --git a/src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java b/src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java new file mode 100644 index 00000000000..52dd36ecc14 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ProcfsBasedProcessTree.java @@ -0,0 +1,448 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.LinkedList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * A Proc file-system based ProcessTree. Works only on Linux. + */ +public class ProcfsBasedProcessTree extends ProcessTree { + + private static final Log LOG = LogFactory + .getLog(ProcfsBasedProcessTree.class); + + private static final String PROCFS = "/proc/"; + + private static final Pattern PROCFS_STAT_FILE_FORMAT = Pattern + .compile("^([0-9-]+)\\s([^\\s]+)\\s[^\\s]\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+\\s){16}([0-9]+)(\\s[0-9-]+){16}"); + + private Integer pid = -1; + private boolean setsidUsed = false; + private long sleeptimeBeforeSigkill = DEFAULT_SLEEPTIME_BEFORE_SIGKILL; + + private Map processTree = new HashMap(); + + public ProcfsBasedProcessTree(String pid) { + this(pid, false, DEFAULT_SLEEPTIME_BEFORE_SIGKILL); + } + + public ProcfsBasedProcessTree(String pid, boolean setsidUsed, + long sigkillInterval) { + this.pid = getValidPID(pid); + this.setsidUsed = setsidUsed; + sleeptimeBeforeSigkill = sigkillInterval; + } + + /** + * Sets SIGKILL interval + * @deprecated Use {@link ProcfsBasedProcessTree#ProcfsBasedProcessTree( + * String, boolean, long)} instead + * @param interval The time to wait before sending SIGKILL + * after sending SIGTERM + */ + @Deprecated + public void setSigKillInterval(long interval) { + sleeptimeBeforeSigkill = interval; + } + + /** + * Checks if the ProcfsBasedProcessTree is available on this system. + * + * @return true if ProcfsBasedProcessTree is available. False otherwise. + */ + public static boolean isAvailable() { + try { + String osName = System.getProperty("os.name"); + if (!osName.startsWith("Linux")) { + LOG.info("ProcfsBasedProcessTree currently is supported only on " + + "Linux."); + return false; + } + } catch (SecurityException se) { + LOG.warn("Failed to get Operating System name. " + se); + return false; + } + return true; + } + + /** + * Get the process-tree with latest state. If the root-process is not alive, + * an empty tree will be returned. + * + * @return the process-tree with latest state. 
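+ *
+ * A typical monitoring loop looks roughly like (illustrative only; rootPid
+ * is assumed to hold the pid of the root process as a String):
+ *
+ *   ProcfsBasedProcessTree tree = new ProcfsBasedProcessTree(rootPid);
+ *   tree = tree.getProcessTree();              // re-scan /proc
+ *   long vmemBytes = tree.getCumulativeVmem();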
+ */ + public ProcfsBasedProcessTree getProcessTree() { + if (pid != -1) { + // Get the list of processes + List processList = getProcessList(); + + Map allProcessInfo = new HashMap(); + processTree.clear(); + + ProcessInfo me = null; + for (Integer proc : processList) { + // Get information for each process + ProcessInfo pInfo = new ProcessInfo(proc); + if (constructProcessInfo(pInfo) != null) { + allProcessInfo.put(proc, pInfo); + if (proc.equals(this.pid)) { + me = pInfo; // cache 'me' + processTree.put(proc, pInfo); + } + } + } + + if (me == null) { + return this; + } + + // Add each process to its parent. + for (Map.Entry entry : allProcessInfo.entrySet()) { + Integer pID = entry.getKey(); + if (pID != 1) { + ProcessInfo pInfo = entry.getValue(); + ProcessInfo parentPInfo = allProcessInfo.get(pInfo.getPpid()); + if (parentPInfo != null) { + parentPInfo.addChild(pInfo); + } + } + } + + // now start constructing the process-tree + LinkedList pInfoQueue = new LinkedList(); + pInfoQueue.addAll(me.getChildren()); + while (!pInfoQueue.isEmpty()) { + ProcessInfo pInfo = pInfoQueue.remove(); + if (!processTree.containsKey(pInfo.getPid())) { + processTree.put(pInfo.getPid(), pInfo); + } + pInfoQueue.addAll(pInfo.getChildren()); + } + + if (LOG.isDebugEnabled()) { + // Log.debug the ProcfsBasedProcessTree + LOG.debug(this.toString()); + } + } + return this; + } + + /** + * Is the root-process alive? + * + * @return true if the root-process is alive, false otherwise. + */ + public boolean isAlive() { + if (pid == -1) { + return false; + } else { + return isAlive(pid.toString()); + } + } + + /** + * Is any of the subprocesses in the process-tree alive? + * + * @return true if any of the processes in the process-tree is + * alive, false otherwise. + */ + public boolean isAnyProcessInTreeAlive() { + for (Integer pId : processTree.keySet()) { + if (isAlive(pId.toString())) { + return true; + } + } + return false; + } + + /** Verify that the given process id is same as its process group id. + * @param pidStr Process id of the to-be-verified-process + */ + private static boolean assertPidPgrpidForMatch(String pidStr) { + Integer pId = Integer.parseInt(pidStr); + // Get information for this process + ProcessInfo pInfo = new ProcessInfo(pId); + pInfo = constructProcessInfo(pInfo); + //make sure that pId and its pgrpId match + if (!pInfo.getPgrpId().equals(pId)) { + LOG.warn("Unexpected: Process with PID " + pId + + " is not a process group leader."); + return false; + } + if (LOG.isDebugEnabled()) { + LOG.debug(pId + " is a process group leader, as expected."); + } + return true; + } + + /** Make sure that the given pid is a process group leader and then + * destroy the process group. + * @param pgrpId Process group id of to-be-killed-processes + * @param interval The time to wait before sending SIGKILL + * after sending SIGTERM + * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + public static void assertAndDestroyProcessGroup(String pgrpId, long interval, + boolean inBackground) + throws IOException { + // Make sure that the pid given is a process group leader + if (!assertPidPgrpidForMatch(pgrpId)) { + throw new IOException("Process with PID " + pgrpId + + " is not a process group leader."); + } + destroyProcessGroup(pgrpId, interval, inBackground); + } + + /** + * Destroy the process-tree. + */ + public void destroy() { + destroy(true); + } + + /** + * Destroy the process-tree. 
+ * @param inBackground Process is to be killed in the back ground with + * a separate thread + */ + public void destroy(boolean inBackground) { + LOG.debug("Killing ProcfsBasedProcessTree of " + pid); + if (pid == -1) { + return; + } + + if (isAlive(pid.toString())) { + if (isSetsidAvailable && setsidUsed) { + // In this case, we know that pid got created using setsid. So kill the + // whole processGroup. + try { + assertAndDestroyProcessGroup(pid.toString(), sleeptimeBeforeSigkill, + inBackground); + } catch (IOException e) { + LOG.warn(StringUtils.stringifyException(e)); + } + } + else { + //TODO: Destroy all the processes in the subtree in this case also. + // For the time being, killing only the root process. + destroyProcess(pid.toString(), sleeptimeBeforeSigkill, inBackground); + } + } + } + + /** + * Get the cumulative virtual memory used by all the processes in the + * process-tree. + * + * @return cumulative virtual memory used by the process-tree in bytes. + */ + public long getCumulativeVmem() { + long total = 0; + for (ProcessInfo p : processTree.values()) { + if (p != null) { + total += p.getVmem(); + } + } + return total; + } + + private static Integer getValidPID(String pid) { + Integer retPid = -1; + try { + retPid = Integer.parseInt(pid); + if (retPid <= 0) { + retPid = -1; + } + } catch (NumberFormatException nfe) { + retPid = -1; + } + return retPid; + } + + /** + * Get the list of all processes in the system. + */ + private List getProcessList() { + String[] processDirs = (new File(PROCFS)).list(); + List processList = new ArrayList(); + + for (String dir : processDirs) { + try { + int pd = Integer.parseInt(dir); + if ((new File(PROCFS + dir)).isDirectory()) { + processList.add(Integer.valueOf(pd)); + } + } catch (NumberFormatException n) { + // skip this directory + } catch (SecurityException s) { + // skip this process + } + } + return processList; + } + + /** + * + * Construct the ProcessInfo using the process' PID and procfs and return the + * same. Returns null on failing to read from procfs, + */ + private static ProcessInfo constructProcessInfo(ProcessInfo pinfo) { + ProcessInfo ret = null; + // Read "/proc//stat" file + BufferedReader in = null; + FileReader fReader = null; + try { + fReader = new FileReader(PROCFS + pinfo.getPid() + "/stat"); + in = new BufferedReader(fReader); + } catch (FileNotFoundException f) { + // The process vanished in the interim! + return ret; + } + + ret = pinfo; + try { + String str = in.readLine(); // only one line + Matcher m = PROCFS_STAT_FILE_FORMAT.matcher(str); + boolean mat = m.find(); + if (mat) { + // Set ( name ) ( ppid ) ( pgrpId ) (session ) (vsize ) + pinfo.update(m.group(2), Integer.parseInt(m.group(3)), Integer + .parseInt(m.group(4)), Integer.parseInt(m.group(5)), Long + .parseLong(m.group(7))); + } + } catch (IOException io) { + LOG.warn("Error reading the stream " + io); + ret = null; + } finally { + // Close the streams + try { + if (fReader != null) { + fReader.close(); + } + try { + if (in != null) { + in.close(); + } + } catch (IOException i) { + LOG.warn("Error closing the stream " + in); + } + } catch (IOException i) { + LOG.warn("Error closing the stream " + fReader); + } + } + + return ret; + } + + /** + * Returns a string printing PIDs of process present in the + * ProcfsBasedProcessTree. Output format : [pid pid ..] 
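/*
 * Hedged usage sketch (not part of the patch): a monitoring loop over the
 * procfs-backed tree defined above. The pid "12345", the setsid flag and the
 * 5000 ms SIGKILL interval are assumptions; a readable Linux /proc is
 * required, which is what isAvailable() checks.
 */
import org.apache.hadoop.util.ProcfsBasedProcessTree;

class ProcfsTreeMonitorExample {
  public static void main(String[] args) throws InterruptedException {
    if (!ProcfsBasedProcessTree.isAvailable()) {
      return;                                  // procfs trees only work on Linux
    }
    ProcfsBasedProcessTree tree =
        new ProcfsBasedProcessTree("12345", false, 5000L);
    while (tree.isAlive()) {
      tree = tree.getProcessTree();            // refresh the snapshot from /proc
      System.out.println("vmem bytes: " + tree.getCumulativeVmem());
      Thread.sleep(1000L);
    }
    tree.destroy();                            // SIGTERM now, SIGKILL after the interval
  }
}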
+ */ + public String toString() { + StringBuffer pTree = new StringBuffer("[ "); + for (Integer p : processTree.keySet()) { + pTree.append(p); + pTree.append(" "); + } + return pTree.substring(0, pTree.length()) + "]"; + } + + /** + * + * Class containing information of a process. + * + */ + private static class ProcessInfo { + private Integer pid; // process-id + private String name; // command name + private Integer pgrpId; // process group-id + private Integer ppid; // parent process-id + private Integer sessionId; // session-id + private Long vmem; // virtual memory usage + private List children = new ArrayList(); // list of children + + public ProcessInfo(int pid) { + this.pid = Integer.valueOf(pid); + } + + public Integer getPid() { + return pid; + } + + public String getName() { + return name; + } + + public Integer getPgrpId() { + return pgrpId; + } + + public Integer getPpid() { + return ppid; + } + + public Integer getSessionId() { + return sessionId; + } + + public Long getVmem() { + return vmem; + } + + public boolean isParent(ProcessInfo p) { + if (pid.equals(p.getPpid())) { + return true; + } + return false; + } + + public void update(String name, Integer ppid, Integer pgrpId, + Integer sessionId, Long vmem) { + this.name = name; + this.ppid = ppid; + this.pgrpId = pgrpId; + this.sessionId = sessionId; + this.vmem = vmem; + } + + public boolean addChild(ProcessInfo p) { + return children.add(p); + } + + public List getChildren() { + return children; + } + } +} diff --git a/src/java/org/apache/hadoop/util/ProgramDriver.java b/src/java/org/apache/hadoop/util/ProgramDriver.java new file mode 100644 index 00000000000..c5880e7fe63 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ProgramDriver.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Map; +import java.util.TreeMap; + +/** A driver that is used to run programs added to it + */ + +public class ProgramDriver { + + /** + * A description of a program based on its class and a + * human-readable description. + * @date april 2006 + */ + Map programs; + + public ProgramDriver(){ + programs = new TreeMap(); + } + + static private class ProgramDescription { + + static final Class[] paramTypes = new Class[] {String[].class}; + + /** + * Create a description of an example program. 
+ * @param mainClass the class with the main for the example program + * @param description a string to display to the user in help messages + * @throws SecurityException if we can't use reflection + * @throws NoSuchMethodException if the class doesn't have a main method + */ + public ProgramDescription(Class mainClass, + String description) + throws SecurityException, NoSuchMethodException { + this.main = mainClass.getMethod("main", paramTypes); + this.description = description; + } + + /** + * Invoke the example application with the given arguments + * @param args the arguments for the application + * @throws Throwable The exception thrown by the invoked method + */ + public void invoke(String[] args) + throws Throwable { + try { + main.invoke(null, new Object[]{args}); + } catch (InvocationTargetException except) { + throw except.getCause(); + } + } + + public String getDescription() { + return description; + } + + private Method main; + private String description; + } + + private static void printUsage(Map programs) { + System.out.println("Valid program names are:"); + for(Map.Entry item : programs.entrySet()) { + System.out.println(" " + item.getKey() + ": " + + item.getValue().getDescription()); + } + } + + /** + * This is the method that adds the classed to the repository + * @param name The name of the string you want the class instance to be called with + * @param mainClass The class that you want to add to the repository + * @param description The description of the class + * @throws NoSuchMethodException + * @throws SecurityException + */ + public void addClass (String name, Class mainClass, String description) throws Throwable { + programs.put(name , new ProgramDescription(mainClass, description)); + } + + /** + * This is a driver for the example programs. + * It looks at the first command line argument and tries to find an + * example program with that name. + * If it is found, it calls the main method in that class with the rest + * of the command line arguments. + * @param args The argument from the user. args[0] is the command to run. + * @return -1 on error, 0 on success + * @throws NoSuchMethodException + * @throws SecurityException + * @throws IllegalAccessException + * @throws IllegalArgumentException + * @throws Throwable Anything thrown by the example program's main + */ + public int driver(String[] args) + throws Throwable + { + // Make sure they gave us a program name. + if (args.length == 0) { + System.out.println("An example program must be given as the" + + " first argument."); + printUsage(programs); + return -1; + } + + // And that it is good. + ProgramDescription pgm = programs.get(args[0]); + if (pgm == null) { + System.out.println("Unknown program '" + args[0] + "' chosen."); + printUsage(programs); + return -1; + } + + // Remove the leading argument and call main + String[] new_args = new String[args.length - 1]; + for(int i=1; i < args.length; ++i) { + new_args[i-1] = args[i]; + } + pgm.invoke(new_args); + return 0; + } + +} diff --git a/src/java/org/apache/hadoop/util/Progress.java b/src/java/org/apache/hadoop/util/Progress.java new file mode 100644 index 00000000000..81be35c8e01 --- /dev/null +++ b/src/java/org/apache/hadoop/util/Progress.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
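/*
 * Editorial example, not part of the patch: registering programs with the
 * ProgramDriver shown above. The Hello class is a stand-in for any class that
 * exposes a public static main(String[]).
 */
import org.apache.hadoop.util.ProgramDriver;

class ExampleDriver {
  /** Stand-in program; ProgramDriver only needs its public static main. */
  public static class Hello {
    public static void main(String[] args) {
      System.out.println("hello, " + args.length + " args");
    }
  }

  public static void main(String[] args) {
    int exitCode = -1;
    try {
      ProgramDriver driver = new ProgramDriver();
      driver.addClass("hello", Hello.class, "prints a greeting");
      exitCode = driver.driver(args);          // dispatches on args[0]
    } catch (Throwable t) {
      t.printStackTrace();
    }
    System.exit(exitCode);
  }
}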
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.ArrayList; + +/** Utility to assist with generation of progress reports. Applications build + * a hierarchy of {@link Progress} instances, each modelling a phase of + * execution. The root is constructed with {@link #Progress()}. Nodes for + * sub-phases are created by calling {@link #addPhase()}. + */ +public class Progress { + private String status = ""; + private float progress; + private int currentPhase; + private ArrayList phases = new ArrayList(); + private Progress parent; + private float progressPerPhase; + + /** Creates a new root node. */ + public Progress() {} + + /** Adds a named node to the tree. */ + public Progress addPhase(String status) { + Progress phase = addPhase(); + phase.setStatus(status); + return phase; + } + + /** Adds a node to the tree. */ + public synchronized Progress addPhase() { + Progress phase = new Progress(); + phases.add(phase); + phase.setParent(this); + progressPerPhase = 1.0f / (float)phases.size(); + return phase; + } + + synchronized Progress getParent() { return parent; } + synchronized void setParent(Progress parent) { this.parent = parent; } + + /** Called during execution to move to the next phase at this level in the + * tree. */ + public synchronized void startNextPhase() { + currentPhase++; + } + + /** Returns the current sub-node executing. */ + public synchronized Progress phase() { + return phases.get(currentPhase); + } + + /** Completes this node, moving the parent node to its next child. */ + public void complete() { + // we have to traverse up to our parent, so be careful about locking. + Progress myParent; + synchronized(this) { + progress = 1.0f; + myParent = parent; + } + if (myParent != null) { + // this will synchronize on the parent, so we make sure we release + // our lock before getting the parent's, since we're traversing + // against the normal traversal direction used by get() or toString(). + // We don't need transactional semantics, so we're OK doing this. + myParent.startNextPhase(); + } + } + + /** Called during execution on a leaf node to set its progress. */ + public synchronized void set(float progress) { + this.progress = progress; + } + + /** Returns the overall progress of the root. */ + // this method probably does not need to be synchronized as getINternal() is synchronized + // and the node's parent never changes. Still, it doesn't hurt. + public synchronized float get() { + Progress node = this; + while (node.getParent() != null) { // find the root + node = parent; + } + return node.getInternal(); + } + + /** Computes progress in this node. */ + private synchronized float getInternal() { + int phaseCount = phases.size(); + if (phaseCount != 0) { + float subProgress = + currentPhase < phaseCount ? 
phase().getInternal() : 0.0f; + return progressPerPhase*(currentPhase + subProgress); + } else { + return progress; + } + } + + public synchronized void setStatus(String status) { + this.status = status; + } + + public String toString() { + StringBuffer result = new StringBuffer(); + toString(result); + return result.toString(); + } + + private synchronized void toString(StringBuffer buffer) { + buffer.append(status); + if (phases.size() != 0 && currentPhase < phases.size()) { + buffer.append(" > "); + phase().toString(buffer); + } + } + +} diff --git a/src/java/org/apache/hadoop/util/Progressable.java b/src/java/org/apache/hadoop/util/Progressable.java new file mode 100644 index 00000000000..5bdd7daeac1 --- /dev/null +++ b/src/java/org/apache/hadoop/util/Progressable.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +/** + * A facility for reporting progress. + * + *
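/*
 * Editorial sketch (not part of the patch): building the two-level Progress
 * tree described above and reading the aggregate value. The phase names are
 * invented for the example.
 */
import org.apache.hadoop.util.Progress;

class ProgressExample {
  public static void main(String[] args) {
    Progress root = new Progress();
    Progress copy = root.addPhase("copy");
    Progress sort = root.addPhase("sort");

    copy.set(0.5f);                   // halfway through the first of two phases
    System.out.println(root.get());   // 0.25
    copy.complete();                  // finishes "copy", root moves on to "sort"
    sort.set(0.5f);
    System.out.println(root.get());   // 0.75
    System.out.println(root);         // status chain, e.g. " > sort"
  }
}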

Clients and/or applications can use the provided Progressable + * to explicitly report progress to the Hadoop framework. This is especially + * important for operations which take a significant amount of time since, + * in lieu of the reported progress, the framework has to assume that an error + * has occurred and time out the operation.

+ */ +public interface Progressable { + /** + * Report progress to the Hadoop framework. + */ + public void progress(); +} diff --git a/src/java/org/apache/hadoop/util/QuickSort.java b/src/java/org/apache/hadoop/util/QuickSort.java new file mode 100644 index 00000000000..74bf0c1ab16 --- /dev/null +++ b/src/java/org/apache/hadoop/util/QuickSort.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * An implementation of the core algorithm of QuickSort. + */ +public final class QuickSort implements IndexedSorter { + + private static final IndexedSorter alt = new HeapSort(); + + public QuickSort() { } + + private static void fix(IndexedSortable s, int p, int r) { + if (s.compare(p, r) > 0) { + s.swap(p, r); + } + } + + /** + * Deepest recursion before giving up and doing a heapsort. + * Returns 2 * ceil(log(n)). + */ + protected static int getMaxDepth(int x) { + if (x <= 0) + throw new IllegalArgumentException("Undefined for " + x); + return (32 - Integer.numberOfLeadingZeros(x - 1)) << 2; + } + + /** + * Sort the given range of items using quick sort. + * {@inheritDoc} If the recursion depth falls below {@link #getMaxDepth}, + * then switch to {@link HeapSort}. 
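/*
 * Editorial example, not part of the patch: sorting an int[] through the
 * IndexedSortable callbacks that QuickSort operates on. The Progressable only
 * marks where liveness would be reported back to a caller.
 */
import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.QuickSort;

class QuickSortExample {
  public static void main(String[] args) {
    final int[] data = {5, 3, 8, 1, 3};
    IndexedSortable sortable = new IndexedSortable() {
      public int compare(int i, int j) { return data[i] - data[j]; }
      public void swap(int i, int j) {
        int t = data[i]; data[i] = data[j]; data[j] = t;
      }
    };
    Progressable reporter = new Progressable() {
      public void progress() { /* report liveness here */ }
    };
    new QuickSort().sort(sortable, 0, data.length, reporter);
    System.out.println(java.util.Arrays.toString(data));    // [1, 3, 3, 5, 8]
  }
}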
+ */ + public void sort(IndexedSortable s, int p, int r) { + sort(s, p, r, null); + } + + /** + * {@inheritDoc} + */ + public void sort(final IndexedSortable s, int p, int r, + final Progressable rep) { + sortInternal(s, p, r, rep, getMaxDepth(r - p)); + } + + private static void sortInternal(final IndexedSortable s, int p, int r, + final Progressable rep, int depth) { + if (null != rep) { + rep.progress(); + } + while (true) { + if (r-p < 13) { + for (int i = p; i < r; ++i) { + for (int j = i; j > p && s.compare(j-1, j) > 0; --j) { + s.swap(j, j-1); + } + } + return; + } + if (--depth < 0) { + // give up + alt.sort(s, p, r, rep); + return; + } + + // select, move pivot into first position + fix(s, (p+r) >>> 1, p); + fix(s, (p+r) >>> 1, r - 1); + fix(s, p, r-1); + + // Divide + int i = p; + int j = r; + int ll = p; + int rr = r; + int cr; + while(true) { + while (++i < j) { + if ((cr = s.compare(i, p)) > 0) break; + if (0 == cr && ++ll != i) { + s.swap(ll, i); + } + } + while (--j > i) { + if ((cr = s.compare(p, j)) > 0) break; + if (0 == cr && --rr != j) { + s.swap(rr, j); + } + } + if (i < j) s.swap(i, j); + else break; + } + j = i; + // swap pivot- and all eq values- into position + while (ll >= p) { + s.swap(ll--, --i); + } + while (rr < r) { + s.swap(rr++, j++); + } + + // Conquer + // Recurse on smaller interval first to keep stack shallow + assert i != j; + if (i - p < r - j) { + sortInternal(s, p, i, rep, depth); + p = j; + } else { + sortInternal(s, j, r, rep, depth); + r = i; + } + } + } + +} diff --git a/src/java/org/apache/hadoop/util/ReflectionUtils.java b/src/java/org/apache/hadoop/util/ReflectionUtils.java new file mode 100644 index 00000000000..d1718bf3560 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ReflectionUtils.java @@ -0,0 +1,291 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.io.*; +import java.lang.management.*; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.SerializationFactory; +import org.apache.hadoop.io.serializer.Serializer; + +/** + * General reflection utils + */ + +public class ReflectionUtils { + + private static final Class[] EMPTY_ARRAY = new Class[]{}; + private static SerializationFactory serialFactory = null; + + /** + * Cache of constructors for each class. 
Pins the classes so they + * can't be garbage collected until ReflectionUtils can be collected. + */ + private static final Map, Constructor> CONSTRUCTOR_CACHE = + new ConcurrentHashMap, Constructor>(); + + /** + * Check and set 'configuration' if necessary. + * + * @param theObject object for which to set configuration + * @param conf Configuration + */ + public static void setConf(Object theObject, Configuration conf) { + if (conf != null) { + if (theObject instanceof Configurable) { + ((Configurable) theObject).setConf(conf); + } + setJobConf(theObject, conf); + } + } + + /** + * This code is to support backward compatibility and break the compile + * time dependency of core on mapred. + * This should be made deprecated along with the mapred package HADOOP-1230. + * Should be removed when mapred package is removed. + */ + private static void setJobConf(Object theObject, Configuration conf) { + //If JobConf and JobConfigurable are in classpath, AND + //theObject is of type JobConfigurable AND + //conf is of type JobConf then + //invoke configure on theObject + try { + Class jobConfClass = + conf.getClassByName("org.apache.hadoop.mapred.JobConf"); + Class jobConfigurableClass = + conf.getClassByName("org.apache.hadoop.mapred.JobConfigurable"); + if (jobConfClass.isAssignableFrom(conf.getClass()) && + jobConfigurableClass.isAssignableFrom(theObject.getClass())) { + Method configureMethod = + jobConfigurableClass.getMethod("configure", jobConfClass); + configureMethod.invoke(theObject, conf); + } + } catch (ClassNotFoundException e) { + //JobConf/JobConfigurable not in classpath. no need to configure + } catch (Exception e) { + throw new RuntimeException("Error in configuring object", e); + } + } + + /** Create an object for the given class and initialize it from conf + * + * @param theClass class of which an object is created + * @param conf Configuration + * @return a new object + */ + @SuppressWarnings("unchecked") + public static T newInstance(Class theClass, Configuration conf) { + T result; + try { + Constructor meth = (Constructor) CONSTRUCTOR_CACHE.get(theClass); + if (meth == null) { + meth = theClass.getDeclaredConstructor(EMPTY_ARRAY); + meth.setAccessible(true); + CONSTRUCTOR_CACHE.put(theClass, meth); + } + result = meth.newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + setConf(result, conf); + return result; + } + + static private ThreadMXBean threadBean = + ManagementFactory.getThreadMXBean(); + + public static void setContentionTracing(boolean val) { + threadBean.setThreadContentionMonitoringEnabled(val); + } + + private static String getTaskName(long id, String name) { + if (name == null) { + return Long.toString(id); + } + return id + " (" + name + ")"; + } + + /** + * Print all of the thread's information and stack traces. 
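/*
 * Hedged sketch, not part of the patch: creating a configured instance via the
 * cached no-argument constructor path described above. MyThing is a
 * hypothetical Configurable defined only for this example.
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.ReflectionUtils;

class NewInstanceExample {
  /** Hypothetical type; setConf() is called on it by ReflectionUtils. */
  public static class MyThing extends Configured {
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    MyThing thing = ReflectionUtils.newInstance(MyThing.class, conf);
    System.out.println(thing.getConf() == conf);   // true: conf was injected
  }
}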
+ * + * @param stream the stream to + * @param title a string title for the stack trace + */ + public static void printThreadInfo(PrintWriter stream, + String title) { + final int STACK_DEPTH = 20; + boolean contention = threadBean.isThreadContentionMonitoringEnabled(); + long[] threadIds = threadBean.getAllThreadIds(); + stream.println("Process Thread Dump: " + title); + stream.println(threadIds.length + " active threads"); + for (long tid: threadIds) { + ThreadInfo info = threadBean.getThreadInfo(tid, STACK_DEPTH); + if (info == null) { + stream.println(" Inactive"); + continue; + } + stream.println("Thread " + + getTaskName(info.getThreadId(), + info.getThreadName()) + ":"); + Thread.State state = info.getThreadState(); + stream.println(" State: " + state); + stream.println(" Blocked count: " + info.getBlockedCount()); + stream.println(" Waited count: " + info.getWaitedCount()); + if (contention) { + stream.println(" Blocked time: " + info.getBlockedTime()); + stream.println(" Waited time: " + info.getWaitedTime()); + } + if (state == Thread.State.WAITING) { + stream.println(" Waiting on " + info.getLockName()); + } else if (state == Thread.State.BLOCKED) { + stream.println(" Blocked on " + info.getLockName()); + stream.println(" Blocked by " + + getTaskName(info.getLockOwnerId(), + info.getLockOwnerName())); + } + stream.println(" Stack:"); + for (StackTraceElement frame: info.getStackTrace()) { + stream.println(" " + frame.toString()); + } + } + stream.flush(); + } + + private static long previousLogTime = 0; + + /** + * Log the current thread stacks at INFO level. + * @param log the logger that logs the stack trace + * @param title a descriptive title for the call stacks + * @param minInterval the minimum time from the last + */ + public static void logThreadInfo(Log log, + String title, + long minInterval) { + boolean dumpStack = false; + if (log.isInfoEnabled()) { + synchronized (ReflectionUtils.class) { + long now = System.currentTimeMillis(); + if (now - previousLogTime >= minInterval * 1000) { + previousLogTime = now; + dumpStack = true; + } + } + if (dumpStack) { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + printThreadInfo(new PrintWriter(buffer), title); + log.info(buffer.toString()); + } + } + } + + /** + * Return the correctly-typed {@link Class} of the given object. + * + * @param o object whose correctly-typed Class is to be obtained + * @return the correctly typed Class of the given object. + */ + @SuppressWarnings("unchecked") + public static Class getClass(T o) { + return (Class)o.getClass(); + } + + // methods to support testing + static void clearCache() { + CONSTRUCTOR_CACHE.clear(); + } + + static int getCacheSize() { + return CONSTRUCTOR_CACHE.size(); + } + /** + * A pair of input/output buffers that we use to clone writables. + */ + private static class CopyInCopyOutBuffer { + DataOutputBuffer outBuffer = new DataOutputBuffer(); + DataInputBuffer inBuffer = new DataInputBuffer(); + /** + * Move the data from the output buffer to the input buffer. + */ + void moveData() { + inBuffer.reset(outBuffer.getData(), outBuffer.getLength()); + } + } + + /** + * Allocate a buffer for each thread that tries to clone objects. 
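/*
 * Editorial example (not part of the patch): dumping thread stacks through the
 * helpers above, once immediately and once rate-limited.
 */
import java.io.PrintWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.ReflectionUtils;

class ThreadDumpExample {
  private static final Log LOG = LogFactory.getLog(ThreadDumpExample.class);

  public static void main(String[] args) {
    // Immediate dump to stdout.
    ReflectionUtils.printThreadInfo(new PrintWriter(System.out), "example dump");
    // INFO-level dump, skipped if one was already logged in the last 60 seconds.
    ReflectionUtils.logThreadInfo(LOG, "example dump", 60);
  }
}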
+ */ + private static ThreadLocal cloneBuffers + = new ThreadLocal() { + protected synchronized CopyInCopyOutBuffer initialValue() { + return new CopyInCopyOutBuffer(); + } + }; + + private static SerializationFactory getFactory(Configuration conf) { + if (serialFactory == null) { + serialFactory = new SerializationFactory(conf); + } + return serialFactory; + } + + /** + * Make a copy of the writable object using serialization to a buffer + * @param dst the object to copy from + * @param src the object to copy into, which is destroyed + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static T copy(Configuration conf, + T src, T dst) throws IOException { + CopyInCopyOutBuffer buffer = cloneBuffers.get(); + buffer.outBuffer.reset(); + SerializationFactory factory = getFactory(conf); + Class cls = (Class) src.getClass(); + Serializer serializer = factory.getSerializer(cls); + serializer.open(buffer.outBuffer); + serializer.serialize(src); + buffer.moveData(); + Deserializer deserializer = factory.getDeserializer(cls); + deserializer.open(buffer.inBuffer); + dst = deserializer.deserialize(dst); + return dst; + } + + @Deprecated + public static void cloneWritableInto(Writable dst, + Writable src) throws IOException { + CopyInCopyOutBuffer buffer = cloneBuffers.get(); + buffer.outBuffer.reset(); + src.write(buffer.outBuffer); + buffer.moveData(); + dst.readFields(buffer.inBuffer); + } +} diff --git a/src/java/org/apache/hadoop/util/RunJar.java b/src/java/org/apache/hadoop/util/RunJar.java new file mode 100644 index 00000000000..70f8ec4f64a --- /dev/null +++ b/src/java/org/apache/hadoop/util/RunJar.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.jar.*; +import java.lang.reflect.*; +import java.net.URL; +import java.net.URLClassLoader; +import java.io.*; +import java.util.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; + +/** Run a Hadoop job jar. */ +public class RunJar { + + /** Unpack a jar file into a directory. 
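/*
 * Editorial note and sketch, not part of the patch: RunJar is what backs the
 * "hadoop jar" command, e.g. "bin/hadoop jar myjob.jar org.example.MyJob in out"
 * (the main class may be omitted when the jar manifest names a Main-Class).
 * The programmatic call below just unpacks a jar; both paths are assumptions.
 */
import java.io.File;
import org.apache.hadoop.util.RunJar;

class UnJarExample {
  public static void main(String[] args) throws Exception {
    RunJar.unJar(new File("/tmp/myjob.jar"), new File("/tmp/myjob-unpacked"));
  }
}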
*/ + public static void unJar(File jarFile, File toDir) throws IOException { + JarFile jar = new JarFile(jarFile); + try { + Enumeration entries = jar.entries(); + while (entries.hasMoreElements()) { + JarEntry entry = (JarEntry)entries.nextElement(); + if (!entry.isDirectory()) { + InputStream in = jar.getInputStream(entry); + try { + File file = new File(toDir, entry.getName()); + if (!file.getParentFile().mkdirs()) { + if (!file.getParentFile().isDirectory()) { + throw new IOException("Mkdirs failed to create " + + file.getParentFile().toString()); + } + } + OutputStream out = new FileOutputStream(file); + try { + byte[] buffer = new byte[8192]; + int i; + while ((i = in.read(buffer)) != -1) { + out.write(buffer, 0, i); + } + } finally { + out.close(); + } + } finally { + in.close(); + } + } + } + } finally { + jar.close(); + } + } + + /** Run a Hadoop job jar. If the main class is not in the jar's manifest, + * then it must be provided on the command line. */ + public static void main(String[] args) throws Throwable { + String usage = "RunJar jarFile [mainClass] args..."; + + if (args.length < 1) { + System.err.println(usage); + System.exit(-1); + } + + int firstArg = 0; + String fileName = args[firstArg++]; + File file = new File(fileName); + String mainClassName = null; + + JarFile jarFile; + try { + jarFile = new JarFile(fileName); + } catch(IOException io) { + throw new IOException("Error opening job jar: " + fileName) + .initCause(io); + } + + Manifest manifest = jarFile.getManifest(); + if (manifest != null) { + mainClassName = manifest.getMainAttributes().getValue("Main-Class"); + } + jarFile.close(); + + if (mainClassName == null) { + if (args.length < 2) { + System.err.println(usage); + System.exit(-1); + } + mainClassName = args[firstArg++]; + } + mainClassName = mainClassName.replaceAll("/", "."); + + File tmpDir = new File(new Configuration().get("hadoop.tmp.dir")); + boolean b = tmpDir.mkdirs(); + if (!b || !tmpDir.isDirectory()) { + System.err.println("Mkdirs failed to create " + tmpDir); + System.exit(-1); + } + final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir); + b = workDir.delete(); + if (!b) { + System.err.println("Delete failed for " + workDir); + System.exit(-1); + } + b = workDir.mkdirs(); + if (!b || !workDir.isDirectory()) { + System.err.println("Mkdirs failed to create " + workDir); + System.exit(-1); + } + + Runtime.getRuntime().addShutdownHook(new Thread() { + public void run() { + try { + FileUtil.fullyDelete(workDir); + } catch (IOException e) { + } + } + }); + + unJar(file, workDir); + + ArrayList classPath = new ArrayList(); + classPath.add(new File(workDir+"/").toURL()); + classPath.add(file.toURL()); + classPath.add(new File(workDir, "classes/").toURL()); + File[] libs = new File(workDir, "lib").listFiles(); + if (libs != null) { + for (int i = 0; i < libs.length; i++) { + classPath.add(libs[i].toURL()); + } + } + + ClassLoader loader = + new URLClassLoader(classPath.toArray(new URL[0])); + + Thread.currentThread().setContextClassLoader(loader); + Class mainClass = Class.forName(mainClassName, true, loader); + Method main = mainClass.getMethod("main", new Class[] { + Array.newInstance(String.class, 0).getClass() + }); + String[] newArgs = Arrays.asList(args) + .subList(firstArg, args.length).toArray(new String[0]); + try { + main.invoke(null, new Object[] { newArgs }); + } catch (InvocationTargetException e) { + throw e.getTargetException(); + } + } + +} diff --git a/src/java/org/apache/hadoop/util/ServicePlugin.java 
b/src/java/org/apache/hadoop/util/ServicePlugin.java new file mode 100644 index 00000000000..a83294eb7d8 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ServicePlugin.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.Closeable; + +/** + * Service plug-in interface. + * + * Service plug-ins may be used to expose functionality of datanodes or + * namenodes using arbitrary RPC protocols. Plug-ins are instantiated by the + * service instance, and are notified of service life-cycle events using the + * methods defined by this class. + * + * Service plug-ins are started after the service instance is started, and + * stopped before the service instance is stopped. + */ +public interface ServicePlugin extends Closeable { + + /** + * This method is invoked when the service instance has been started. + * + * @param service The service instance invoking this method + */ + void start(Object service); + + /** + * This method is invoked when the service instance is about to be shut down. + */ + void stop(); +} diff --git a/src/java/org/apache/hadoop/util/ServletUtil.java b/src/java/org/apache/hadoop/util/ServletUtil.java new file mode 100644 index 00000000000..d755b8ec101 --- /dev/null +++ b/src/java/org/apache/hadoop/util/ServletUtil.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.*; +import java.util.Calendar; + +import javax.servlet.*; + +public class ServletUtil { + /** + * Initial HTML header + */ + public static PrintWriter initHTML(ServletResponse response, String title + ) throws IOException { + response.setContentType("text/html"); + PrintWriter out = response.getWriter(); + out.println("\n" + + "\n" + + "" + title + "\n" + + "\n" + + "

" + title + "

\n"); + return out; + } + + /** + * Get a parameter from a ServletRequest. + * Return null if the parameter contains only white spaces. + */ + public static String getParameter(ServletRequest request, String name) { + String s = request.getParameter(name); + if (s == null) { + return null; + } + s = s.trim(); + return s.length() == 0? null: s; + } + + public static final String HTML_TAIL = "
\n" + + "Hadoop, " + + Calendar.getInstance().get(Calendar.YEAR) + ".\n" + + ""; + + /** + * HTML footer to be added in the jsps. + * @return the HTML footer. + */ + public static String htmlFooter() { + return HTML_TAIL; + } + + /** + * Generate the percentage graph and returns HTML representation string + * of the same. + * + * @param perc The percentage value for which graph is to be generated + * @param width The width of the display table + * @return HTML String representation of the percentage graph + * @throws IOException + */ + public static String percentageGraph(int perc, int width) throws IOException { + assert perc >= 0; assert perc <= 100; + + StringBuilder builder = new StringBuilder(); + + builder.append(""); + if(perc > 0) { + builder.append(""); + }if(perc < 100) { + builder.append(""); + } + builder.append("
"); + return builder.toString(); + } + + /** + * Generate the percentage graph and returns HTML representation string + * of the same. + * @param perc The percentage value for which graph is to be generated + * @param width The width of the display table + * @return HTML String representation of the percentage graph + * @throws IOException + */ + public static String percentageGraph(float perc, int width) throws IOException { + return percentageGraph((int)perc, width); + } +} diff --git a/src/java/org/apache/hadoop/util/Shell.java b/src/java/org/apache/hadoop/util/Shell.java new file mode 100644 index 00000000000..7964721982f --- /dev/null +++ b/src/java/org/apache/hadoop/util/Shell.java @@ -0,0 +1,357 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; + +/** + * A base class for running a Unix command. + * + * Shell can be used to run unix commands like du or + * df. It also offers facilities to gate commands by + * time-intervals. + */ +abstract public class Shell { + + public static final Log LOG = LogFactory.getLog(Shell.class); + + /** a Unix command to get the current user's name */ + public final static String USER_NAME_COMMAND = "whoami"; + /** a Unix command to get the current user's groups list */ + public static String[] getGROUPS_COMMAND() { + return new String[]{"bash", "-c", "groups"}; + } + /** a Unix command to set permission */ + public static final String SET_PERMISSION_COMMAND = "chmod"; + /** a Unix command to set owner */ + public static final String SET_OWNER_COMMAND = "chown"; + public static final String SET_GROUP_COMMAND = "chgrp"; + /** Return a Unix command to get permission information. */ + public static String[] getGET_PERMISSION_COMMAND() { + //force /bin/ls, except on windows. + return new String[] {(WINDOWS ? "ls" : "/bin/ls"), "-ld"}; + } + + /** + * Get the Unix command for setting the maximum virtual memory available + * to a given child process. This is only relevant when we are forking a + * process from within the {@link org.apache.hadoop.mapred.Mapper} or the + * {@link org.apache.hadoop.mapred.Reducer} implementations + * e.g. Hadoop Pipes + * or Hadoop Streaming. + * + * It also checks to ensure that we are running on a *nix platform else + * (e.g. in Cygwin/Windows) it returns null. + * @param conf configuration + * @return a String[] with the ulimit command arguments or + * null if we are running on a non *nix platform or + * if the limit is unspecified. 
+ */ + public static String[] getUlimitMemoryCommand(Configuration conf) { + // ulimit isn't supported on Windows + if (WINDOWS) { + return null; + } + + // get the memory limit from the configuration + String ulimit = conf.get("mapred.child.ulimit"); + if (ulimit == null) { + return null; + } + + // Parse it to ensure it is legal/sane + int memoryLimit = Integer.valueOf(ulimit); + + return new String[] {"ulimit", "-v", String.valueOf(memoryLimit)}; + } + + /** Set to true on Windows platforms */ + public static final boolean WINDOWS /* borrowed from Path.WINDOWS */ + = System.getProperty("os.name").startsWith("Windows"); + + private long interval; // refresh interval in msec + private long lastTime; // last time the command was performed + private Map environment; // env for the command execution + private File dir; + private Process process; // sub process used to execute the command + private int exitCode; + + public Shell() { + this(0L); + } + + /** + * @param interval the minimum duration to wait before re-executing the + * command. + */ + public Shell( long interval ) { + this.interval = interval; + this.lastTime = (interval<0) ? 0 : -interval; + } + + /** set the environment for the command + * @param env Mapping of environment variables + */ + protected void setEnvironment(Map env) { + this.environment = env; + } + + /** set the working directory + * @param dir The directory where the command would be executed + */ + protected void setWorkingDirectory(File dir) { + this.dir = dir; + } + + /** check to see if a command needs to be executed and execute if needed */ + protected void run() throws IOException { + if (lastTime + interval > System.currentTimeMillis()) + return; + exitCode = 0; // reset for next run + runCommand(); + } + + /** Run a command */ + private void runCommand() throws IOException { + ProcessBuilder builder = new ProcessBuilder(getExecString()); + boolean completed = false; + + if (environment != null) { + builder.environment().putAll(this.environment); + } + if (dir != null) { + builder.directory(this.dir); + } + + process = builder.start(); + final BufferedReader errReader = + new BufferedReader(new InputStreamReader(process + .getErrorStream())); + BufferedReader inReader = + new BufferedReader(new InputStreamReader(process + .getInputStream())); + final StringBuffer errMsg = new StringBuffer(); + + // read error and input streams as this would free up the buffers + // free the error stream buffer + Thread errThread = new Thread() { + @Override + public void run() { + try { + String line = errReader.readLine(); + while((line != null) && !isInterrupted()) { + errMsg.append(line); + errMsg.append(System.getProperty("line.separator")); + line = errReader.readLine(); + } + } catch(IOException ioe) { + LOG.warn("Error reading the error stream", ioe); + } + } + }; + try { + errThread.start(); + } catch (IllegalStateException ise) { } + try { + parseExecResult(inReader); // parse the output + // clear the input stream buffer + String line = inReader.readLine(); + while(line != null) { + line = inReader.readLine(); + } + // wait for the process to finish and check the exit code + exitCode = process.waitFor(); + try { + // make sure that the error thread exits + errThread.join(); + } catch (InterruptedException ie) { + LOG.warn("Interrupted while reading the error stream", ie); + } + completed = true; + if (exitCode != 0) { + throw new ExitCodeException(exitCode, errMsg.toString()); + } + } catch (InterruptedException ie) { + throw new IOException(ie.toString()); + } 
finally { + // close the input stream + try { + inReader.close(); + } catch (IOException ioe) { + LOG.warn("Error while closing the input stream", ioe); + } + if (!completed) { + errThread.interrupt(); + } + try { + errReader.close(); + } catch (IOException ioe) { + LOG.warn("Error while closing the error stream", ioe); + } + process.destroy(); + lastTime = System.currentTimeMillis(); + } + } + + /** return an array containing the command name & its parameters */ + protected abstract String[] getExecString(); + + /** Parse the execution result */ + protected abstract void parseExecResult(BufferedReader lines) + throws IOException; + + /** get the current sub-process executing the given command + * @return process executing the command + */ + public Process getProcess() { + return process; + } + + /** get the exit code + * @return the exit code of the process + */ + public int getExitCode() { + return exitCode; + } + + /** + * This is an IOException with exit code added. + */ + public static class ExitCodeException extends IOException { + int exitCode; + + public ExitCodeException(int exitCode, String message) { + super(message); + this.exitCode = exitCode; + } + + public int getExitCode() { + return exitCode; + } + } + + /** + * A simple shell command executor. + * + * ShellCommandExecutorshould be used in cases where the output + * of the command needs no explicit parsing and where the command, working + * directory and the environment remains unchanged. The output of the command + * is stored as-is and is expected to be small. + */ + public static class ShellCommandExecutor extends Shell { + + private String[] command; + private StringBuffer output; + + public ShellCommandExecutor(String[] execString) { + command = execString.clone(); + } + + public ShellCommandExecutor(String[] execString, File dir) { + this(execString); + this.setWorkingDirectory(dir); + } + + public ShellCommandExecutor(String[] execString, File dir, + Map env) { + this(execString, dir); + this.setEnvironment(env); + } + + /** Execute the shell command. */ + public void execute() throws IOException { + this.run(); + } + + protected String[] getExecString() { + return command; + } + + protected void parseExecResult(BufferedReader lines) throws IOException { + output = new StringBuffer(); + char[] buf = new char[512]; + int nRead; + while ( (nRead = lines.read(buf, 0, buf.length)) > 0 ) { + output.append(buf, 0, nRead); + } + } + + /** Get the output of the shell command.*/ + public String getOutput() { + return (output == null) ? "" : output.toString(); + } + + /** + * Returns the commands of this instance. + * Arguments with spaces in are presented with quotes round; other + * arguments are presented raw + * + * @return a string representation of the object. + */ + public String toString() { + StringBuilder builder = new StringBuilder(); + String[] args = getExecString(); + for (String s : args) { + if (s.indexOf(' ') >= 0) { + builder.append('"').append(s).append('"'); + } else { + builder.append(s); + } + builder.append(' '); + } + return builder.toString(); + } + } + + /** + * Static method to execute a shell command. + * Covers most of the simple cases without requiring the user to implement + * the Shell interface. + * @param cmd shell command to execute. + * @return the output of the executed command. + */ + public static String execCommand(String ... cmd) throws IOException { + return execCommand(null, cmd); + } + + /** + * Static method to execute a shell command. 
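/*
 * Editorial example, not part of the patch: the two common ways to run a
 * command with the classes above. The commands assume a Unix system and an
 * existing /tmp directory.
 */
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;

class ShellExample {
  public static void main(String[] args) throws IOException {
    // One-shot convenience call.
    System.out.println(Shell.execCommand("uname", "-a"));

    // Reusable executor with an explicit working directory.
    ShellCommandExecutor exec =
        new ShellCommandExecutor(new String[] {"ls", "-l"}, new File("/tmp"));
    exec.execute();
    System.out.println("exit=" + exec.getExitCode());
    System.out.println(exec.getOutput());
  }
}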
+ * Covers most of the simple cases without requiring the user to implement + * the Shell interface. + * @param env the map of environment key=value + * @param cmd shell command to execute. + * @return the output of the executed command. + */ + public static String execCommand(Map env, String ... cmd) + throws IOException { + ShellCommandExecutor exec = new ShellCommandExecutor(cmd); + if (env != null) { + exec.setEnvironment(env); + } + exec.execute(); + return exec.getOutput(); + } +} diff --git a/src/java/org/apache/hadoop/util/StringUtils.java b/src/java/org/apache/hadoop/util/StringUtils.java new file mode 100644 index 00000000000..8e1caaa19ce --- /dev/null +++ b/src/java/org/apache/hadoop/util/StringUtils.java @@ -0,0 +1,679 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URI; +import java.net.URISyntaxException; +import java.text.DateFormat; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import java.util.StringTokenizer; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.net.NetUtils; + +/** + * General string utils + */ +public class StringUtils { + + private static final DecimalFormat decimalFormat; + static { + NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.ENGLISH); + decimalFormat = (DecimalFormat) numberFormat; + decimalFormat.applyPattern("#.##"); + } + + /** + * Make a string representation of the exception. + * @param e The exception to stringify + * @return A string with exception name and call stack. + */ + public static String stringifyException(Throwable e) { + StringWriter stm = new StringWriter(); + PrintWriter wrt = new PrintWriter(stm); + e.printStackTrace(wrt); + wrt.close(); + return stm.toString(); + } + + /** + * Given a full hostname, return the word upto the first dot. + * @param fullHostname the full hostname + * @return the hostname to the first dot + */ + public static String simpleHostname(String fullHostname) { + int offset = fullHostname.indexOf('.'); + if (offset != -1) { + return fullHostname.substring(0, offset); + } + return fullHostname; + } + + private static DecimalFormat oneDecimal = new DecimalFormat("0.0"); + + /** + * Given an integer, return a string that is in an approximate, but human + * readable format. + * It uses the bases 'k', 'm', and 'g' for 1024, 1024**2, and 1024**3. 
+ * @param number the number to format + * @return a human readable form of the integer + */ + public static String humanReadableInt(long number) { + long absNumber = Math.abs(number); + double result = number; + String suffix = ""; + if (absNumber < 1024) { + // nothing + } else if (absNumber < 1024 * 1024) { + result = number / 1024.0; + suffix = "k"; + } else if (absNumber < 1024 * 1024 * 1024) { + result = number / (1024.0 * 1024); + suffix = "m"; + } else { + result = number / (1024.0 * 1024 * 1024); + suffix = "g"; + } + return oneDecimal.format(result) + suffix; + } + + /** + * Format a percentage for presentation to the user. + * @param done the percentage to format (0.0 to 1.0) + * @param digits the number of digits past the decimal point + * @return a string representation of the percentage + */ + public static String formatPercent(double done, int digits) { + DecimalFormat percentFormat = new DecimalFormat("0.00%"); + double scale = Math.pow(10.0, digits+2); + double rounded = Math.floor(done * scale); + percentFormat.setDecimalSeparatorAlwaysShown(false); + percentFormat.setMinimumFractionDigits(digits); + percentFormat.setMaximumFractionDigits(digits); + return percentFormat.format(rounded / scale); + } + + /** + * Given an array of strings, return a comma-separated list of its elements. + * @param strs Array of strings + * @return Empty string if strs.length is 0, comma separated list of strings + * otherwise + */ + + public static String arrayToString(String[] strs) { + if (strs.length == 0) { return ""; } + StringBuffer sbuf = new StringBuffer(); + sbuf.append(strs[0]); + for (int idx = 1; idx < strs.length; idx++) { + sbuf.append(","); + sbuf.append(strs[idx]); + } + return sbuf.toString(); + } + + /** + * Given an array of bytes it will convert the bytes to a hex string + * representation of the bytes + * @param bytes + * @param start start index, inclusively + * @param end end index, exclusively + * @return hex string representation of the byte array + */ + public static String byteToHexString(byte[] bytes, int start, int end) { + if (bytes == null) { + throw new IllegalArgumentException("bytes == null"); + } + StringBuilder s = new StringBuilder(); + for(int i = start; i < end; i++) { + s.append(String.format("%02x", bytes[i])); + } + return s.toString(); + } + + /** Same as byteToHexString(bytes, 0, bytes.length). */ + public static String byteToHexString(byte bytes[]) { + return byteToHexString(bytes, 0, bytes.length); + } + + /** + * Given a hexstring this will return the byte array corresponding to the + * string + * @param hex the hex String array + * @return a byte array that is a hex string representation of the given + * string. 
The size of the byte array is therefore hex.length/2 + */ + public static byte[] hexStringToByte(String hex) { + byte[] bts = new byte[hex.length() / 2]; + for (int i = 0; i < bts.length; i++) { + bts[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16); + } + return bts; + } + /** + * Given an array of URIs, return a comma separated list of their string forms. + * @param uris the array of URIs + * @return a comma separated string, or null if uris is null + */ + public static String uriToString(URI[] uris){ + if (uris == null) { + return null; + } + StringBuffer ret = new StringBuffer(uris[0].toString()); + for(int i = 1; i < uris.length;i++){ + ret.append(","); + ret.append(uris[i].toString()); + } + return ret.toString(); + } + + /** + * Convert an array of strings to an array of URIs. + * @param str the array of strings to convert + * @return the corresponding array of URIs + */ + public static URI[] stringToURI(String[] str){ + if (str == null) + return null; + URI[] uris = new URI[str.length]; + for (int i = 0; i < str.length;i++){ + try{ + uris[i] = new URI(str[i]); + }catch(URISyntaxException ur){ + System.out.println("Exception in specified URI's " + StringUtils.stringifyException(ur)); + //making sure it is assigned to null in case of an error + uris[i] = null; + } + } + return uris; + } + + /** + * Convert an array of strings to an array of Paths. + * @param str the array of strings to convert + * @return the corresponding array of Paths + */ + public static Path[] stringToPath(String[] str){ + if (str == null) { + return null; + } + Path[] p = new Path[str.length]; + for (int i = 0; i < str.length;i++){ + p[i] = new Path(str[i]); + } + return p; + } + /** + * + * Given a finish and start time in long milliseconds, returns a + * String in the format Xhrs, Ymins, Z sec, for the time difference between two times. + * If the finish time comes before the start time, then negative values of X, Y and Z will be returned. + * + * @param finishTime finish time + * @param startTime start time + */ + public static String formatTimeDiff(long finishTime, long startTime){ + long timeDiff = finishTime - startTime; + return formatTime(timeDiff); + } + + /** + * + * Given the time in long milliseconds, returns a + * String in the format Xhrs, Ymins, Z sec. + * + * @param timeDiff The time difference to format + */ + public static String formatTime(long timeDiff){ + StringBuffer buf = new StringBuffer(); + long hours = timeDiff / (60*60*1000); + long rem = (timeDiff % (60*60*1000)); + long minutes = rem / (60*1000); + rem = rem % (60*1000); + long seconds = rem / 1000; + + if (hours != 0){ + buf.append(hours); + buf.append("hrs, "); + } + if (minutes != 0){ + buf.append(minutes); + buf.append("mins, "); + } + // return "0sec" if there is no difference + buf.append(seconds); + buf.append("sec"); + return buf.toString(); + } + /** + * Formats time in ms and appends difference (finishTime - startTime) + * as returned by formatTimeDiff(). + * If finish time is 0, an empty string is returned; if start time is 0 + * then the difference is not appended to the return value. + * @param dateFormat date format to use + * @param finishTime finish time + * @param startTime start time + * @return formatted value. + */ + public static String getFormattedTimeWithDiff(DateFormat dateFormat, + long finishTime, long startTime){ + StringBuffer buf = new StringBuffer(); + if (0 != finishTime) { + buf.append(dateFormat.format(new Date(finishTime))); + if (0 != startTime){ + buf.append(" (" + formatTimeDiff(finishTime , startTime) + ")"); + } + } + return buf.toString(); + } + + /** + * Returns an array of strings built from a comma separated list.
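A brief sketch of the time formatting helpers above; the timestamps are arbitrary illustrative values:

import java.text.SimpleDateFormat;
import org.apache.hadoop.util.StringUtils;

public class TimeFormatDemo {
  public static void main(String[] args) {
    long start = 1000000L;                                   // arbitrary epoch offset
    long finish = start + (2 * 60 * 60 * 1000)               // 2 hours
                        + (5 * 60 * 1000)                    // 5 minutes
                        + (7 * 1000);                        // 7 seconds
    System.out.println(StringUtils.formatTimeDiff(finish, start)); // 2hrs, 5mins, 7sec
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    // Prints the formatted finish time followed by " (2hrs, 5mins, 7sec)".
    System.out.println(StringUtils.getFormattedTimeWithDiff(fmt, finish, start));
  }
}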
+ * @param str the comma separated string values + * @return an array of the comma separated string values, or null if there are none + */ + public static String[] getStrings(String str){ + Collection<String> values = getStringCollection(str); + if(values.size() == 0) { + return null; + } + return values.toArray(new String[values.size()]); + } + + /** + * Returns a collection of strings. + * @param str comma separated string values + * @return an ArrayList of string values + */ + public static Collection<String> getStringCollection(String str){ + List<String> values = new ArrayList<String>(); + if (str == null) + return values; + StringTokenizer tokenizer = new StringTokenizer (str,","); + values = new ArrayList<String>(); + while (tokenizer.hasMoreTokens()) { + values.add(tokenizer.nextToken()); + } + return values; + } + + final public static char COMMA = ','; + final public static String COMMA_STR = ","; + final public static char ESCAPE_CHAR = '\\'; + + /** + * Split a string using the default separator + * @param str a string that may have escaped separator + * @return an array of strings + */ + public static String[] split(String str) { + return split(str, ESCAPE_CHAR, COMMA); + } + + /** + * Split a string using the given separator + * @param str a string that may have escaped separator + * @param escapeChar a char that can be used to escape the separator + * @param separator a separator char + * @return an array of strings + */ + public static String[] split( + String str, char escapeChar, char separator) { + if (str==null) { + return null; + } + ArrayList<String> strList = new ArrayList<String>(); + StringBuilder split = new StringBuilder(); + int index = 0; + while ((index = findNext(str, separator, escapeChar, index, split)) >= 0) { + ++index; // move over the separator for next search + strList.add(split.toString()); + split.setLength(0); // reset the buffer + } + strList.add(split.toString()); + // remove trailing empty split(s) + int last = strList.size(); // last split + while (--last>=0 && "".equals(strList.get(last))) { + strList.remove(last); + } + return strList.toArray(new String[strList.size()]); + } + + /** + * Finds the first occurrence of the separator character ignoring the escaped + * separators starting from the index. Note the substring between the index + * and the position of the separator is passed back via the split parameter. + * @param str the source string + * @param separator the character to find + * @param escapeChar character used to escape + * @param start from where to search + * @param split used to pass back the extracted string + * @return the position of the separator, or -1 if none was found + */ + public static int findNext(String str, char separator, char escapeChar, + int start, StringBuilder split) { + int numPreEscapes = 0; + for (int i = start; i < str.length(); i++) { + char curChar = str.charAt(i); + if (numPreEscapes == 0 && curChar == separator) { // separator + return i; + } else { + split.append(curChar); + numPreEscapes = (curChar == escapeChar) + ?
(++numPreEscapes) % 2 + : 0; + } + } + return -1; + } + + /** + * Escape commas in the string using the default escape char + * @param str a string + * @return an escaped string + */ + public static String escapeString(String str) { + return escapeString(str, ESCAPE_CHAR, COMMA); + } + + /** + * Escape charToEscape in the string + * with the escape char escapeChar + * + * @param str string + * @param escapeChar escape char + * @param charToEscape the char to be escaped + * @return an escaped string + */ + public static String escapeString( + String str, char escapeChar, char charToEscape) { + return escapeString(str, escapeChar, new char[] {charToEscape}); + } + + // check if the character array has the character + private static boolean hasChar(char[] chars, char character) { + for (char target : chars) { + if (character == target) { + return true; + } + } + return false; + } + + /** + * @param charsToEscape array of characters to be escaped + */ + public static String escapeString(String str, char escapeChar, + char[] charsToEscape) { + if (str == null) { + return null; + } + StringBuilder result = new StringBuilder(); + for (int i=0; i<str.length(); i++) { + char curChar = str.charAt(i); + if (curChar == escapeChar || hasChar(charsToEscape, curChar)) { + // prefix special chars with the escape char + result.append(escapeChar); + } + result.append(curChar); + } + return result.toString(); + } + + /** + * Unescape commas in the string using the default escape char + * @param str a string + * @return an unescaped string + */ + public static String unEscapeString(String str) { + return unEscapeString(str, ESCAPE_CHAR, COMMA); + } + + /** + * Unescape charToEscape in the string + * with the escape char escapeChar + * + * @param str string + * @param escapeChar escape char + * @param charToEscape the escaped char + * @return an unescaped string + */ + public static String unEscapeString( + String str, char escapeChar, char charToEscape) { + return unEscapeString(str, escapeChar, new char[] {charToEscape}); + } + + /** + * @param charsToEscape array of characters to unescape + */ + public static String unEscapeString(String str, char escapeChar, + char[] charsToEscape) { + if (str == null) { + return null; + } + StringBuilder result = new StringBuilder(str.length()); + boolean hasPreEscape = false; + for (int i=0; i<str.length(); i++) { + char curChar = str.charAt(i); + if (hasPreEscape) { + if (curChar != escapeChar && !hasChar(charsToEscape, curChar)) { + // the escape char did not precede an escapable char + throw new IllegalArgumentException("Illegal escaped string " + str + + " unescaped " + escapeChar + " at " + (i-1)); + } + // otherwise discard the escape char and keep the escaped char + result.append(curChar); + hasPreEscape = false; + } else { + if (hasChar(charsToEscape, curChar)) { + throw new IllegalArgumentException("Illegal escaped string " + str + + " unescaped " + curChar + " at " + i); + } else if (curChar == escapeChar) { + hasPreEscape = true; + } else { + result.append(curChar); + } + } + } + if (hasPreEscape) { + throw new IllegalArgumentException("Illegal escaped string " + str + + ", not expecting " + escapeChar + " in the end."); + } + return result.toString(); + } + + /** + * Return a message for logging. + * @param prefix prefix keyword for the message + * @param msg content of the message + * @return a message for logging + */ + private static String toStartupShutdownString(String prefix, String[] msg) { + StringBuffer b = new StringBuffer(prefix); + b.append("\n/************************************************************"); + for(String s : msg) + b.append("\n" + prefix + s); + b.append("\n************************************************************/"); + return b.toString(); + } + + /** + * Print a log message for starting up and shutting down + * @param clazz the class of the server + * @param args arguments + * @param LOG the target log object + */ + public static void startupShutdownMessage(Class<?> clazz, String[] args, + final org.apache.commons.logging.Log LOG) { + final String hostname = NetUtils.getHostname(); + final String classname = clazz.getSimpleName(); + LOG.info( + toStartupShutdownString("STARTUP_MSG: ", new String[] { + "Starting " + classname, + " host = " + hostname, + " args = " + Arrays.asList(args), + " version = " + VersionInfo.getVersion(), + " classpath = " + System.getProperty("java.class.path"), + " build = " + VersionInfo.getUrl() + " -r " + + VersionInfo.getRevision() + + "; compiled by '" + VersionInfo.getUser() + + "' on " + VersionInfo.getDate()} + ) + ); + + Runtime.getRuntime().addShutdownHook(new Thread() { + public void run() { + LOG.info(toStartupShutdownString("SHUTDOWN_MSG: ", new String[]{ + "Shutting down " + classname + " at " + hostname})); + } + }); + } + + /** + * The traditional binary prefixes, kilo, mega, ..., exa, + * which can be represented by a 64-bit integer. + * TraditionalBinaryPrefix symbols are case insensitive. + */ + public static enum TraditionalBinaryPrefix { + KILO(1024), + MEGA(KILO.value << 10), + GIGA(MEGA.value << 10), + TERA(GIGA.value << 10), + PETA(TERA.value << 10), + EXA(PETA.value << 10); + + public final long value; + public final char symbol; + + TraditionalBinaryPrefix(long value) { + this.value = value; + this.symbol = toString().charAt(0); + } + + /** + * @return The TraditionalBinaryPrefix object corresponding to the symbol.
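The escape/split helpers are designed to round-trip values that themselves contain the separator, and TraditionalBinaryPrefix parses size strings such as "128m"; a small sketch, with an illustrative class name and values:

import org.apache.hadoop.util.StringUtils;

public class EscapeSplitDemo {
  public static void main(String[] args) {
    String field = "a,b";                                  // a value containing the separator
    String escaped = StringUtils.escapeString(field);      // "a\,b"
    String record = escaped + "," + "c";                   // two fields joined by a comma
    String[] parts = StringUtils.split(record);            // ["a\,b", "c"], still escaped
    for (String part : parts) {
      System.out.println(
          StringUtils.unEscapeString(part, StringUtils.ESCAPE_CHAR, StringUtils.COMMA));
    }
    // "128m" -> 128 * 1024 * 1024
    System.out.println(StringUtils.TraditionalBinaryPrefix.string2long("128m")); // 134217728
  }
}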
+ */ + public static TraditionalBinaryPrefix valueOf(char symbol) { + symbol = Character.toUpperCase(symbol); + for(TraditionalBinaryPrefix prefix : TraditionalBinaryPrefix.values()) { + if (symbol == prefix.symbol) { + return prefix; + } + } + throw new IllegalArgumentException("Unknown symbol '" + symbol + "'"); + } + + /** + * Convert a string to long. + * The input string is first trimmed + * and then it is parsed with traditional binary prefix. + * + * For example, + * "-1230k" will be converted to -1230 * 1024 = -1259520; + * "891g" will be converted to 891 * 1024^3 = 956703965184; + * + * @param s input string + * @return a long value represented by the input string. + */ + public static long string2long(String s) { + s = s.trim(); + final int lastpos = s.length() - 1; + final char lastchar = s.charAt(lastpos); + if (Character.isDigit(lastchar)) + return Long.parseLong(s); + else { + long prefix = TraditionalBinaryPrefix.valueOf(lastchar).value; + long num = Long.parseLong(s.substring(0, lastpos)); + if (num > (Long.MAX_VALUE/prefix) || num < (Long.MIN_VALUE/prefix)) { + throw new IllegalArgumentException(s + " does not fit in a Long"); + } + return num * prefix; + } + } + } + + /** + * Escapes HTML special characters present in the string. + * @param string the string to escape + * @return HTML escaped string representation + */ + public static String escapeHTML(String string) { + if(string == null) { + return null; + } + StringBuffer sb = new StringBuffer(); + boolean lastCharacterWasSpace = false; + char[] chars = string.toCharArray(); + for(char c : chars) { + if(c == ' ') { + if(lastCharacterWasSpace){ + lastCharacterWasSpace = false; + sb.append("&nbsp;"); + }else { + lastCharacterWasSpace=true; + sb.append(" "); + } + }else { + lastCharacterWasSpace = false; + switch(c) { + case '<': sb.append("&lt;"); break; + case '>': sb.append("&gt;"); break; + case '&': sb.append("&amp;"); break; + case '"': sb.append("&quot;"); break; + default : sb.append(c);break; + } + } + } + + return sb.toString(); + } + + /** + * Return an abbreviated English-language description of the byte length + */ + public static String byteDesc(long len) { + double val = 0.0; + String ending = ""; + if (len < 1024 * 1024) { + val = (1.0 * len) / 1024; + ending = " KB"; + } else if (len < 1024 * 1024 * 1024) { + val = (1.0 * len) / (1024 * 1024); + ending = " MB"; + } else if (len < 1024L * 1024 * 1024 * 1024) { + val = (1.0 * len) / (1024 * 1024 * 1024); + ending = " GB"; + } else if (len < 1024L * 1024 * 1024 * 1024 * 1024) { + val = (1.0 * len) / (1024L * 1024 * 1024 * 1024); + ending = " TB"; + } else { + val = (1.0 * len) / (1024L * 1024 * 1024 * 1024 * 1024); + ending = " PB"; + } + return limitDecimalTo2(val) + ending; + } + + public static synchronized String limitDecimalTo2(double d) { + return decimalFormat.format(d); + } +} diff --git a/src/java/org/apache/hadoop/util/Tool.java b/src/java/org/apache/hadoop/util/Tool.java new file mode 100644 index 00000000000..8cc9f47c20d --- /dev/null +++ b/src/java/org/apache/hadoop/util/Tool.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.conf.Configurable; + +/** + * A tool interface that supports handling of generic command-line options. + * + *

Tool, is the standard for any Map-Reduce tool/application. + * The tool/application should delegate the handling of + * + * standard command-line options to {@link ToolRunner#run(Tool, String[])} + * and only handle its custom arguments.

+ * + *

Here is how a typical Tool is implemented:

+ *

+ *     public class MyApp extends Configured implements Tool {
+ *     
+ *       public int run(String[] args) throws Exception {
+ *         // Configuration processed by ToolRunner
+ *         Configuration conf = getConf();
+ *         
+ *         // Create a JobConf using the processed conf
+ *         JobConf job = new JobConf(conf, MyApp.class);
+ *         
+ *         // Process custom command-line options
+ *         Path in = new Path(args[1]);
+ *         Path out = new Path(args[2]);
+ *         
+ *         // Specify various job-specific parameters     
+ *         job.setJobName("my-app");
+ *         job.setInputPath(in);
+ *         job.setOutputPath(out);
+ *         job.setMapperClass(MyApp.MyMapper.class);
+ *         job.setReducerClass(MyApp.MyReducer.class);
+ *
+ *         // Submit the job, then poll for progress until the job is complete
+ *         JobClient.runJob(job);
+ *         return 0;
+ *       }
+ *       
+ *       public static void main(String[] args) throws Exception {
+ *         // Let ToolRunner handle generic command-line options 
+ *         int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+ *         
+ *         System.exit(res);
+ *       }
+ *     }
+ * 

+ * + * @see GenericOptionsParser + * @see ToolRunner + */ +public interface Tool extends Configurable { + /** + * Execute the command with the given arguments. + * + * @param args command specific arguments. + * @return exit code. + * @throws Exception + */ + int run(String [] args) throws Exception; +} diff --git a/src/java/org/apache/hadoop/util/ToolRunner.java b/src/java/org/apache/hadoop/util/ToolRunner.java new file mode 100644 index 00000000000..27e08a5fb7d --- /dev/null +++ b/src/java/org/apache/hadoop/util/ToolRunner.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.PrintStream; + +import org.apache.hadoop.conf.Configuration; + +/** + * A utility to help run {@link Tool}s. + * + *

ToolRunner can be used to run classes implementing + * Tool interface. It works in conjunction with + * {@link GenericOptionsParser} to parse the + * + * generic hadoop command line arguments and modifies the + * Configuration of the Tool. The + * application-specific options are passed along without being modified. + *
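A minimal Tool wired through ToolRunner, to make the division of labour concrete; EchoTool and the property name my.key are illustrative and not part of this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class EchoTool extends Configured implements Tool {
  public int run(String[] args) throws Exception {
    // Generic options such as "-D my.key=hello" have already been applied to getConf().
    System.out.println("my.key = " + getConf().get("my.key", "<unset>"));
    for (String arg : args) {
      System.out.println("remaining arg: " + arg);  // only the non-generic arguments
    }
    return 0;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new EchoTool(), args));
  }
}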

+ * + * @see Tool + * @see GenericOptionsParser + */ +public class ToolRunner { + + /** + * Runs the given Tool by {@link Tool#run(String[])}, after + * parsing with the given generic arguments. Uses the given + * Configuration, or builds one if null. + * + * Sets the Tool's configuration with the possibly modified + * version of the conf. + * + * @param conf Configuration for the Tool. + * @param tool Tool to run. + * @param args command-line arguments to the tool. + * @return exit code of the {@link Tool#run(String[])} method. + */ + public static int run(Configuration conf, Tool tool, String[] args) + throws Exception{ + if(conf == null) { + conf = new Configuration(); + } + GenericOptionsParser parser = new GenericOptionsParser(conf, args); + //set the configuration back, so that Tool can configure itself + tool.setConf(conf); + + //get the args w/o generic hadoop args + String[] toolArgs = parser.getRemainingArgs(); + return tool.run(toolArgs); + } + + /** + * Runs the Tool with its Configuration. + * + * Equivalent to run(tool.getConf(), tool, args). + * + * @param tool Tool to run. + * @param args command-line arguments to the tool. + * @return exit code of the {@link Tool#run(String[])} method. + */ + public static int run(Tool tool, String[] args) + throws Exception{ + return run(tool.getConf(), tool, args); + } + + /** + * Prints generic command-line argurments and usage information. + * + * @param out stream to write usage information to. + */ + public static void printGenericCommandUsage(PrintStream out) { + GenericOptionsParser.printGenericCommandUsage(out); + } + +} diff --git a/src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java b/src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java new file mode 100644 index 00000000000..498daa80974 --- /dev/null +++ b/src/java/org/apache/hadoop/util/UTF8ByteArrayUtils.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +public class UTF8ByteArrayUtils { + /** + * Find the first occurrence of the given byte b in a UTF-8 encoded string + * @param utf a byte array containing a UTF-8 encoded string + * @param start starting offset + * @param end ending position + * @param b the byte to find + * @return position that first byte occures otherwise -1 + */ + public static int findByte(byte [] utf, int start, int end, byte b) { + for(int i=start; iBloom filter, as defined by Bloom in 1970. + *

+ * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + * the networking research community in the past decade thanks to the bandwidth efficiencies that it + * offers for the transmission of set membership information between networked hosts. A sender encodes + * the information into a bit vector, the Bloom filter, that is more compact than a conventional + * representation. Computation and space costs for construction are linear in the number of elements. + * The receiver uses the filter to test whether various elements are members of the set. Though the + * filter will occasionally return a false positive, it will never return a false negative. When creating + * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + * + *
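A short usage sketch of the filter defined below; the sizing (about 8 bits and 4 hash functions per expected key, giving roughly a 2-3% false positive rate) is illustrative:

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class BloomFilterDemo {
  public static void main(String[] args) {
    // About 8 bits per expected entry for ~1000 keys, hashed 4 ways.
    BloomFilter filter = new BloomFilter(8 * 1000, 4, Hash.MURMUR_HASH);

    filter.add(new Key("apple".getBytes()));
    filter.add(new Key("banana".getBytes()));

    System.out.println(filter.membershipTest(new Key("apple".getBytes())));  // true, never a false negative
    System.out.println(filter.membershipTest(new Key("cherry".getBytes()))); // usually false; may be a false positive
  }
}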

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * + * @see Space/Time Trade-Offs in Hash Coding with Allowable Errors + */ +public class BloomFilter extends Filter { + private static final byte[] bitvalues = new byte[] { + (byte)0x01, + (byte)0x02, + (byte)0x04, + (byte)0x08, + (byte)0x10, + (byte)0x20, + (byte)0x40, + (byte)0x80 + }; + + /** The bit vector. */ + BitSet bits; + + /** Default constructor - use with readFields */ + public BloomFilter() { + super(); + } + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public BloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + bits = new BitSet(this.vectorSize); + } + + @Override + public void add(Key key) { + if(key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + bits.set(h[i]); + } + } + + @Override + public void and(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + this.bits.and(((BloomFilter) filter).bits); + } + + @Override + public boolean membershipTest(Key key) { + if(key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + for(int i = 0; i < nbHash; i++) { + if(!bits.get(h[i])) { + return false; + } + } + return true; + } + + @Override + public void not() { + bits.flip(0, vectorSize - 1); + } + + @Override + public void or(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + bits.or(((BloomFilter) filter).bits); + } + + @Override + public void xor(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + bits.xor(((BloomFilter) filter).bits); + } + + @Override + public String toString() { + return bits.toString(); + } + + /** + * @return size of the the bloomfilter + */ + public int getVectorSize() { + return this.vectorSize; + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + byte[] bytes = new byte[getNBytes()]; + for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if (bitIndex == 0) { + bytes[byteIndex] = 0; + } + if (bits.get(i)) { + bytes[byteIndex] |= bitvalues[bitIndex]; + } + } + out.write(bytes); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + bits = new BitSet(this.vectorSize); + byte[] bytes = new byte[getNBytes()]; + in.readFully(bytes); + for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) { + bits.set(i); + } + } + } + + /* @return number of bytes needed to hold bit vector */ + private 
int getNBytes() { + return (vectorSize + 7) / 8; + } +}//end class diff --git a/src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java b/src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java new file mode 100644 index 00000000000..527d2bff713 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/CountingBloomFilter.java @@ -0,0 +1,305 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Implements a counting Bloom filter, as defined by Fan et al. in a ToN + * 2000 paper. + *

+ * A counting Bloom filter is an improvement to standard a Bloom filter as it + * allows dynamic additions and deletions of set membership information. This + * is achieved through the use of a counting vector instead of a bit vector. + *

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * + * @see Summary cache: a scalable wide-area web cache sharing protocol + */ +public final class CountingBloomFilter extends Filter { + /** Storage for the counting buckets */ + private long[] buckets; + + /** We are using 4bit buckets, so each bucket can count to 15 */ + private final static long BUCKET_MAX_VALUE = 15; + + /** Default constructor - use with readFields */ + public CountingBloomFilter() {} + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public CountingBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + buckets = new long[buckets2words(vectorSize)]; + } + + /** returns the number of 64 bit words it would take to hold vectorSize buckets */ + private static int buckets2words(int vectorSize) { + return ((vectorSize - 1) >>> 4) + 1; + } + + + @Override + public void add(Key key) { + if(key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + // only increment if the count in the bucket is less than BUCKET_MAX_VALUE + if(bucketValue < BUCKET_MAX_VALUE) { + // increment by 1 + buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift); + } + } + } + + /** + * Removes a specified key from this counting Bloom filter. + *

+ * Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + * @param key The key to remove. + */ + public void delete(Key key) { + if(key == null) { + throw new NullPointerException("Key may not be null"); + } + if(!membershipTest(key)) { + throw new IllegalArgumentException("Key is not a member"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE + if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { + // decrement by 1 + buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); + } + } + } + + @Override + public void and(Filter filter) { + if(filter == null + || !(filter instanceof CountingBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + CountingBloomFilter cbf = (CountingBloomFilter)filter; + + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + this.buckets[i] &= cbf.buckets[i]; + } + } + + @Override + public boolean membershipTest(Key key) { + if(key == null) { + throw new NullPointerException("Key may not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + + if((buckets[wordNum] & bucketMask) == 0) { + return false; + } + } + + return true; + } + + /** + * This method calculates an approximate count of the key, i.e. how many + * times the key was added to the filter. This allows the filter to be + * used as an approximate key -> count map. + *

NOTE: due to the bucket size of this filter, inserting the same + * key more than 15 times will cause an overflow at all filter positions + * associated with this key, and it will significantly increase the error + * rate for this and other keys. For this reason the filter can only be + * used to store small count values 0 <= N << 15. + * @param key key to be tested + * @return 0 if the key is not present. Otherwise, a positive value v will + * be returned such that v == count with probability equal to the + * error rate of this filter, and v > count otherwise. + * Additionally, if the filter experienced an underflow as a result of + * {@link #delete(Key)} operation, the return value may be lower than the + * count with the probability of the false negative rate of such + * filter. + */ + public int approximateCount(Key key) { + int res = Integer.MAX_VALUE; + int[] h = hash.hash(key); + hash.clear(); + for (int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + if (bucketValue < res) res = (int)bucketValue; + } + if (res != Integer.MAX_VALUE) { + return res; + } else { + return 0; + } + } + + @Override + public void not() { + throw new UnsupportedOperationException("not() is undefined for " + + this.getClass().getName()); + } + + @Override + public void or(Filter filter) { + if(filter == null + || !(filter instanceof CountingBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + + CountingBloomFilter cbf = (CountingBloomFilter)filter; + + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + this.buckets[i] |= cbf.buckets[i]; + } + } + + @Override + public void xor(Filter filter) { + throw new UnsupportedOperationException("xor() is undefined for " + + this.getClass().getName()); + } + + @Override + public String toString() { + StringBuilder res = new StringBuilder(); + + for(int i = 0; i < vectorSize; i++) { + if(i > 0) { + res.append(" "); + } + + int wordNum = i >> 4; // div 16 + int bucketShift = (i & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + res.append(bucketValue); + } + + return res.toString(); + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + out.writeLong(buckets[i]); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int sizeInWords = buckets2words(vectorSize); + buckets = new long[sizeInWords]; + for(int i = 0; i < sizeInWords; i++) { + buckets[i] = in.readLong(); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java b/src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java new file mode 100644 index 00000000000..caabb4a05ba --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/DynamicBloomFilter.java @@ -0,0 +1,293 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. 
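A sketch of the add/delete/approximateCount cycle described above, with illustrative sizing; the printed counts are exact only while no bucket collides or saturates:

import org.apache.hadoop.util.bloom.CountingBloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class CountingBloomFilterDemo {
  public static void main(String[] args) {
    CountingBloomFilter filter = new CountingBloomFilter(8 * 1000, 4, Hash.MURMUR_HASH);
    Key key = new Key("session-42".getBytes());

    filter.add(key);
    filter.add(key);
    System.out.println(filter.approximateCount(key)); // 2, barring collisions

    filter.delete(key);                               // decrements the 4-bit buckets
    System.out.println(filter.membershipTest(key));   // true, one addition still recorded
    filter.delete(key);
    System.out.println(filter.membershipTest(key));   // false, barring collisions
  }
}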
+ * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Implements a dynamic Bloom filter, as defined in the INFOCOM 2006 paper. + *

+ * A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + * each of the s rows is a standard Bloom filter. The creation + * process of a DBF is iterative. At the start, the DBF is a 1 * m + * bit matrix, i.e., it is composed of a single standard Bloom filter. + * It assumes that nr elements are recorded in the + * initial bit vector, where nr <= n (n is + * the cardinality of the set A to record in the filter). + *

+ * As the size of A grows during the execution of the application, + * several keys must be inserted in the DBF. When inserting a key into the DBF, + * one must first get an active Bloom filter in the matrix. A Bloom filter is + * active when the number of recorded keys, nr, is + * strictly less than the current cardinality of A, n. + * If an active Bloom filter is found, the key is inserted and + * nr is incremented by one. On the other hand, if there + * is no active Bloom filter, a new one is created (i.e., a new row is added to + * the matrix) according to the current size of A and the element + * is added in this new Bloom filter and the nr value of + * this new Bloom filter is set to one. A given key is said to belong to the + * DBF if the k positions are set to one in one of the matrix rows. + *
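A sketch of the row-growth behaviour described above; the parameters passed to the constructor below are illustrative (nr = 1000 keys per row):

import org.apache.hadoop.util.bloom.DynamicBloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class DynamicBloomFilterDemo {
  public static void main(String[] args) {
    // After 1000 keys have been recorded in the current row, the next add()
    // appends a fresh standard Bloom filter to the matrix.
    DynamicBloomFilter filter =
        new DynamicBloomFilter(8 * 1000, 4, Hash.MURMUR_HASH, 1000);

    for (int i = 0; i < 2500; i++) {                  // fills two rows and part of a third
      filter.add(new Key(("key-" + i).getBytes()));
    }
    System.out.println(filter.membershipTest(new Key("key-1234".getBytes()))); // true
  }
}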

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * @see BloomFilter A Bloom filter + * + * @see Theory and Network Applications of Dynamic Bloom Filters + */ +public class DynamicBloomFilter extends Filter { + /** + * Threshold for the maximum number of key to record in a dynamic Bloom filter row. + */ + private int nr; + + /** + * The number of keys recorded in the current standard active Bloom filter. + */ + private int currentNbRecord; + + /** + * The matrix of Bloom filter. + */ + private BloomFilter[] matrix; + + /** + * Zero-args constructor for the serialization. + */ + public DynamicBloomFilter() { } + + /** + * Constructor. + *

+ * Builds an empty Dynamic Bloom filter. + * @param vectorSize The number of bits in the vector. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + * @param nr The threshold for the maximum number of keys to record in a + * dynamic Bloom filter row. + */ + public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) { + super(vectorSize, nbHash, hashType); + + this.nr = nr; + this.currentNbRecord = 0; + + matrix = new BloomFilter[1]; + matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType); + } + + @Override + public void add(Key key) { + if (key == null) { + throw new NullPointerException("Key can not be null"); + } + + BloomFilter bf = getActiveStandardBF(); + + if (bf == null) { + addRow(); + bf = matrix[matrix.length - 1]; + currentNbRecord = 0; + } + + bf.add(key); + + currentNbRecord++; + } + + @Override + public void and(Filter filter) { + if (filter == null + || !(filter instanceof DynamicBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + DynamicBloomFilter dbf = (DynamicBloomFilter)filter; + + if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + for (int i = 0; i < matrix.length; i++) { + matrix[i].and(dbf.matrix[i]); + } + } + + @Override + public boolean membershipTest(Key key) { + if (key == null) { + return true; + } + + for (int i = 0; i < matrix.length; i++) { + if (matrix[i].membershipTest(key)) { + return true; + } + } + + return false; + } + + @Override + public void not() { + for (int i = 0; i < matrix.length; i++) { + matrix[i].not(); + } + } + + @Override + public void or(Filter filter) { + if (filter == null + || !(filter instanceof DynamicBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + + DynamicBloomFilter dbf = (DynamicBloomFilter)filter; + + if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + for (int i = 0; i < matrix.length; i++) { + matrix[i].or(dbf.matrix[i]); + } + } + + @Override + public void xor(Filter filter) { + if (filter == null + || !(filter instanceof DynamicBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + DynamicBloomFilter dbf = (DynamicBloomFilter)filter; + + if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + + for(int i = 0; ithis dynamic Bloom filter. + */ + private void addRow() { + BloomFilter[] tmp = new BloomFilter[matrix.length + 1]; + + for (int i = 0; i < matrix.length; i++) { + tmp[i] = matrix[i]; + } + + tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType); + + matrix = tmp; + } + + /** + * Returns the active standard Bloom filter in this dynamic Bloom filter. + * @return BloomFilter The active standard Bloom filter. + * Null otherwise. 
+ */ + private BloomFilter getActiveStandardBF() { + if (currentNbRecord >= nr) { + return null; + } + + return matrix[matrix.length - 1]; + } +} diff --git a/src/java/org/apache/hadoop/util/bloom/Filter.java b/src/java/org/apache/hadoop/util/bloom/Filter.java new file mode 100644 index 00000000000..e95273b5913 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/Filter.java @@ -0,0 +1,213 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.hash.Hash; + +/** + * Defines the general behavior of a filter. + *

+ * A filter is a data structure which aims at offering a lossy summary of a set A. The + * key idea is to map entries of A (also called keys) into several positions + * in a vector through the use of several hash functions. + *

+ * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). + *

+ * It must be extended in order to define the real behavior. + * + * @see Key The general behavior of a key + * @see HashFunction A hash function + */ +public abstract class Filter implements Writable { + private static final int VERSION = -1; // negative to accommodate for old format + /** The vector size of this filter. */ + protected int vectorSize; + + /** The hash function used to map a key to several positions in the vector. */ + protected HashFunction hash; + + /** The number of hash functions to consider. */ + protected int nbHash; + + /** Type of hashing function to use. */ + protected int hashType; + + protected Filter() {} + + /** + * Constructor. + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash functions to consider. + * @param hashType type of the hashing function (see {@link Hash}). + */ + protected Filter(int vectorSize, int nbHash, int hashType) { + this.vectorSize = vectorSize; + this.nbHash = nbHash; + this.hashType = hashType; + this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType); + } + + /** + * Adds a key to this filter. + * @param key The key to add. + */ + public abstract void add(Key key); + + /** + * Determines whether a specified key belongs to this filter. + * @param key The key to test. + * @return boolean True if the specified key belongs to this filter. + * False otherwise. + */ + public abstract boolean membershipTest(Key key); + + /** + * Performs a logical AND between this filter and a specified filter. + *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to AND with. + */ + public abstract void and(Filter filter); + + /** + * Performs a logical OR between this filter and a specified filter. + *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to OR with. + */ + public abstract void or(Filter filter); + + /** + * Performs a logical XOR between this filter and a specified filter. + *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to XOR with. + */ + public abstract void xor(Filter filter); + + /** + * Performs a logical NOT on this filter. + *

+ * The result is assigned to this filter. + */ + public abstract void not(); + + /** + * Adds a list of keys to this filter. + * @param keys The list of keys. + */ + public void add(List keys){ + if(keys == null) { + throw new IllegalArgumentException("ArrayList may not be null"); + } + + for(Key key: keys) { + add(key); + } + }//end add() + + /** + * Adds a collection of keys to this filter. + * @param keys The collection of keys. + */ + public void add(Collection keys){ + if(keys == null) { + throw new IllegalArgumentException("Collection may not be null"); + } + for(Key key: keys) { + add(key); + } + }//end add() + + /** + * Adds an array of keys to this filter. + * @param keys The array of keys. + */ + public void add(Key[] keys){ + if(keys == null) { + throw new IllegalArgumentException("Key[] may not be null"); + } + for(int i = 0; i < keys.length; i++) { + add(keys[i]); + } + }//end add() + + // Writable interface + + public void write(DataOutput out) throws IOException { + out.writeInt(VERSION); + out.writeInt(this.nbHash); + out.writeByte(this.hashType); + out.writeInt(this.vectorSize); + } + + public void readFields(DataInput in) throws IOException { + int ver = in.readInt(); + if (ver > 0) { // old unversioned format + this.nbHash = ver; + this.hashType = Hash.JENKINS_HASH; + } else if (ver == VERSION) { + this.nbHash = in.readInt(); + this.hashType = in.readByte(); + } else { + throw new IOException("Unsupported version: " + ver); + } + this.vectorSize = in.readInt(); + this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType); + } +}//end class diff --git a/src/java/org/apache/hadoop/util/bloom/HashFunction.java b/src/java/org/apache/hadoop/util/bloom/HashFunction.java new file mode 100644 index 00000000000..535ce1c47b9 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/HashFunction.java @@ -0,0 +1,119 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import org.apache.hadoop.util.hash.Hash; + +/** + * Implements a hash object that returns a certain number of hashed values. + * + * @see Key The general behavior of a key being stored in a filter + * @see Filter The general behavior of a filter + */ +public final class HashFunction { + /** The number of hashed values. */ + private int nbHash; + + /** The maximum highest returned value. */ + private int maxValue; + + /** Hashing algorithm to use. */ + private Hash hashFunction; + + /** + * Constructor. + *

+ * Builds a hash function that must obey to a given maximum number of returned values and a highest value. + * @param maxValue The maximum highest returned value. + * @param nbHash The number of resulting hashed values. + * @param hashType type of the hashing function (see {@link Hash}). + */ + public HashFunction(int maxValue, int nbHash, int hashType) { + if (maxValue <= 0) { + throw new IllegalArgumentException("maxValue must be > 0"); + } + + if (nbHash <= 0) { + throw new IllegalArgumentException("nbHash must be > 0"); + } + + this.maxValue = maxValue; + this.nbHash = nbHash; + this.hashFunction = Hash.getInstance(hashType); + if (this.hashFunction == null) + throw new IllegalArgumentException("hashType must be known"); + } + + /** Clears this hash function. A NOOP */ + public void clear() { + } + + /** + * Hashes a specified key into several integers. + * @param k The specified key. + * @return The array of hashed values. + */ + public int[] hash(Key k){ + byte[] b = k.getBytes(); + if (b == null) { + throw new NullPointerException("buffer reference is null"); + } + if (b.length == 0) { + throw new IllegalArgumentException("key length must be > 0"); + } + int[] result = new int[nbHash]; + for (int i = 0, initval = 0; i < nbHash; i++) { + initval = hashFunction.hash(b, initval); + result[i] = Math.abs(initval % maxValue); + } + return result; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/bloom/Key.java b/src/java/org/apache/hadoop/util/bloom/Key.java new file mode 100644 index 00000000000..69c7f174036 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/Key.java @@ -0,0 +1,178 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.WritableComparable; + +/** + * The general behavior of a key that must be stored in a filter. + * + * @see Filter The general behavior of a filter + */ +public class Key implements WritableComparable { + /** Byte value of key */ + byte[] bytes; + + /** + * The weight associated to this key. + *

+ * Invariant: if it is not specified, each instance of + * Key will have a default weight of 1.0 + */ + double weight; + + /** default constructor - use with readFields */ + public Key() {} + + /** + * Constructor. + *

+ * Builds a key with a default weight. + * @param value The byte value of this key. + */ + public Key(byte[] value) { + this(value, 1.0); + } + + /** + * Constructor. + *

+ * Builds a key with a specified weight. + * @param value The value of this key. + * @param weight The weight associated to this key. + */ + public Key(byte[] value, double weight) { + set(value, weight); + } + + /** + * @param value + * @param weight + */ + public void set(byte[] value, double weight) { + if (value == null) { + throw new IllegalArgumentException("value can not be null"); + } + this.bytes = value; + this.weight = weight; + } + + /** @return byte[] The value of this key. */ + public byte[] getBytes() { + return this.bytes; + } + + /** @return Returns the weight associated to this key. */ + public double getWeight() { + return weight; + } + + /** + * Increments the weight of this key with a specified value. + * @param weight The increment. + */ + public void incrementWeight(double weight) { + this.weight += weight; + } + + /** Increments the weight of this key by one. */ + public void incrementWeight() { + this.weight++; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) { + return false; + } + return this.compareTo((Key)o) == 0; + } + + @Override + public int hashCode() { + int result = 0; + for (int i = 0; i < bytes.length; i++) { + result ^= Byte.valueOf(bytes[i]).hashCode(); + } + result ^= Double.valueOf(weight).hashCode(); + return result; + } + + // Writable + + public void write(DataOutput out) throws IOException { + out.writeInt(bytes.length); + out.write(bytes); + out.writeDouble(weight); + } + + public void readFields(DataInput in) throws IOException { + this.bytes = new byte[in.readInt()]; + in.readFully(this.bytes); + weight = in.readDouble(); + } + + // Comparable + + public int compareTo(Key other) { + int result = this.bytes.length - other.getBytes().length; + for (int i = 0; result == 0 && i < bytes.length; i++) { + result = this.bytes[i] - other.bytes[i]; + } + + if (result == 0) { + result = Double.valueOf(this.weight - other.weight).intValue(); + } + return result; + } +} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/util/bloom/RemoveScheme.java b/src/java/org/apache/hadoop/util/bloom/RemoveScheme.java new file mode 100644 index 00000000000..462fc3a972e --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/RemoveScheme.java @@ -0,0 +1,91 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +/** + * Defines the different remove scheme for retouched Bloom filters. + *
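+ * Each constant is intended as the scheme argument of
+ * RetouchedBloomFilter.selectiveClearing(Key, short). A minimal sketch, assuming an
+ * existing filter that already contains key:
+ *
+ *   filter.addFalsePositive(key);                       // report key as a false positive
+ *   filter.selectiveClearing(key, RemoveScheme.RATIO);  // clear one bit using the RATIO scheme
+ *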

+ * Originally created by + * European Commission One-Lab Project 034819. + */ +public interface RemoveScheme { + /** + * Random selection. + *

+ * The idea is to randomly select a bit to reset. + */ + public final static short RANDOM = 0; + + /** + * MinimumFN Selection. + *

+ * The idea is to select the bit to reset that will generate the minimum + * number of false negatives. + */ + public final static short MINIMUM_FN = 1; + + /** + * MaximumFP Selection. + *

+ * The idea is to select the bit to reset that will remove the maximum number + * of false positives. + */ + public final static short MAXIMUM_FP = 2; + + /** + * Ratio Selection. + *

+ * The idea is to select the bit to reset that will, at the same time, remove + * the maximum number of false positve while minimizing the amount of false + * negative generated. + */ + public final static short RATIO = 3; +} diff --git a/src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java b/src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java new file mode 100644 index 00000000000..c48fb340344 --- /dev/null +++ b/src/java/org/apache/hadoop/util/bloom/RetouchedBloomFilter.java @@ -0,0 +1,450 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +/** + * Implements a retouched Bloom filter, as defined in the CoNEXT 2006 paper. + *

+ * It allows the removal of selected false positives at the cost of introducing + * random false negatives, and with the benefit of eliminating some random false + * positives at the same time. + * + *
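+ * A short usage sketch (the vector size and hash count below are illustrative, not
+ * tuned values):
+ *
+ *   RetouchedBloomFilter filter = new RetouchedBloomFilter(1000, 4, Hash.MURMUR_HASH);
+ *   filter.add(new Key("member".getBytes()));
+ *
+ *   Key fp = new Key("not-a-member".getBytes());
+ *   if (filter.membershipTest(fp)) {                    // a false positive was observed
+ *     filter.addFalsePositive(fp);
+ *     filter.selectiveClearing(fp, RemoveScheme.MINIMUM_FN);
+ *   }
+ *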

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * @see BloomFilter A Bloom filter + * @see RemoveScheme The different selective clearing algorithms + * + * @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives + */ +public final class RetouchedBloomFilter extends BloomFilter +implements RemoveScheme { + /** + * KeyList vector (or ElementList Vector, as defined in the paper) of false positives. + */ + List[] fpVector; + + /** + * KeyList vector of keys recorded in the filter. + */ + List[] keyVector; + + /** + * Ratio vector. + */ + double[] ratio; + + private Random rand; + + /** Default constructor - use with readFields */ + public RetouchedBloomFilter() {} + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + this.rand = null; + createVector(); + } + + @Override + public void add(Key key) { + if (key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + bits.set(h[i]); + keyVector[h[i]].add(key); + } + } + + /** + * Adds a false positive information to this retouched Bloom filter. + *

+ * Invariant: if the false positive is null, nothing happens. + * @param key The false positive key to add. + */ + public void addFalsePositive(Key key) { + if (key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + fpVector[h[i]].add(key); + } + } + + /** + * Adds a collection of false positive information to this retouched Bloom filter. + * @param coll The collection of false positive. + */ + public void addFalsePositive(Collection coll) { + if (coll == null) { + throw new NullPointerException("Collection can not be null"); + } + + for (Key k : coll) { + addFalsePositive(k); + } + } + + /** + * Adds a list of false positive information to this retouched Bloom filter. + * @param keys The list of false positive. + */ + public void addFalsePositive(List keys) { + if (keys == null) { + throw new NullPointerException("ArrayList can not be null"); + } + + for (Key k : keys) { + addFalsePositive(k); + } + } + + /** + * Adds an array of false positive information to this retouched Bloom filter. + * @param keys The array of false positive. + */ + public void addFalsePositive(Key[] keys) { + if (keys == null) { + throw new NullPointerException("Key[] can not be null"); + } + + for (int i = 0; i < keys.length; i++) { + addFalsePositive(keys[i]); + } + } + + /** + * Performs the selective clearing for a given key. + * @param k The false positive key to remove from this retouched Bloom filter. + * @param scheme The selective clearing scheme to apply. + */ + public void selectiveClearing(Key k, short scheme) { + if (k == null) { + throw new NullPointerException("Key can not be null"); + } + + if (!membershipTest(k)) { + throw new IllegalArgumentException("Key is not a member"); + } + + int index = 0; + int[] h = hash.hash(k); + + switch(scheme) { + + case RANDOM: + index = randomRemove(); + break; + + case MINIMUM_FN: + index = minimumFnRemove(h); + break; + + case MAXIMUM_FP: + index = maximumFpRemove(h); + break; + + case RATIO: + index = ratioRemove(h); + break; + + default: + throw new AssertionError("Undefined selective clearing scheme"); + + } + + clearBit(index); + } + + private int randomRemove() { + if (rand == null) { + rand = new Random(); + } + + return rand.nextInt(nbHash); + } + + /** + * Chooses the bit position that minimizes the number of false negative generated. + * @param h The different bit positions. + * @return The position that minimizes the number of false negative generated. + */ + private int minimumFnRemove(int[] h) { + int minIndex = Integer.MAX_VALUE; + double minValue = Double.MAX_VALUE; + + for (int i = 0; i < nbHash; i++) { + double keyWeight = getWeight(keyVector[h[i]]); + + if (keyWeight < minValue) { + minIndex = h[i]; + minValue = keyWeight; + } + + } + + return minIndex; + } + + /** + * Chooses the bit position that maximizes the number of false positive removed. + * @param h The different bit positions. + * @return The position that maximizes the number of false positive removed. + */ + private int maximumFpRemove(int[] h) { + int maxIndex = Integer.MIN_VALUE; + double maxValue = Double.MIN_VALUE; + + for (int i = 0; i < nbHash; i++) { + double fpWeight = getWeight(fpVector[h[i]]); + + if (fpWeight > maxValue) { + maxValue = fpWeight; + maxIndex = h[i]; + } + } + + return maxIndex; + } + + /** + * Chooses the bit position that minimizes the number of false negative generated while maximizing. + * the number of false positive removed. 
+ * @param h The different bit positions. + * @return The position that minimizes the number of false negative generated while maximizing. + */ + private int ratioRemove(int[] h) { + computeRatio(); + int minIndex = Integer.MAX_VALUE; + double minValue = Double.MAX_VALUE; + + for (int i = 0; i < nbHash; i++) { + if (ratio[h[i]] < minValue) { + minValue = ratio[h[i]]; + minIndex = h[i]; + } + } + + return minIndex; + } + + /** + * Clears a specified bit in the bit vector and keeps up-to-date the KeyList vectors. + * @param index The position of the bit to clear. + */ + private void clearBit(int index) { + if (index < 0 || index >= vectorSize) { + throw new ArrayIndexOutOfBoundsException(index); + } + + List kl = keyVector[index]; + List fpl = fpVector[index]; + + // update key list + int listSize = kl.size(); + for (int i = 0; i < listSize && !kl.isEmpty(); i++) { + removeKey(kl.get(0), keyVector); + } + + kl.clear(); + keyVector[index].clear(); + + //update false positive list + listSize = fpl.size(); + for (int i = 0; i < listSize && !fpl.isEmpty(); i++) { + removeKey(fpl.get(0), fpVector); + } + + fpl.clear(); + fpVector[index].clear(); + + //update ratio + ratio[index] = 0.0; + + //update bit vector + bits.clear(index); + } + + /** + * Removes a given key from this filer. + * @param k The key to remove. + * @param vector The counting vector associated to the key. + */ + private void removeKey(Key k, List[] vector) { + if (k == null) { + throw new NullPointerException("Key can not be null"); + } + if (vector == null) { + throw new NullPointerException("ArrayList[] can not be null"); + } + + int[] h = hash.hash(k); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + vector[h[i]].remove(k); + } + } + + /** + * Computes the ratio A/FP. + */ + private void computeRatio() { + for (int i = 0; i < vectorSize; i++) { + double keyWeight = getWeight(keyVector[i]); + double fpWeight = getWeight(fpVector[i]); + + if (keyWeight > 0 && fpWeight > 0) { + ratio[i] = keyWeight / fpWeight; + } + } + } + + private double getWeight(List keyList) { + double weight = 0.0; + for (Key k : keyList) { + weight += k.getWeight(); + } + return weight; + } + + /** + * Creates and initialises the various vectors. 
+ */ + @SuppressWarnings("unchecked") + private void createVector() { + fpVector = new List[vectorSize]; + keyVector = new List[vectorSize]; + ratio = new double[vectorSize]; + + for (int i = 0; i < vectorSize; i++) { + fpVector[i] = Collections.synchronizedList(new ArrayList()); + keyVector[i] = Collections.synchronizedList(new ArrayList()); + ratio[i] = 0.0; + } + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + for (int i = 0; i < fpVector.length; i++) { + List list = fpVector[i]; + out.writeInt(list.size()); + for (Key k : list) { + k.write(out); + } + } + for (int i = 0; i < keyVector.length; i++) { + List list = keyVector[i]; + out.writeInt(list.size()); + for (Key k : list) { + k.write(out); + } + } + for (int i = 0; i < ratio.length; i++) { + out.writeDouble(ratio[i]); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + createVector(); + for (int i = 0; i < fpVector.length; i++) { + List list = fpVector[i]; + int size = in.readInt(); + for (int j = 0; j < size; j++) { + Key k = new Key(); + k.readFields(in); + list.add(k); + } + } + for (int i = 0; i < keyVector.length; i++) { + List list = keyVector[i]; + int size = in.readInt(); + for (int j = 0; j < size; j++) { + Key k = new Key(); + k.readFields(in); + list.add(k); + } + } + for (int i = 0; i < ratio.length; i++) { + ratio[i] = in.readDouble(); + } + } +} diff --git a/src/java/org/apache/hadoop/util/hash/Hash.java b/src/java/org/apache/hadoop/util/hash/Hash.java new file mode 100644 index 00000000000..6d3eb4ac4d4 --- /dev/null +++ b/src/java/org/apache/hadoop/util/hash/Hash.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.hash; + +import org.apache.hadoop.conf.Configuration; + +/** + * This class represents a common API for hashing functions. + */ +public abstract class Hash { + /** Constant to denote invalid hash type. */ + public static final int INVALID_HASH = -1; + /** Constant to denote {@link JenkinsHash}. */ + public static final int JENKINS_HASH = 0; + /** Constant to denote {@link MurmurHash}. */ + public static final int MURMUR_HASH = 1; + + /** + * This utility method converts String representation of hash function name + * to a symbolic constant. Currently two function types are supported, + * "jenkins" and "murmur". 
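+ * For example (any unrecognized name yields INVALID_HASH):
+ *
+ *   int type = Hash.parseHashType("murmur");        // MURMUR_HASH
+ *   Hash hasher = Hash.getInstance(type);
+ *   int h = hasher.hash("some bytes".getBytes());
+ *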
+ * @param name hash function name + * @return one of the predefined constants + */ + public static int parseHashType(String name) { + if ("jenkins".equalsIgnoreCase(name)) { + return JENKINS_HASH; + } else if ("murmur".equalsIgnoreCase(name)) { + return MURMUR_HASH; + } else { + return INVALID_HASH; + } + } + + /** + * This utility method converts the name of the configured + * hash type to a symbolic constant. + * @param conf configuration + * @return one of the predefined constants + */ + public static int getHashType(Configuration conf) { + String name = conf.get("hadoop.util.hash.type", "murmur"); + return parseHashType(name); + } + + /** + * Get a singleton instance of hash function of a given type. + * @param type predefined hash type + * @return hash function instance, or null if type is invalid + */ + public static Hash getInstance(int type) { + switch(type) { + case JENKINS_HASH: + return JenkinsHash.getInstance(); + case MURMUR_HASH: + return MurmurHash.getInstance(); + default: + return null; + } + } + + /** + * Get a singleton instance of hash function of a type + * defined in the configuration. + * @param conf current configuration + * @return defined hash type, or null if type is invalid + */ + public static Hash getInstance(Configuration conf) { + int type = getHashType(conf); + return getInstance(type); + } + + /** + * Calculate a hash using all bytes from the input argument, and + * a seed of -1. + * @param bytes input bytes + * @return hash value + */ + public int hash(byte[] bytes) { + return hash(bytes, bytes.length, -1); + } + + /** + * Calculate a hash using all bytes from the input argument, + * and a provided seed value. + * @param bytes input bytes + * @param initval seed value + * @return hash value + */ + public int hash(byte[] bytes, int initval) { + return hash(bytes, bytes.length, initval); + } + + /** + * Calculate a hash using bytes from 0 to length, and + * the provided seed value + * @param bytes input bytes + * @param length length of the valid bytes to consider + * @param initval seed value + * @return hash value + */ + public abstract int hash(byte[] bytes, int length, int initval); +} diff --git a/src/java/org/apache/hadoop/util/hash/JenkinsHash.java b/src/java/org/apache/hadoop/util/hash/JenkinsHash.java new file mode 100644 index 00000000000..89fd6cb02e2 --- /dev/null +++ b/src/java/org/apache/hadoop/util/hash/JenkinsHash.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.hash; + +import java.io.FileInputStream; +import java.io.IOException; + +/** + * Produces 32-bit hash for hash table lookup. + * + *

lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * You can use this free for any purpose.  It's in the public domain.
+ * It has no warranty.
+ * 
+ * + * @see lookup3.c + * @see Hash Functions (and how this + * function compares to others such as CRC, MD?, etc + * @see Has update on the + * Dr. Dobbs Article + */ +public class JenkinsHash extends Hash { + private static long INT_MASK = 0x00000000ffffffffL; + private static long BYTE_MASK = 0x00000000000000ffL; + + private static JenkinsHash _instance = new JenkinsHash(); + + public static Hash getInstance() { + return _instance; + } + + private static long rot(long val, int pos) { + return ((Integer.rotateLeft( + (int)(val & INT_MASK), pos)) & INT_MASK); + } + + /** + * taken from hashlittle() -- hash a variable-length key into a 32-bit value + * + * @param key the key (the unaligned variable-length array of bytes) + * @param nbytes number of bytes to include in hash + * @param initval can be any integer value + * @return a 32-bit value. Every bit of the key affects every bit of the + * return value. Two keys differing by one or two bits will have totally + * different hash values. + * + *
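+ * A call sketch, using the -1 seed that the convenience overload Hash.hash(byte[]) passes:
+ *
+ *   byte[] data = "example".getBytes();
+ *   int h = JenkinsHash.getInstance().hash(data, data.length, -1);
+ *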

The best hash table sizes are powers of 2. There is no need to do mod + * a prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask. + * For example, if you need only 10 bits, do + * h = (h & hashmask(10)); + * In which case, the hash table should have hashsize(10) elements. + * + *

If you are hashing n strings byte[][] k, do it like this: + * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h); + * + *

By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this + * code any way you wish, private, educational, or commercial. It's free. + * + *

Use for hash table lookup, or anything where one collision in 2^^32 is + * acceptable. Do NOT use for cryptographic purposes. + */ + @SuppressWarnings("fallthrough") + public int hash(byte[] key, int nbytes, int initval) { + int length = nbytes; + long a, b, c; // We use longs because we don't have unsigned ints + a = b = c = (0x00000000deadbeefL + length + initval) & INT_MASK; + int offset = 0; + for (; length > 12; offset += 12, length -= 12) { + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + + /* + * mix -- mix 3 32-bit values reversibly. + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * + * This was tested for: + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + * satisfy this are + * 4 6 8 16 19 4 + * 9 15 3 18 27 15 + * 14 9 3 7 17 3 + * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for + * "differ" defined as + with a one-bit base and a two-bit delta. I + * used http://burtleburtle.net/bob/hash/avalanche.html to choose + * the operations, constants, and arrangements of the variables. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. + * The most thoroughly mixed value is c, but it doesn't really even + * achieve avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the + * opposite direction as the goal of parallelism. I did what I could. + * Rotates seem to cost as much as shifts on every machine I could lay + * my hands on, and rotates are much kinder to the top and bottom bits, + * so I used rotates. 
+ * + * #define mix(a,b,c) \ + * { \ + * a -= c; a ^= rot(c, 4); c += b; \ + * b -= a; b ^= rot(a, 6); a += c; \ + * c -= b; c ^= rot(b, 8); b += a; \ + * a -= c; a ^= rot(c,16); c += b; \ + * b -= a; b ^= rot(a,19); a += c; \ + * c -= b; c ^= rot(b, 4); b += a; \ + * } + * + * mix(a,b,c); + */ + a = (a - c) & INT_MASK; a ^= rot(c, 4); c = (c + b) & INT_MASK; + b = (b - a) & INT_MASK; b ^= rot(a, 6); a = (a + c) & INT_MASK; + c = (c - b) & INT_MASK; c ^= rot(b, 8); b = (b + a) & INT_MASK; + a = (a - c) & INT_MASK; a ^= rot(c,16); c = (c + b) & INT_MASK; + b = (b - a) & INT_MASK; b ^= rot(a,19); a = (a + c) & INT_MASK; + c = (c - b) & INT_MASK; c ^= rot(b, 4); b = (b + a) & INT_MASK; + } + + //-------------------------------- last block: affect all 32 bits of (c) + switch (length) { // all the case statements fall through + case 12: + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 11: + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 10: + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 9: + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + case 8: + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 7: + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 6: + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 5: + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + case 4: + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + case 3: + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + case 2: + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + case 1: + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + break; + case 0: + return (int)(c & INT_MASK); + } + /* + * final -- final mixing of 3 32-bit values (a,b,c) into c + * + * Pairs of (a,b,c) values differing in only a few bits will usually + * produce values of c that look totally different. This was tested for + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * These constants passed: + * 14 11 25 16 4 14 24 + * 12 14 25 16 4 14 24 + * and these came close: + * 4 8 15 26 3 22 24 + * 10 8 15 26 3 22 24 + * 11 8 15 26 3 22 24 + * + * #define final(a,b,c) \ + * { + * c ^= b; c -= rot(b,14); \ + * a ^= c; a -= rot(c,11); \ + * b ^= a; b -= rot(a,25); \ + * c ^= b; c -= rot(b,16); \ + * a ^= c; a -= rot(c,4); \ + * b ^= a; b -= rot(a,14); \ + * c ^= b; c -= rot(b,24); \ + * } + * + */ + c ^= b; c = (c - rot(b,14)) & INT_MASK; + a ^= c; a = (a - rot(c,11)) & INT_MASK; + b ^= a; b = (b - rot(a,25)) & INT_MASK; + c ^= b; c = (c - rot(b,16)) & INT_MASK; + a ^= c; a = (a - rot(c,4)) & INT_MASK; + b ^= a; b = (b - rot(a,14)) & INT_MASK; + c ^= b; c = (c - rot(b,24)) & INT_MASK; + + return (int)(c & INT_MASK); + } + + /** + * Compute the hash of the specified file + * @param args name of file to compute hash of. 
+ * @throws IOException + */ + public static void main(String[] args) throws IOException { + if (args.length != 1) { + System.err.println("Usage: JenkinsHash filename"); + System.exit(-1); + } + FileInputStream in = new FileInputStream(args[0]); + byte[] bytes = new byte[512]; + int value = 0; + JenkinsHash hash = new JenkinsHash(); + for (int length = in.read(bytes); length > 0 ; length = in.read(bytes)) { + value = hash.hash(bytes, length, value); + } + System.out.println(Math.abs(value)); + } +} diff --git a/src/java/org/apache/hadoop/util/hash/MurmurHash.java b/src/java/org/apache/hadoop/util/hash/MurmurHash.java new file mode 100644 index 00000000000..09e311d0681 --- /dev/null +++ b/src/java/org/apache/hadoop/util/hash/MurmurHash.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.hash; + +/** + * This is a very fast, non-cryptographic hash suitable for general hash-based + * lookup. See http://murmurhash.googlepages.com/ for more details. + * + *

The C version of MurmurHash 2.0 found at that site was ported + * to Java by Andrzej Bialecki (ab at getopt org).
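+ *
+ * A usage sketch (the seed of 0 is an arbitrary choice for illustration):
+ *
+ *   byte[] data = "example".getBytes();
+ *   int h = MurmurHash.getInstance().hash(data, data.length, 0);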

+ */ +public class MurmurHash extends Hash { + private static MurmurHash _instance = new MurmurHash(); + + public static Hash getInstance() { + return _instance; + } + + public int hash(byte[] data, int length, int seed) { + int m = 0x5bd1e995; + int r = 24; + + int h = seed ^ length; + + int len_4 = length >> 2; + + for (int i = 0; i < len_4; i++) { + int i_4 = i << 2; + int k = data[i_4 + 3]; + k = k << 8; + k = k | (data[i_4 + 2] & 0xff); + k = k << 8; + k = k | (data[i_4 + 1] & 0xff); + k = k << 8; + k = k | (data[i_4 + 0] & 0xff); + k *= m; + k ^= k >>> r; + k *= m; + h *= m; + h ^= k; + } + + // avoid calculating modulo + int len_m = len_4 << 2; + int left = length - len_m; + + if (left != 0) { + if (left >= 3) { + h ^= (int) data[length - 3] << 16; + } + if (left >= 2) { + h ^= (int) data[length - 2] << 8; + } + if (left >= 1) { + h ^= (int) data[length - 1]; + } + + h *= m; + } + + h ^= h >>> 13; + h *= m; + h ^= h >>> 15; + + return h; + } +} diff --git a/src/java/org/apache/hadoop/util/package.html b/src/java/org/apache/hadoop/util/package.html new file mode 100644 index 00000000000..e6512f1e437 --- /dev/null +++ b/src/java/org/apache/hadoop/util/package.html @@ -0,0 +1,23 @@ + + + + + +Common utilities. + + diff --git a/src/java/overview.html b/src/java/overview.html new file mode 100644 index 00000000000..736da78aa1f --- /dev/null +++ b/src/java/overview.html @@ -0,0 +1,292 @@ + + + + + Hadoop + + + +Hadoop is a distributed computing platform. + +

Hadoop primarily consists of the Hadoop Distributed FileSystem +(HDFS) and an +implementation of the +Map-Reduce programming paradigm.

+ + +

Hadoop is a software framework that lets one easily write and run applications +that process vast amounts of data. Here's what makes Hadoop especially useful:

+
    +
  • + Scalable: Hadoop can reliably store and process petabytes. +
  • +
  • + Economical: It distributes the data and processing across clusters + of commonly available computers. These clusters can number into the thousands + of nodes. +
  • +
  • + Efficient: By distributing the data, Hadoop can process it in parallel + on the nodes where the data is located. This makes it extremely rapid. +
  • +
  • + Reliable: Hadoop automatically maintains multiple copies of data and + automatically redeploys computing tasks based on failures. +
  • +
+ +

Requirements

+ +

Platforms

+ +
    +
  • + Hadoop has been demonstrated on GNU/Linux clusters with 2000 nodes. +
  • +
  • + Win32 is supported as a development platform. Distributed operation + has not been well tested on Win32, so this is not a production + platform. +
  • +
+ +

Requisite Software

+ +
    +
  1. + Java 1.6.x, preferably from + Sun. + Set JAVA_HOME to the root of your Java installation. +
  2. +
  3. + ssh must be installed and sshd must be running to use Hadoop's + scripts to manage remote Hadoop daemons. +
  4. +
  5. + rsync may be installed to use Hadoop's scripts to manage remote + Hadoop installations. +
  6. +
+ +

Additional requirements for Windows

+ +
    +
  1. + Cygwin - Required for shell support in + addition to the required software above. +
  2. +
+ +

Installing Required Software

+ +

If your platform does not have the required software listed above, you +will have to install it.

+ +

For example on Ubuntu Linux:

+

+$ sudo apt-get install ssh
+$ sudo apt-get install rsync
+

+ +

On Windows, if you did not install the required software when you +installed cygwin, start the cygwin installer and select the packages:

+
    +
  • openssh - the "Net" category
  • +
  • rsync - the "Net" category
  • +
+ +

Getting Started

+ +

First, you need to get a copy of the Hadoop code.

+ +

Edit the file conf/hadoop-env.sh to define at least +JAVA_HOME.

+ +

Try the following command:

+bin/hadoop +

This will display the documentation for the Hadoop command script.

+ +

Standalone operation

+ +

By default, Hadoop is configured to run things in a non-distributed +mode, as a single Java process. This is useful for debugging, and can +be demonstrated as follows:

+ +mkdir input
+cp conf/*.xml input
+bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+cat output/* +
+

This will display counts for each match of the +regular expression.

+ +

Note that input is specified as a directory containing input +files and that output is also specified as a directory where parts are +written.

+ +

Distributed operation

+ +To configure Hadoop for distributed operation you must specify the +following: + +
    + +
  1. The NameNode (Distributed Filesystem master) host. This is +specified with the configuration property fs.default.name. +
  2. + +
  3. The {@link org.apache.hadoop.mapred.JobTracker} (MapReduce master) +host and port. This is specified with the configuration property +mapred.job.tracker. +
  4. + +
  5. A slaves file that lists the names of all the hosts in +the cluster. The default slaves file is conf/slaves. + +
+ +

Pseudo-distributed configuration

+ +You can in fact run everything on a single host. To run things this +way, put the following in: +
+
+conf/core-site.xml: +<configuration> + + <property> + <name>fs.default.name</name> + <value>hdfs://localhost/</value> + </property> + +</configuration> + +conf/hdfs-site.xml: +<configuration> + + <property> + <name>dfs.replication</name> + <value>1</value> + </property> + +</configuration> + +conf/mapred-site.xml: +<configuration> + + <property> + <name>mapred.job.tracker</name> + <value>localhost:9001</value> + </property> + +</configuration> + +

(We also set the HDFS replication level to 1 in order to +reduce warnings when running on a single node.)

+ +

Now check that the command
ssh localhost
does not +require a password. If it does, execute the following commands:

+ +

ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
+cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys +

+ +

Bootstrapping

+ +

A new distributed filesystem must be formatted with the following +command, run on the master node:

+ +

bin/hadoop namenode -format

+ +

The Hadoop daemons are started with the following command:

+ +

bin/start-all.sh

+ +

Daemon log output is written to the logs/ directory.

+ +

Input files are copied into the distributed filesystem as follows:

+ +

bin/hadoop fs -put input input

+ +

Distributed execution

+ +

Things are run as before, but output must be copied locally to +examine it:

+ + +bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+bin/hadoop fs -get output output +cat output/* +
+ +

When you're done, stop the daemons with:

+ +

bin/stop-all.sh

+ +

Fully-distributed operation

+ +

Fully distributed operation is just like the pseudo-distributed operation +described above, except that you must specify:

+ +
    + +
  1. The hostname or IP address of your master server in the value +for fs.default.name, + as hdfs://master.example.com/ in conf/core-site.xml.
  2. + +
  3. The host and port of your master server in the value +of mapred.job.tracker +as master.example.com:port in conf/mapred-site.xml.
  4. + +
  5. Directories for dfs.name.dir and +dfs.data.dir +in conf/hdfs-site.xml. +These are local directories used to hold distributed filesystem +data on the master node and slave nodes respectively. Note +that dfs.data.dir may contain a space- or comma-separated +list of directory names, so that data may be stored on multiple local +devices.
  6. + +
  7. mapred.local.dir + in conf/mapred-site.xml, the local directory where temporary + MapReduce data is stored. It also may be a list of directories.
  8. + +
  9. mapred.map.tasks +and mapred.reduce.tasks +in conf/mapred-site.xml. +As a rule of thumb, use 10x the +number of slave processors for mapred.map.tasks, and 2x the +number of slave processors for mapred.reduce.tasks.
  10. + +
+ +

Finally, list all slave hostnames or IP addresses in your +conf/slaves file, one per line. Then format your filesystem +and start your cluster on your master node, as above. + + + +