diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 3d84323a4df..47fc93078cb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -143,6 +143,8 @@ Release 2.0.1-alpha - UNRELEASED
 
     HDFS-3606. libhdfs: create self-contained unit test. (Colin Patrick McCabe via eli)
 
+    HDFS-3539. libhdfs code cleanups. (Colin Patrick McCabe via eli)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_connect.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_connect.c
index 9a22ac0f4f1..2b3a38f0a51 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_connect.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_connect.c
@@ -23,6 +23,8 @@
 #include
 #include
+#include
+#include
 
 #define HADOOP_SECURITY_AUTHENTICATION "hadoop.security.authentication"
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_dfs.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_dfs.c
index 3b19f45c0b2..9cb9075fc54 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_dfs.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_dfs.c
@@ -22,6 +22,9 @@
 #include "fuse_init.h"
 #include "fuse_connect.h"
 
+#include
+#include
+
 int is_protected(const char *path) {
 
   dfs_context *dfs = (dfs_context*)fuse_get_context()->private_data;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_chown.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_chown.c
index 53e48b4f7ec..c0b5bfb72ba 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_chown.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_chown.c
@@ -21,7 +21,9 @@
 #include "fuse_impls.h"
 #include "fuse_connect.h"
 
- int dfs_chown(const char *path, uid_t uid, gid_t gid)
+#include <stdlib.h>
+
+int dfs_chown(const char *path, uid_t uid, gid_t gid)
 {
   TRACE1("chown", path)
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_release.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_release.c
index 2019ff528e3..e15dd572d63 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_release.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_impls_release.c
@@ -21,6 +21,8 @@
 #include "fuse_file_handle.h"
 #include "fuse_connect.h"
 
+#include <stdlib.h>
+
 /**
  * release a fuse_file_info structure.
  *
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_init.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_init.c
index 22c7767755a..878b6f64ba9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_init.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_init.c
@@ -16,14 +16,16 @@
  * limitations under the License.
  */
 
-#include
-
 #include "fuse_dfs.h"
 #include "fuse_init.h"
 #include "fuse_options.h"
 #include "fuse_context_handle.h"
 #include "fuse_connect.h"
 
+#include
+#include
+#include
+
 // Hacked up function to basically do:
 //  protectedpaths = split(options.protected,':');
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_options.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_options.c
index 4264f0188c7..30c03991009 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_options.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/fuse-dfs/src/fuse_options.c
@@ -16,11 +16,12 @@
  * limitations under the License.
  */
 
+#include "fuse_context_handle.h"
 #include "fuse_dfs.h"
 #include "fuse_options.h"
-#include
-#include "fuse_context_handle.h"
+#include
+#include
 
 void print_options() {
   printf("options:\n"
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.c
index 04c4af7d27c..8ded7bfe760 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.c
@@ -19,6 +19,8 @@
 
 #include "hdfs.h"
 #include "hdfsJniHelper.h"
+#include
+#include
 
 /* Some frequently used Java paths */
 #define HADOOP_CONF "org/apache/hadoop/conf/Configuration"
@@ -47,6 +49,50 @@
 
 #define KERBEROS_TICKET_CACHE_PATH "hadoop.security.kerberos.ticket.cache.path"
 
+// Bit fields for hdfsFile_internal flags
+#define HDFS_FILE_SUPPORTS_DIRECT_READ (1<<0)
+
+tSize readDirect(hdfsFS fs, hdfsFile f, void* buffer, tSize length);
+
+/**
+ * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream .
+ */
+enum hdfsStreamType
+{
+    UNINITIALIZED = 0,
+    INPUT = 1,
+    OUTPUT = 2,
+};
+
+/**
+ * The 'file-handle' to a file in hdfs.
+ */
+struct hdfsFile_internal {
+    void* file;
+    enum hdfsStreamType type;
+    int flags;
+};
+
+int hdfsFileIsOpenForRead(hdfsFile file)
+{
+    return (file->type == INPUT);
+}
+
+int hdfsFileIsOpenForWrite(hdfsFile file)
+{
+    return (file->type == OUTPUT);
+}
+
+int hdfsFileUsesDirectRead(hdfsFile file)
+{
+    return !!(file->flags & HDFS_FILE_SUPPORTS_DIRECT_READ);
+}
+
+void hdfsFileDisableDirectRead(hdfsFile file)
+{
+    file->flags &= ~HDFS_FILE_SUPPORTS_DIRECT_READ;
+}
+
 /**
  * hdfsJniEnv: A wrapper struct to be used as 'value'
  * while saving thread -> JNIEnv* mappings
@@ -2182,7 +2228,7 @@ getFileInfoFromStat(JNIEnv *env, jobject jStat, hdfsFileInfo *fileInfo)
                     "FileStatus::getModificationTime");
         return -1;
     }
-    fileInfo->mLastMod = (tTime) (jVal.j / 1000);
+    fileInfo->mLastMod = jVal.j / 1000;
 
     if (invokeMethod(env, &jVal, &jExc, INSTANCE, jStat, HADOOP_STAT,
                     "getAccessTime", "()J") != 0) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.h b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.h
index c69c4cd5d58..d5cef6e84a9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.h
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs.h
@@ -19,18 +19,10 @@
 #ifndef LIBHDFS_HDFS_H
 #define LIBHDFS_HDFS_H
 
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <time.h>
-#include <errno.h>
-
-#include <jni.h>
+#include <errno.h> /* for EINTERNAL, etc. */
+#include <fcntl.h> /* for O_RDONLY, O_WRONLY */
+#include <stdint.h> /* for uint64_t, etc. */
+#include <time.h> /* for time_t */
 
 #ifndef O_RDONLY
 #define O_RDONLY 1
@@ -46,10 +38,10 @@
 
 /** All APIs set errno to meaningful values */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
-
     /**
      * Some utility decls used in libhdfs.
      */
@@ -67,33 +59,37 @@ extern "C" {
     /**
     * The C reflection of org.apache.org.hadoop.FileSystem .
     */
-    typedef void* hdfsFS;
-
+    struct hdfs_internal;
+    typedef struct hdfs_internal* hdfsFS;
 
-    /**
-     * The C equivalent of org.apache.org.hadoop.FSData(Input|Output)Stream .
-     */
-    enum hdfsStreamType
-    {
-        UNINITIALIZED = 0,
-        INPUT = 1,
-        OUTPUT = 2,
-    };
-
-
-    // Bit fields for hdfsFile_internal flags
-    #define HDFS_FILE_SUPPORTS_DIRECT_READ (1<<0)
-
-    /**
-     * The 'file-handle' to a file in hdfs.
-     */
-    struct hdfsFile_internal {
-        void* file;
-        enum hdfsStreamType type;
-        uint32_t flags;
-    };
+    struct hdfsFile_internal;
     typedef struct hdfsFile_internal* hdfsFile;
-
+
+    /**
+     * Determine if a file is open for read.
+     *
+     * @param file     The HDFS file
+     * @return         1 if the file is open for read; 0 otherwise
+     */
+    int hdfsFileIsOpenForRead(hdfsFile file);
+
+    /**
+     * Determine if a file is open for write.
+     *
+     * @param file     The HDFS file
+     * @return         1 if the file is open for write; 0 otherwise
+     */
+    int hdfsFileIsOpenForWrite(hdfsFile file);
+
+    /**
+     * Disable the direct read optimization for a file.
+     *
+     * This is mainly provided for unit testing purposes.
+     *
+     * @param file     The HDFS file
+     */
+    void hdfsFileDisableDirectRead(hdfsFile file);
+
     /**
      * hdfsConnectAsUser - Connect to a hdfs file system as a specific user
      * Connect to the hdfs.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_read.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_read.c
index 5a83b7ecc26..423f703e1cc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_read.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_read.c
@@ -18,6 +18,9 @@
 
 #include "hdfs.h"
 
+#include <stdio.h>
+#include <stdlib.h>
+
 int main(int argc, char **argv) {
 
     if (argc != 4) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.c
index 21a4f8190e9..c2a0cbd218f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.c
@@ -17,8 +17,15 @@
  */
 
 #include "hdfs.h"
+#include "hdfs_test.h"
 
-tSize readDirect(hdfsFS fs, hdfsFile f, void* buffer, tSize length);
+#include <inttypes.h>
+#include <jni.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
 
 void permission_disp(short permissions, char *rtr) {
   rtr[9] = '\0';
@@ -53,6 +60,9 @@ void permission_disp(short permissions, char *rtr) {
 }
 
 int main(int argc, char **argv) {
+    char buffer[32];
+    tSize num_written_bytes;
+
     hdfsFS fs = hdfsConnectNewInstance("default", 0);
     if(!fs) {
         fprintf(stderr, "Oops! Failed to connect to hdfs!\n");
@@ -77,7 +87,7 @@ int main(int argc, char **argv) {
            exit(-1);
         }
         fprintf(stderr, "Opened %s for writing successfully...\n", writePath);
-        tSize num_written_bytes =
+        num_written_bytes =
           hdfsWrite(fs, writeFile, (void*)fileContents, strlen(fileContents)+1);
         if (num_written_bytes != strlen(fileContents) + 1) {
           fprintf(stderr, "Failed to write correct number of bytes - expected %d, got %d\n",
@@ -127,6 +137,13 @@ int main(int argc, char **argv) {
             exit(-1);
         }
 
+        if (!hdfsFileIsOpenForRead(readFile)) {
+            fprintf(stderr, "hdfsFileIsOpenForRead: we just opened a file "
+                    "with O_RDONLY, and it did not show up as 'open for "
+                    "read'\n");
+            exit(-1);
+        }
+
         fprintf(stderr, "hdfsAvailable: %d\n", hdfsAvailable(fs, readFile));
 
         tOffset seekPos = 1;
@@ -144,7 +161,7 @@ int main(int argc, char **argv) {
         }
         fprintf(stderr, "Current position: %ld\n", currentPos);
 
-        if ((readFile->flags & HDFS_FILE_SUPPORTS_DIRECT_READ) == 0) {
+        if (!hdfsFileUsesDirectRead(readFile)) {
            fprintf(stderr, "Direct read support incorrectly not detected "
                    "for HDFS filesystem\n");
            exit(-1);
@@ -152,11 +169,31 @@ int main(int argc, char **argv) {
 
        fprintf(stderr, "Direct read support detected for HDFS\n");
 
-       // Clear flags so that we really go through slow read path
-       readFile->flags &= ~HDFS_FILE_SUPPORTS_DIRECT_READ;
+       // Test the direct read path
+       if(hdfsSeek(fs, readFile, 0)) {
+           fprintf(stderr, "Failed to seek %s for reading!\n", readPath);
+           exit(-1);
+       }
+       memset(buffer, 0, sizeof(buffer));
+       tSize num_read_bytes = hdfsRead(fs, readFile, (void*)buffer,
+               sizeof(buffer));
+       if (strncmp(fileContents, buffer, strlen(fileContents)) != 0) {
+           fprintf(stderr, "Failed to read (direct). Expected %s but got %s (%d bytes)\n",
+                   fileContents, buffer, num_read_bytes);
+           exit(-1);
+       }
+       fprintf(stderr, "Read (direct) following %d bytes:\n%s\n",
+               num_read_bytes, buffer);
+       if (hdfsSeek(fs, readFile, 0L)) {
+           fprintf(stderr, "Failed to seek to file start!\n");
+           exit(-1);
+       }
 
-       static char buffer[32];
-       tSize num_read_bytes = hdfsRead(fs, readFile, (void*)buffer,
+       // Disable the direct read path so that we really go through the slow
+       // read path
+       hdfsFileDisableDirectRead(readFile);
+
+       num_read_bytes = hdfsRead(fs, readFile, (void*)buffer,
                sizeof(buffer));
        fprintf(stderr, "Read following %d bytes:\n%s\n",
                num_read_bytes, buffer);
@@ -168,24 +205,6 @@ int main(int argc, char **argv) {
        fprintf(stderr, "Read following %d bytes:\n%s\n",
                num_read_bytes, buffer);
 
-       if (hdfsSeek(fs, readFile, 0L)) {
-           fprintf(stderr,
-                   "Failed to seek to file start for direct read test!\n");
-           exit(-1);
-       }
-
-       readFile->flags |= HDFS_FILE_SUPPORTS_DIRECT_READ;
-
-       memset(buffer, 0, strlen(fileContents + 1));
-       num_read_bytes = hdfsRead(fs, readFile, (void*)buffer,
-               sizeof(buffer));
-       if (strncmp(fileContents, buffer, strlen(fileContents)) != 0) {
-           fprintf(stderr, "Failed to read (direct). Expected %s but got %s (%d bytes)\n",
-                   fileContents, buffer, num_read_bytes);
-           exit(-1);
-       }
-       fprintf(stderr, "Read (direct) following %d bytes:\n%s\n",
-               num_read_bytes, buffer);
        hdfsCloseFile(fs, readFile);
 
        // Test correct behaviour for unsupported filesystems
@@ -195,34 +214,18 @@ int main(int argc, char **argv) {
            exit(-1);
        }
 
-       tSize num_written_bytes = hdfsWrite(lfs, localFile,
-                                           (void*)fileContents,
-                                           strlen(fileContents) + 1);
+       num_written_bytes = hdfsWrite(lfs, localFile, (void*)fileContents,
+                                     strlen(fileContents) + 1);
 
        hdfsCloseFile(lfs, localFile);
        localFile = hdfsOpenFile(lfs, writePath, O_RDONLY, 0, 0, 0);
 
-       if (localFile->flags & HDFS_FILE_SUPPORTS_DIRECT_READ) {
+       if (hdfsFileUsesDirectRead(localFile)) {
            fprintf(stderr, "Direct read support incorrectly detected for local "
                    "filesystem\n");
            exit(-1);
        }
 
-       memset(buffer, 0, strlen(fileContents + 1));
-       int result = readDirect(lfs, localFile, (void*)buffer, sizeof(buffer));
-       if (result != -1) {
-           fprintf(stderr, "Expected error from local direct read not seen!\n");
-           exit(-1);
-       }
-
-       if (errno != ENOTSUP) {
-           fprintf(stderr, "Error code not correctly set to ENOTSUP, was %d!\n",
-                   errno);
-           exit(-1);
-       }
-
-       fprintf(stderr, "Expected exception thrown for unsupported direct read\n");
-
        hdfsCloseFile(lfs, localFile);
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.h b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.h
new file mode 100644
index 00000000000..ec59ba3367b
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_test.h
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFS_HDFS_TEST_H
+#define LIBHDFS_HDFS_TEST_H
+
+struct hdfsFile_internal;
+
+/**
+ * Some functions that are visible only for testing.
+ *
+ * This header is not meant to be exported or used outside of the libhdfs unit
+ * tests.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    /**
+     * Determine if a file is using the "direct read" optimization.
+     *
+     * @param file     The HDFS file
+     * @return         1 if the file is using the direct read optimization,
+     *                 0 otherwise.
+     */
+    int hdfsFileUsesDirectRead(struct hdfsFile_internal *file);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_write.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_write.c
index c096f953100..b0f320c525a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_write.c
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/hdfs_write.c
@@ -16,10 +16,12 @@
 * limitations under the License.
 */
 
-#include <limits.h>
-
 #include "hdfs.h"
 
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
 int main(int argc, char **argv) {
 
     if (argc != 4) {
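---

For reference, below is a minimal sketch (not part of the patch) of how a libhdfs client might exercise the introspection calls this change adds to hdfs.h: hdfsFileIsOpenForRead, hdfsFileIsOpenForWrite, and hdfsFileDisableDirectRead. These accessors replace the direct pokes at readFile->flags that the old test did, which is what lets hdfsFile_internal become opaque. The connection arguments ("default", 0) and the path "/tmp/probe.txt" are illustrative placeholders, not values taken from the patch.

#include "hdfs.h"

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    /* Connect to the default filesystem ("default", 0 are placeholders). */
    hdfsFS fs = hdfsConnectNewInstance("default", 0);
    if (!fs) {
        fprintf(stderr, "failed to connect to hdfs\n");
        return EXIT_FAILURE;
    }

    /* Open a hypothetical file for reading. */
    hdfsFile f = hdfsOpenFile(fs, "/tmp/probe.txt", O_RDONLY, 0, 0, 0);
    if (!f) {
        fprintf(stderr, "failed to open /tmp/probe.txt\n");
        hdfsDisconnect(fs);
        return EXIT_FAILURE;
    }

    /* The new predicates report the handle's stream type: an O_RDONLY
     * handle is open for read, not for write. */
    fprintf(stderr, "open for read:  %d\n", hdfsFileIsOpenForRead(f));
    fprintf(stderr, "open for write: %d\n", hdfsFileIsOpenForWrite(f));

    /* Force later hdfsRead() calls through the slow (non-direct) path;
     * as the header comment notes, this exists mainly for unit tests. */
    hdfsFileDisableDirectRead(f);

    hdfsCloseFile(fs, f);
    hdfsDisconnect(fs);
    return EXIT_SUCCESS;
}

Since hdfsFS and hdfsFile are now forward-declared opaque pointers, a sketch like this compiles against the new header without seeing the layout of hdfsFile_internal, and the test-only hdfsFileUsesDirectRead stays hidden in hdfs_test.h.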