HDFS-10740: libhdfs++: Implement recursive directory generator. Contributed by Anatoli Shein.

This commit is contained in:
James 2016-08-12 11:44:40 -04:00 committed by James Clampffer
parent 2a8edd4e52
commit cbb3e9fe58
3 changed files with 202 additions and 0 deletions

View File

@ -17,3 +17,4 @@
#
add_subdirectory(cat)
add_subdirectory(gendirs)

View File

@ -0,0 +1,35 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Default LIBHDFSPP_DIR to the default install location. You can override
# it by add -DLIBHDFSPP_DIR=... to your cmake invocation
set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
include_directories( ${LIBHDFSPP_DIR}/include )
link_directories( ${LIBHDFSPP_DIR}/lib )
add_executable(gendirs_cpp gendirs.cpp)
target_link_libraries(gendirs_cpp hdfspp)
# Several examples in different languages need to produce executables with
# same names. To allow executables with same names we keep their CMake
# names different, but specify their executable names as follows:
set_target_properties( gendirs_cpp
PROPERTIES
OUTPUT_NAME "gendirs"
)

View File

@ -0,0 +1,166 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/
/**
* A recursive directory generator tool.
*
* Generates a directory tree with specified depth and fanout starting from
* a given path. Generation is asynchronous.
*
* Usage: gendirs [hdfs://[<hostname>:<port>]]/<path-to-dir> <depth> <fanout>
*
* Example: gendirs hdfs://localhost.localdomain:9433/dir0 3 10
*
* @param path-to-dir Absolute path to the directory tree root where the
* directory tree will be generated
* @param depth Depth of the directory tree (number of levels from
* root to leaves)
* @param fanout Fanout of each directory (number of sub-directories to
* be created inside each directory except leaf directories)
*
**/
#include "hdfspp/hdfspp.h"
#include "fs/namenode_operations.h"
#include "common/hdfs_configuration.h"
#include "common/configuration_loader.h"
#include "common/uri.h"
#include <google/protobuf/io/coded_stream.h>
#include <future>
using namespace std;
using namespace hdfs;
#define SCHEME "hdfs"
void GenerateDirectories (shared_ptr<FileSystem> fs, int depth, int level, int fanout, string path, vector<future<Status>> & futures) {
//Level contains our current depth in the directory tree
if(level < depth) {
for(int i = 0; i < fanout; i++){
//Recursive calls to cover all possible paths from the root to the leave nodes
GenerateDirectories(fs, depth, level+1, fanout, path + "dir" + to_string(i) + "/", futures);
}
} else {
//We have reached the leaf nodes and now start making calls to create directories
//We make a promise which will be set when the call finishes and executes our handler
auto callstate = make_shared<promise<Status>>();
//Extract a future from this promise
future<Status> future(callstate->get_future());
//Save this future to the vector of futures which will be used to wait on all promises
//after the whole recursion is done
futures.push_back(move(future));
//Create a handler that will be executed when Mkdirs is done
auto handler = [callstate](const Status &s) {
callstate->set_value(s);
};
//Asynchronous call to create this directory along with all missing parent directories
fs->Mkdirs(path, NameNodeOperations::GetDefaultPermissionMask(), true, handler);
}
}
int main(int argc, char *argv[]) {
if (argc != 4) {
cerr << "usage: gendirs [hdfs://[<hostname>:<port>]]/<path-to-dir> <depth> <fanout>" << endl;
return 1;
}
optional<URI> uri;
const string uri_path = argv[1];
const int depth = stoi(argv[2]);
const int fanout = stoi(argv[3]);
//Separate check for scheme is required, otherwise common/uri.h library causes memory issues under valgrind
size_t scheme_end = uri_path.find("://");
if (scheme_end != string::npos) {
if(uri_path.substr(0, string(SCHEME).size()).compare(SCHEME) != 0) {
cerr << "Scheme " << uri_path.substr(0, scheme_end) << ":// is not supported" << endl;
return 1;
} else {
uri = URI::parse_from_string(uri_path);
}
}
if (!uri) {
cerr << "Malformed URI: " << uri_path << endl;
return 1;
}
ConfigurationLoader loader;
optional<HdfsConfiguration> config = loader.LoadDefaultResources<HdfsConfiguration>();
const char * envHadoopConfDir = getenv("HADOOP_CONF_DIR");
if (envHadoopConfDir && (*envHadoopConfDir != 0) ) {
config = loader.OverlayResourceFile(*config, string(envHadoopConfDir) + "/core-site.xml");
}
Options options;
options.rpc_timeout = numeric_limits<int>::max();
if(config){
options = config->GetOptions();
}
IoService * io_service = IoService::New();
FileSystem *fs_raw = FileSystem::New(io_service, "", options);
if (!fs_raw) {
cerr << "Could not create FileSystem object" << endl;
return 1;
}
//Wrapping fs_raw into a unique pointer to guarantee deletion
shared_ptr<FileSystem> fs(fs_raw);
//Get port from the uri, otherwise use the default port
string port = to_string(uri->get_port().value_or(8020));
Status stat = fs->Connect(uri->get_host(), port);
if (!stat.ok()) {
cerr << "Could not connect to " << uri->get_host() << ":" << port << endl;
return 1;
}
/**
* We do not want the recursion to block on anything, therefore we will be
* making asynchronous calls recursively, and then just waiting for all
* the calls to finish.
*
* This array of futures will be populated by the recursive function below.
* Each new asynchronous Mkdirs call will add a future to this vector, and will
* create a promise, which will only be set when the call was completed and
* processed. After the whole recursion is complete we will need to wait until
* all promises are set before we can exit.
**/
vector<future<Status>> futures;
GenerateDirectories(fs, depth, 0, fanout, uri->get_path() + "/", futures);
/**
* We are waiting here until all promises are set, and checking whether
* the returned statuses contained any errors.
**/
for(future<Status> &fs : futures){
Status stat = fs.get();
if (!stat.ok()) {
cerr << "Error: " << stat.ToString() << endl;
}
}
cout << "All done!" << endl;
// Clean up static data and prevent valgrind memory leaks
google::protobuf::ShutdownProtobufLibrary();
return 0;
}