HDFS-10740: libhdfs++: Implement recursive directory generator. Contributed by Anatoli Shein.
parent 2a8edd4e52
commit cbb3e9fe58
CMakeLists.txt
@@ -17,3 +17,4 @@
 #
 #
 add_subdirectory(cat)
+add_subdirectory(gendirs)
gendirs/CMakeLists.txt
@@ -0,0 +1,35 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default LIBHDFSPP_DIR to the default install location. You can override
# it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})

include_directories( ${LIBHDFSPP_DIR}/include )
link_directories( ${LIBHDFSPP_DIR}/lib )

add_executable(gendirs_cpp gendirs.cpp)
target_link_libraries(gendirs_cpp hdfspp)

# Several examples in different languages need to produce executables with the
# same names. To allow executables with the same names we keep their CMake
# names different, but specify their executable names as follows:
set_target_properties( gendirs_cpp
    PROPERTIES
    OUTPUT_NAME "gendirs"
)
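A note for anyone building this example out of tree: the LIBHDFSPP_DIR cache variable above is how the build locates the installed libhdfs++ headers and libraries, so it can be redirected at configure time, e.g. `cmake -DLIBHDFSPP_DIR=/usr/local <path-to-example-source>` followed by `make` (the /usr/local prefix here is only an illustrative assumption; substitute the actual install location).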
gendirs/gendirs.cpp
@@ -0,0 +1,166 @@
/*
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing,
  software distributed under the License is distributed on an
  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  KIND, either express or implied.  See the License for the
  specific language governing permissions and limitations
  under the License.
*/

/**
 * A recursive directory generator tool.
 *
 * Generates a directory tree with the specified depth and fanout, starting
 * from a given path. Generation is asynchronous.
 *
 * Usage:   gendirs [hdfs://[<hostname>:<port>]]/<path-to-dir> <depth> <fanout>
 *
 * Example: gendirs hdfs://localhost.localdomain:9433/dir0 3 10
 *
 * @param path-to-dir Absolute path to the directory tree root where the
 *                    directory tree will be generated
 * @param depth       Depth of the directory tree (number of levels from
 *                    root to leaves)
 * @param fanout      Fanout of each directory (number of sub-directories to
 *                    be created inside each directory except leaf directories)
 **/

#include "hdfspp/hdfspp.h"
#include "fs/namenode_operations.h"
#include "common/hdfs_configuration.h"
#include "common/configuration_loader.h"
#include "common/uri.h"

#include <google/protobuf/io/coded_stream.h>
#include <future>

using namespace std;
using namespace hdfs;

#define SCHEME "hdfs"

void GenerateDirectories(shared_ptr<FileSystem> fs, int depth, int level, int fanout, string path, vector<future<Status>> &futures) {
  //Level contains our current depth in the directory tree
  if(level < depth) {
    for(int i = 0; i < fanout; i++){
      //Recursive calls to cover all possible paths from the root to the leaf nodes
      GenerateDirectories(fs, depth, level+1, fanout, path + "dir" + to_string(i) + "/", futures);
    }
  } else {
    //We have reached the leaf nodes and now start making calls to create directories
    //We make a promise which will be set when the call finishes and executes our handler
    auto callstate = make_shared<promise<Status>>();
    //Extract a future from this promise
    future<Status> future(callstate->get_future());
    //Save this future to the vector of futures which will be used to wait on all
    //promises after the whole recursion is done
    futures.push_back(move(future));
    //Create a handler that will be executed when Mkdirs is done
    auto handler = [callstate](const Status &s) {
      callstate->set_value(s);
    };
    //Asynchronous call to create this directory along with all missing parent directories
    fs->Mkdirs(path, NameNodeOperations::GetDefaultPermissionMask(), true, handler);
  }
}

int main(int argc, char *argv[]) {
  if (argc != 4) {
    cerr << "usage: gendirs [hdfs://[<hostname>:<port>]]/<path-to-dir> <depth> <fanout>" << endl;
    return 1;
  }

  optional<URI> uri;
  const string uri_path = argv[1];
  const int depth = stoi(argv[2]);
  const int fanout = stoi(argv[3]);

  //A separate check for the scheme is required; otherwise the common/uri.h
  //library causes memory issues under valgrind
  size_t scheme_end = uri_path.find("://");
  if (scheme_end != string::npos) {
    if(uri_path.substr(0, string(SCHEME).size()).compare(SCHEME) != 0) {
      cerr << "Scheme " << uri_path.substr(0, scheme_end) << ":// is not supported" << endl;
      return 1;
    } else {
      uri = URI::parse_from_string(uri_path);
    }
  }
  if (!uri) {
    cerr << "Malformed URI: " << uri_path << endl;
    return 1;
  }

  ConfigurationLoader loader;
  optional<HdfsConfiguration> config = loader.LoadDefaultResources<HdfsConfiguration>();
  const char *envHadoopConfDir = getenv("HADOOP_CONF_DIR");
  if (envHadoopConfDir && (*envHadoopConfDir != 0)) {
    config = loader.OverlayResourceFile(*config, string(envHadoopConfDir) + "/core-site.xml");
  }

  Options options;
  options.rpc_timeout = numeric_limits<int>::max();
  if(config){
    options = config->GetOptions();
  }

  IoService *io_service = IoService::New();

  FileSystem *fs_raw = FileSystem::New(io_service, "", options);
  if (!fs_raw) {
    cerr << "Could not create FileSystem object" << endl;
    return 1;
  }
  //Wrapping fs_raw in a shared pointer to guarantee deletion
  shared_ptr<FileSystem> fs(fs_raw);

  //Get the port from the URI if present, otherwise use the default port
  string port = to_string(uri->get_port().value_or(8020));
  Status stat = fs->Connect(uri->get_host(), port);
  if (!stat.ok()) {
    cerr << "Could not connect to " << uri->get_host() << ":" << port << endl;
    return 1;
  }

  /**
   * We do not want the recursion to block on anything, therefore we will be
   * making asynchronous calls recursively, and then just waiting for all
   * the calls to finish.
   *
   * This vector of futures will be populated by the recursive function below.
   * Each new asynchronous Mkdirs call will add a future to this vector, and
   * will create a promise, which will only be set when the call has completed
   * and been processed. After the whole recursion is complete we will need to
   * wait until all promises are set before we can exit.
   **/
  vector<future<Status>> futures;

  GenerateDirectories(fs, depth, 0, fanout, uri->get_path() + "/", futures);

  /**
   * Here we wait until all promises are set, and check whether the returned
   * statuses contain any errors.
   **/
  for(future<Status> &f : futures){
    Status stat = f.get();
    if (!stat.ok()) {
      cerr << "Error: " << stat.ToString() << endl;
    }
  }

  cout << "All done!" << endl;

  // Clean up static data and prevent valgrind memory leaks
  google::protobuf::ShutdownProtobufLibrary();
  return 0;
}
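The concurrency pattern at the heart of gendirs.cpp, wrapping each callback-based Mkdirs call in a std::promise whose std::future is collected into a vector and drained once the recursion returns, is worth seeing in isolation. The following is a minimal, self-contained sketch of that pattern; SimulatedMkdir is a hypothetical stand-in for a callback-style asynchronous API and is not part of libhdfs++:

#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>

// Hypothetical stand-in for an asynchronous API that reports completion
// through a callback, the way FileSystem::Mkdirs does above.
void SimulatedMkdir(const std::string &path, std::function<void(bool)> handler) {
  std::thread([path, handler]() {
    // Pretend to create the directory, then report success.
    handler(true);
  }).detach();
}

int main() {
  std::vector<std::future<bool>> futures;

  for (int i = 0; i < 4; i++) {
    // One promise per call; the completion handler sets it when the call finishes.
    auto callstate = std::make_shared<std::promise<bool>>();
    futures.push_back(callstate->get_future());
    SimulatedMkdir("/dir" + std::to_string(i),
                   [callstate](bool ok) { callstate->set_value(ok); });
  }

  // Block until every outstanding call has completed, exactly as the
  // final loop in gendirs.cpp does with its vector<future<Status>>.
  for (auto &f : futures) {
    std::cout << (f.get() ? "ok" : "failed") << std::endl;
  }
  return 0;
}

For the example invocation in the header comment (root /dir0, depth 3, fanout 10), the recursion issues 10^3 = 1000 such asynchronous Mkdirs calls, one per leaf path such as /dir0/dir0/dir0/dir0/, and relies on the third argument of Mkdirs (true above) to create the missing intermediate directories.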