HDFS-16470. Make HDFS find tool cross platform (#4076)

* The source files for hdfs_find use
   getopt for parsing the command-line
   arguments. getopt is available
   only on Linux and thus isn't cross
   platform.
* Thus, we need to replace getopt
   with boost::program_options to
   make hdfs_find cross platform.
This commit is contained in:
Gautham B A 2022-03-18 21:41:01 +05:30 committed by GitHub
parent da9970dd69
commit a631f45a99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 590 additions and 152 deletions

View File

@ -36,6 +36,7 @@ add_executable(hdfs_tool_tests
hdfs-mkdir-mock.cc hdfs-mkdir-mock.cc
hdfs-rm-mock.cc hdfs-rm-mock.cc
hdfs-get-mock.cc hdfs-get-mock.cc
hdfs-find-mock.cc
main.cc) main.cc)
target_include_directories(hdfs_tool_tests PRIVATE target_include_directories(hdfs_tool_tests PRIVATE
../tools ../tools
@ -56,6 +57,7 @@ target_include_directories(hdfs_tool_tests PRIVATE
../../tools/hdfs-mkdir ../../tools/hdfs-mkdir
../../tools/hdfs-rm ../../tools/hdfs-rm
../../tools/hdfs-get ../../tools/hdfs-get
../../tools/hdfs-find
../../tools/hdfs-cat) ../../tools/hdfs-cat)
target_link_libraries(hdfs_tool_tests PRIVATE target_link_libraries(hdfs_tool_tests PRIVATE
gmock_main gmock_main
@ -75,5 +77,6 @@ target_link_libraries(hdfs_tool_tests PRIVATE
hdfs_mkdir_lib hdfs_mkdir_lib
hdfs_rm_lib hdfs_rm_lib
hdfs_get_lib hdfs_get_lib
hdfs_find_lib
hdfs_cat_lib) hdfs_cat_lib)
add_test(hdfs_tool_tests hdfs_tool_tests) add_test(hdfs_tool_tests hdfs_tool_tests)

View File

@ -48,9 +48,11 @@ void CreateSnapshotMock::SetExpectations(
} }
if (*test_case_func == &PassNOptAndAPath<CreateSnapshotMock>) { if (*test_case_func == &PassNOptAndAPath<CreateSnapshotMock>) {
const auto arg1 = args[1]; const auto opt_n = args[0];
const auto arg2 = std::optional{args[0]}; const auto path = args[2];
EXPECT_CALL(*this, HandleSnapshot(arg1, arg2)) const auto opt_n_value = std::optional{args[1]};
ASSERT_EQ(opt_n, "-n");
EXPECT_CALL(*this, HandleSnapshot(path, opt_n_value))
.Times(1) .Times(1)
.WillOnce(testing::Return(true)); .WillOnce(testing::Return(true));
} }

View File

@ -0,0 +1,93 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "hdfs-find-mock.h"
#include "hdfs-tool-tests.h"
#include "hdfspp/hdfspp.h"
namespace hdfs::tools::test {
// Out-of-line definition of the destructor that hdfs-find-mock.h only declares.
FindMock::~FindMock() = default;
void FindMock::SetExpectations(
std::function<std::unique_ptr<FindMock>()> test_case,
const std::vector<std::string> &args) const {
// Get the pointer to the function that defines the test case
const auto test_case_func =
test_case.target<std::unique_ptr<FindMock> (*)()>();
ASSERT_NE(test_case_func, nullptr);
// Set the expected method calls and their corresponding arguments for each
// test case
if (*test_case_func == &CallHelp<FindMock>) {
EXPECT_CALL(*this, HandleHelp()).Times(1).WillOnce(testing::Return(true));
return;
}
if (*test_case_func == &PassAPath<FindMock>) {
const auto arg1 = args[0];
EXPECT_CALL(*this, HandlePath(arg1, "*",
hdfs::FileSystem::GetDefaultFindMaxDepth()))
.Times(1)
.WillOnce(testing::Return(true));
}
if (*test_case_func == &PassNOptAndAPath<FindMock>) {
const auto arg1 = args[0];
const auto arg2 = args[1];
const auto arg3 = args[2];
ASSERT_EQ(arg1, "-n");
EXPECT_CALL(*this, HandlePath(arg3, arg2,
hdfs::FileSystem::GetDefaultFindMaxDepth()))
.Times(1)
.WillOnce(testing::Return(true));
}
if (*test_case_func == &PassMOptPermissionsAndAPath<FindMock>) {
const auto arg1 = args[0];
const auto arg2 = args[1];
const auto arg3 = args[2];
ASSERT_EQ(arg1, "-m");
EXPECT_CALL(*this,
HandlePath(arg3, "*", static_cast<uint32_t>(std::stoi(arg2))))
.Times(1)
.WillOnce(testing::Return(true));
}
if (*test_case_func == &PassNStrMNumAndAPath<FindMock>) {
const auto arg1 = args[0];
const auto arg2 = args[1];
const auto arg3 = args[2];
const auto arg4 = args[3];
const auto arg5 = args[4];
ASSERT_EQ(arg1, "-n");
ASSERT_EQ(arg3, "-m");
EXPECT_CALL(*this,
HandlePath(arg5, arg2, static_cast<uint32_t>(std::stoi(arg4))))
.Times(1)
.WillOnce(testing::Return(true));
}
}
} // namespace hdfs::tools::test

View File

@ -0,0 +1,69 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIBHDFSPP_TOOLS_HDFS_FIND_MOCK
#define LIBHDFSPP_TOOLS_HDFS_FIND_MOCK
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include "hdfs-find.h"
namespace hdfs::tools::test {
/**
* {@class FindMock} is a mock of {@class Find}: it replaces the HandleHelp and
* HandlePath methods with gmock methods so that tests can set expectations on
* how the tool invokes them.
*/
class FindMock : public hdfs::tools::Find {
public:
/**
* {@inheritdoc}
*/
FindMock(const int argc, char **argv) : Find(argc, argv) {}
// Abiding by the Rule of 5: a mock can neither be copied nor moved
FindMock(const FindMock &) = delete;
FindMock(FindMock &&) = delete;
FindMock &operator=(const FindMock &) = delete;
FindMock &operator=(FindMock &&) = delete;
// Declared here only; the definition is not part of this header
~FindMock() override;
/**
* Defines the methods and the corresponding arguments that are expected
* to be called on this instance of {@link HdfsTool} for the given test case.
*
* @param test_case An {@link std::function} object that points to the
* function defining the test case
* @param args The arguments that are passed to this test case. Defaults to
* an empty list.
*/
void SetExpectations(std::function<std::unique_ptr<FindMock>()> test_case,
const std::vector<std::string> &args = {}) const;
// Mocked counterpart of Find::HandleHelp
MOCK_METHOD(bool, HandleHelp, (), (const, override));
// Mocked counterpart of Find::HandlePath. The parameters are, in order, the
// path, the name pattern and the maximum depth.
MOCK_METHOD(bool, HandlePath,
(const std::string &, const std::string &, uint32_t),
(const, override));
};
} // namespace hdfs::tools::test
#endif

View File

@ -31,6 +31,7 @@
#include "hdfs-df-mock.h" #include "hdfs-df-mock.h"
#include "hdfs-disallow-snapshot-mock.h" #include "hdfs-disallow-snapshot-mock.h"
#include "hdfs-du-mock.h" #include "hdfs-du-mock.h"
#include "hdfs-find-mock.h"
#include "hdfs-get-mock.h" #include "hdfs-get-mock.h"
#include "hdfs-mkdir-mock.h" #include "hdfs-mkdir-mock.h"
#include "hdfs-move-to-local-mock.h" #include "hdfs-move-to-local-mock.h"
@ -140,6 +141,14 @@ INSTANTIATE_TEST_SUITE_P(
PassAPath<hdfs::tools::test::RmMock>, PassAPath<hdfs::tools::test::RmMock>,
PassRecursivePath<hdfs::tools::test::RmMock>)); PassRecursivePath<hdfs::tools::test::RmMock>));
INSTANTIATE_TEST_SUITE_P(
HdfsFind, HdfsToolBasicTest,
testing::Values(CallHelp<hdfs::tools::test::FindMock>,
PassAPath<hdfs::tools::test::FindMock>,
PassNStrMNumAndAPath<hdfs::tools::test::FindMock>,
PassMOptPermissionsAndAPath<hdfs::tools::test::FindMock>,
PassNOptAndAPath<hdfs::tools::test::FindMock>));
// Negative tests // Negative tests
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
HdfsAllowSnapshot, HdfsToolNegativeTestThrows, HdfsAllowSnapshot, HdfsToolNegativeTestThrows,
@ -210,6 +219,17 @@ INSTANTIATE_TEST_SUITE_P(
PassRecursiveOwnerAndAPath<hdfs::tools::test::RmMock>, PassRecursiveOwnerAndAPath<hdfs::tools::test::RmMock>,
PassMOpt<hdfs::tools::test::RmMock>)); PassMOpt<hdfs::tools::test::RmMock>));
INSTANTIATE_TEST_SUITE_P(
HdfsFind, HdfsToolNegativeTestThrows,
testing::Values(Pass2Paths<hdfs::tools::test::FindMock>,
Pass3Paths<hdfs::tools::test::FindMock>,
PassRecursiveOwnerAndAPath<hdfs::tools::test::FindMock>,
PassRecursive<hdfs::tools::test::FindMock>,
PassRecursivePath<hdfs::tools::test::FindMock>,
PassMPOptsPermissionsAndAPath<hdfs::tools::test::FindMock>,
PassMOpt<hdfs::tools::test::FindMock>,
PassNOpt<hdfs::tools::test::FindMock>));
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
HdfsRm, HdfsToolNegativeTestNoThrow, HdfsRm, HdfsToolNegativeTestNoThrow,
testing::Values(PassRecursive<hdfs::tools::test::RmMock>)); testing::Values(PassRecursive<hdfs::tools::test::RmMock>));

View File

@ -118,7 +118,7 @@ template <class T> std::unique_ptr<T> PassNOptAndAPath() {
static char *argv[] = {exe.data(), arg1.data(), arg2.data(), arg3.data()}; static char *argv[] = {exe.data(), arg1.data(), arg2.data(), arg3.data()};
auto hdfs_tool = std::make_unique<T>(argc, argv); auto hdfs_tool = std::make_unique<T>(argc, argv);
hdfs_tool->SetExpectations(PassNOptAndAPath<T>, {arg2, arg3}); hdfs_tool->SetExpectations(PassNOptAndAPath<T>, {arg1, arg2, arg3});
return hdfs_tool; return hdfs_tool;
} }
@ -271,4 +271,34 @@ template <class T> std::unique_ptr<T> PassMPOptsPermissionsAndAPath() {
return hdfs_tool; return hdfs_tool;
} }
/**
 * Test case that runs a tool as "hdfs_tool_name -n some_str -m 757 some/path".
 *
 * @return The tool under test, with its expectations for this case already
 * set.
 */
template <class T> std::unique_ptr<T> PassNStrMNumAndAPath() {
  // The argument strings and the argv array have static storage so that the
  // pointers handed to the tool stay valid after this function returns
  // (presumably the tool keeps argv around - verify against HdfsTool).
  static std::string tool_name("hdfs_tool_name");
  static std::string name_flag("-n");
  static std::string name_value("some_str");
  static std::string depth_flag("-m");
  static std::string depth_value("757");
  static std::string target_path("some/path");
  static char *argv[] = {tool_name.data(),   name_flag.data(),
                         name_value.data(),  depth_flag.data(),
                         depth_value.data(), target_path.data()};
  constexpr auto argc = 6;

  auto tool = std::make_unique<T>(argc, argv);
  tool->SetExpectations(
      PassNStrMNumAndAPath<T>,
      {name_flag, name_value, depth_flag, depth_value, target_path});
  return tool;
}
/**
 * Test case that runs a tool as "hdfs_tool_name -n", i.e. with the -n option
 * but without a value for it.
 *
 * @return The tool under test, with its expectations for this case already
 * set.
 */
template <class T> std::unique_ptr<T> PassNOpt() {
  // Static storage keeps the argv pointers valid after this function returns
  static std::string tool_name("hdfs_tool_name");
  static std::string name_flag("-n");
  static char *argv[] = {tool_name.data(), name_flag.data()};
  constexpr auto argc = 2;

  auto tool = std::make_unique<T>(argc, argv);
  tool->SetExpectations(PassNOpt<T>, {name_flag});
  return tool;
}
#endif #endif

View File

@ -41,8 +41,7 @@ add_subdirectory(hdfs-chown)
add_subdirectory(hdfs-chmod) add_subdirectory(hdfs-chmod)
add_executable(hdfs_find hdfs_find.cc) add_subdirectory(hdfs-find)
target_link_libraries(hdfs_find tools_common hdfspp_static)
add_subdirectory(hdfs-mkdir) add_subdirectory(hdfs-mkdir)

View File

@ -0,0 +1,27 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Static library containing the hdfs_find implementation (hdfs-find.cc) together
# with the objects of hdfs_tool_obj. Both the hdfs_find executable below and the
# hdfs_tool_tests target link against it.
add_library(hdfs_find_lib STATIC $<TARGET_OBJECTS:hdfs_tool_obj> hdfs-find.cc)
target_include_directories(hdfs_find_lib PRIVATE ../../tools ${Boost_INCLUDE_DIRS})
target_link_libraries(hdfs_find_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static)

# The command-line executable: main.cc linked against the library above
add_executable(hdfs_find main.cc)
target_include_directories(hdfs_find PRIVATE ../../tools)
target_link_libraries(hdfs_find PRIVATE hdfs_find_lib)

# Install the hdfs_find binary into the bin directory
install(TARGETS hdfs_find RUNTIME DESTINATION bin)

View File

@ -0,0 +1,193 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <future>
#include <iostream>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include "hdfs-find.h"
#include "tools_common.h"
namespace hdfs::tools {
// Forwards the command-line arguments to the HdfsTool base class; the options
// are not parsed here but in Initialize().
Find::Find(const int argc, char **argv) : HdfsTool(argc, argv) {}
/**
 * Registers the options understood by hdfs_find (help, name, max-depth and
 * the positional path) and parses the command line into opt_val_.
 *
 * @return true once the command line was parsed. Errors found while parsing
 * are reported through exceptions raised by boost::program_options, not
 * through the return value.
 */
bool Find::Initialize() {
  auto add_options = opt_desc_.add_options();
  add_options(
      "help,h",
      "Finds all files recursively starting from the specified PATH and prints "
      "their file paths. This hdfs_find tool mimics the POSIX find.");
  add_options(
      "name,n", po::value<std::string>(),
      "If provided, all results will be matching the NAME pattern otherwise, "
      "the implicit '*' will be used NAME allows wild-cards");
  // The value type must be the standard uint32_t: Do() reads this option with
  // as<uint32_t>(), and the BSD-style u_int32_t typedef is not available on
  // every platform.
  add_options(
      "max-depth,m", po::value<uint32_t>(),
      "If provided, the maximum depth to recurse after the end of the path is "
      "reached will be limited by MAX_DEPTH otherwise, the maximum depth to "
      "recurse is unbound MAX_DEPTH can be set to 0 for pure globbing and "
      "ignoring the NAME option (no recursion after the end of the path)");
  add_options("path", po::value<std::string>(),
              "The path where we want to start the find operation");

  // We allow only one positional argument to be passed to this tool. An
  // exception is thrown if multiple arguments are passed.
  pos_opt_desc_.add("path", 1);

  po::store(po::command_line_parser(argc_, argv_)
                .options(opt_desc_)
                .positional(pos_opt_desc_)
                .run(),
            opt_val_);
  po::notify(opt_val_);
  return true;
}
/**
 * Builds the usage text of hdfs_find: synopsis, a short explanation, the
 * -n, -m and -h options and two example invocations.
 *
 * @return The complete usage text, with every line terminated by a newline.
 */
std::string Find::GetDescription() const {
  std::stringstream usage_text;
  usage_text
      << "Usage: hdfs_find [OPTION] PATH" << '\n'
      << '\n'
      << "Finds all files recursively starting from the" << '\n'
      << "specified PATH and prints their file paths." << '\n'
      << "This hdfs_find tool mimics the POSIX find." << '\n'
      << '\n'
      << "Both PATH and NAME can have wild-cards." << '\n'
      << '\n'
      << " -n NAME if provided all results will be matching the NAME "
         "pattern"
      << '\n'
      << " otherwise, the implicit '*' will be used" << '\n'
      << " NAME allows wild-cards" << '\n'
      << '\n'
      << " -m MAX_DEPTH if provided the maximum depth to recurse after the "
         "end of"
      << '\n'
      << " the path is reached will be limited by MAX_DEPTH" << '\n'
      << " otherwise, the maximum depth to recurse is unbound" << '\n'
      << " MAX_DEPTH can be set to 0 for pure globbing and "
         "ignoring"
      << '\n'
      << " the NAME option (no recursion after the end of the "
         "path)"
      << '\n'
      << '\n'
      << " -h display this help and exit" << '\n'
      << '\n'
      << "Examples:" << '\n'
      << "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n "
         "some?file*name"
      << '\n'
      << "hdfs_find / -n file_name -m 3" << '\n';
  return usage_text.str();
}
/**
 * Runs the tool: parses the command line, then either prints the help text
 * or starts the find operation on the given path.
 *
 * @return The result of HandleHelp or HandlePath, or false when
 * initialization or validation fails or when no path was given.
 */
bool Find::Do() {
  if (!Initialize()) {
    std::cerr << "Unable to initialize HDFS find tool" << std::endl;
    return false;
  }

  if (!ValidateConstraints()) {
    std::cout << GetDescription();
    return false;
  }

  // The help option takes precedence over everything else
  const bool help_requested = opt_val_.count("help") > 0;
  if (help_requested) {
    return HandleHelp();
  }

  // Without a path there is nothing to search
  if (opt_val_.count("path") == 0) {
    return false;
  }

  const auto path = opt_val_["path"].as<std::string>();

  // NAME falls back to the match-all pattern when -n is absent
  const bool has_name = opt_val_.count("name") > 0;
  const std::string name =
      has_name ? opt_val_["name"].as<std::string>() : std::string("*");

  // MAX_DEPTH falls back to the file system's default when -m is absent
  const bool has_max_depth = opt_val_.count("max-depth") > 0;
  const auto max_depth = has_max_depth
                             ? opt_val_["max-depth"].as<uint32_t>()
                             : hdfs::FileSystem::GetDefaultFindMaxDepth();

  return HandlePath(path, name, max_depth);
}
// Prints the usage text returned by GetDescription() to standard output and
// always reports success.
bool Find::HandleHelp() const {
std::cout << GetDescription();
return true;
}
/**
 * Connects to the file system that the path refers to, runs Find on it and
 * prints every result to standard output as it arrives.
 *
 * @param path The path (or URI) at which the find operation starts.
 * @param name The name pattern handed to Find.
 * @param max_depth The maximum depth handed to Find.
 *
 * @return true when the whole listing was received without an error, false
 * when the connection failed or Find reported an error status.
 */
bool Find::HandlePath(const std::string &path, const std::string &name,
                      const uint32_t max_depth) const {
  // Turn the given path into a URI object and connect to its file system
  auto uri = hdfs::parse_path_or_exit(path);
  const auto fs = hdfs::doConnect(uri, true);
  if (!fs) {
    std::cerr << "Could not connect the file system." << std::endl;
    return false;
  }

  // The promise is fulfilled by the handler once the last chunk has arrived;
  // this function waits for that on the matching future
  const auto listing_done = std::make_shared<std::promise<void>>();
  std::future<void> listing_finished = listing_done->get_future();
  auto first_error = hdfs::Status::OK();

  /**
   * Invoked for every chunk of results. Find guarantees that the handler is
   * never called concurrently and that it is called exactly once with
   * has_more_results set to false, hence no locking is needed here.
   */
  auto on_results = [listing_done, &first_error](
                        const hdfs::Status &chunk_status,
                        const std::vector<hdfs::StatInfo> &entries,
                        const bool has_more_results) -> bool {
    // Print the chunk right away; an empty chunk prints nothing
    for (const hdfs::StatInfo &entry : entries) {
      std::cout << entry.str() << std::endl;
    }

    // Remember only the first error that is reported
    if (!chunk_status.ok() && first_error.ok()) {
      first_error = chunk_status;
    }

    if (has_more_results) {
      return true; // Keep the results coming
    }

    listing_done->set_value(); // Wake up the waiting caller
    return false;              // Ask Find to stop sending results
  };

  // Find runs asynchronously, so block until the handler signals completion
  fs->Find(uri.get_path(), name, max_depth, on_results);
  listing_finished.get();

  if (first_error.ok()) {
    return true;
  }

  std::cerr << "Error: " << first_error.ToString() << std::endl;
  return false;
}
} // namespace hdfs::tools

View File

@ -0,0 +1,96 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIBHDFSPP_TOOLS_HDFS_FIND
#define LIBHDFSPP_TOOLS_HDFS_FIND
#include <string>
#include <boost/program_options.hpp>
#include "hdfs-tool.h"
namespace hdfs::tools {
/**
* {@class Find} is an {@class HdfsTool} that finds all files recursively,
* starting from the specified PATH, and prints their file paths. This tool
* mimics the POSIX find.
*/
class Find : public HdfsTool {
public:
/**
* {@inheritdoc}
*/
Find(int argc, char **argv);
// Abiding by the Rule of 5: copy and move construction are defaulted, while
// copy and move assignment are deleted
Find(const Find &) = default;
Find(Find &&) = default;
Find &operator=(const Find &) = delete;
Find &operator=(Find &&) = delete;
~Find() override = default;
/**
* {@inheritdoc}
*/
[[nodiscard]] std::string GetDescription() const override;
/**
* {@inheritdoc}
*/
[[nodiscard]] bool Do() override;
protected:
/**
* {@inheritdoc}
*/
[[nodiscard]] bool Initialize() override;
/**
* {@inheritdoc}
*
* For this tool the constraint is met only when argc_ is greater than 1.
*/
[[nodiscard]] bool ValidateConstraints() const override { return argc_ > 1; }
/**
* {@inheritdoc}
*/
[[nodiscard]] bool HandleHelp() const override;
/**
* Handle the path argument that's passed to this tool.
*
* @param path The path to the directory to begin the find.
* @param name The pattern name of the search term.
* @param max_depth The maximum depth of the traversal while searching through
* the folders.
*
* @return A boolean indicating the result of this operation.
*/
[[nodiscard]] virtual bool HandlePath(const std::string &path,
const std::string &name,
uint32_t max_depth) const;
private:
/**
* A boost data-structure containing the description of positional arguments
* passed to the command-line.
*/
po::positional_options_description pos_opt_desc_;
};
} // namespace hdfs::tools
#endif

View File

@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdlib>
#include <exception>
#include <iostream>
#include <google/protobuf/stubs/common.h>
#include "hdfs-find.h"
/**
 * Entry point of the hdfs_find executable.
 *
 * Schedules the protobuf clean-up to run at exit, runs the Find tool with the
 * given arguments and exits with EXIT_FAILURE when the tool does not succeed
 * or throws.
 */
int main(int argc, char *argv[]) {
  // Clean up static data on exit and prevent valgrind memory leaks
  const auto atexit_status = std::atexit(
      []() -> void { google::protobuf::ShutdownProtobufLibrary(); });
  if (atexit_status != 0) {
    std::cerr
        << "Error: Unable to schedule clean-up tasks for HDFS find tool, exiting"
        << std::endl;
    std::exit(EXIT_FAILURE);
  }

  hdfs::tools::Find find(argc, argv);

  // An exception thrown by the tool is reported and counts as a failure
  auto succeeded = false;
  try {
    succeeded = find.Do();
  } catch (const std::exception &error) {
    std::cerr << "Error: " << error.what() << std::endl;
  }

  if (succeeded) {
    return 0;
  }
  std::exit(EXIT_FAILURE);
}

View File

@ -1,146 +0,0 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/
#include <google/protobuf/stubs/common.h>
#include <unistd.h>
#include <future>
#include "tools_common.h"
// Prints the usage text of hdfs_find to standard output, flushing after every
// line.
void usage(){
  // One entry per output line; an empty entry produces a blank separator line
  static const char *const kUsageLines[] = {
    "Usage: hdfs_find [OPTION] PATH",
    "",
    "Finds all files recursively starting from the",
    "specified PATH and prints their file paths.",
    "This hdfs_find tool mimics the POSIX find.",
    "",
    "Both PATH and NAME can have wild-cards.",
    "",
    " -n NAME if provided all results will be matching the NAME pattern",
    " otherwise, the implicit '*' will be used",
    " NAME allows wild-cards",
    "",
    " -m MAX_DEPTH if provided the maximum depth to recurse after the end of",
    " the path is reached will be limited by MAX_DEPTH",
    " otherwise, the maximum depth to recurse is unbound",
    " MAX_DEPTH can be set to 0 for pure globbing and ignoring",
    " the NAME option (no recursion after the end of the path)",
    "",
    " -h display this help and exit",
    "",
    "Examples:",
    "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n some?file*name",
    "hdfs_find / -n file_name -m 3",
  };

  for (const char *line : kUsageLines) {
    std::cout << line << std::endl;
  }
}
/**
 * Entry point of the getopt-based hdfs_find tool.
 *
 * Parses the -h, -n NAME and -m MAX_DEPTH options followed by PATH, connects
 * to the file system PATH refers to and prints every entry that Find reports.
 *
 * @return 0 when the listing completed without an error, EXIT_FAILURE
 * otherwise.
 */
int main(int argc, char *argv[]) {
  //We should have at least 2 arguments
  if (argc < 2) {
    usage();
    exit(EXIT_FAILURE);
  }

  int input;

  //If NAME is not specified we use implicit "*"
  std::string name = "*";
  //If MAX_DEPTH is not specified we use the default provided by FileSystem
  uint32_t max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth();

  //Using GetOpt to read in the values
  opterr = 0;
  while ((input = getopt(argc, argv, "hn:m:")) != -1) {
    switch (input)
    {
    case 'h':
      usage();
      exit(EXIT_SUCCESS);
    case 'n':
      name = optarg;
      break;
    case 'm':
      max_depth = std::stoi(optarg);
      break;
    case '?':
      if (optopt == 'n' || optopt == 'm')
        std::cerr << "Option -" << (char) optopt << " requires an argument." << std::endl;
      else if (isprint(optopt))
        std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl;
      else
        std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl;
      usage();
      exit(EXIT_FAILURE);
    default:
      exit(EXIT_FAILURE);
    }
  }

  //After parsing, optind is the index of the first non-option argument. When
  //only options were given (e.g. "hdfs_find -n foo") there is no PATH and
  //argv[optind] is a null pointer, which must not be turned into a std::string.
  if (optind >= argc) {
    std::cerr << "Missing PATH argument." << std::endl;
    usage();
    exit(EXIT_FAILURE);
  }
  std::string uri_path = argv[optind];

  //Building a URI object from the given uri_path
  hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);

  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
  if (!fs) {
    std::cerr << "Could not connect the file system. " << std::endl;
    exit(EXIT_FAILURE);
  }

  std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
  std::future<void> future(promise->get_future());
  hdfs::Status status = hdfs::Status::OK();

  /**
   * Keep requesting more until we get the entire listing. Set the promise
   * when we have the entire listing to stop.
   *
   * Find guarantees that the handler will only be called once at a time,
   * so we do not need any locking here. It also guarantees that the handler will be
   * only called once with has_more_results set to false.
   */
  auto handler = [promise, &status]
                  (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
    //Print result chunks as they arrive. The loop variable is named 'info' so
    //that it does not shadow the status parameter 's'.
    if(!si.empty()) {
      for (hdfs::StatInfo const& info : si) {
        std::cout << info.str() << std::endl;
      }
    }
    if(!s.ok() && status.ok()){
      //We make sure we set 'status' only on the first error.
      status = s;
    }
    if (!has_more_results) {
      promise->set_value();  //set promise
      return false;          //request stop sending results
    }
    return true;  //request more results
  };

  //Asynchronous call to Find
  fs->Find(uri.get_path(), name, max_depth, handler);

  //block until promise is set
  future.get();

  const bool succeeded = status.ok();
  if(!succeeded) {
    std::cerr << "Error: " << status.ToString() << std::endl;
  }

  // Clean up static data and prevent valgrind memory leaks
  google::protobuf::ShutdownProtobufLibrary();

  //Report a failed find through the exit code instead of always returning 0
  return succeeded ? 0 : EXIT_FAILURE;
}