From a631f45a99c7abf8c9a2dcfb10afb668c8ff6b09 Mon Sep 17 00:00:00 2001 From: Gautham B A Date: Fri, 18 Mar 2022 21:41:01 +0530 Subject: [PATCH] HDFS-16470. Make HDFS find tool cross platform (#4076) * The source files for hdfs_find uses getopt for parsing the command line arguments. getopt is available only on Linux and thus, isn't cross platform. * Thus, we need to replace getopt with boost::program_options to make hdfs_find cross platform. --- .../libhdfspp/tests/tools/CMakeLists.txt | 3 + .../tests/tools/hdfs-create-snapshot-mock.cc | 8 +- .../libhdfspp/tests/tools/hdfs-find-mock.cc | 93 +++++++++ .../libhdfspp/tests/tools/hdfs-find-mock.h | 69 +++++++ .../libhdfspp/tests/tools/hdfs-tool-tests.cc | 20 ++ .../libhdfspp/tests/tools/hdfs-tool-tests.h | 32 ++- .../native/libhdfspp/tools/CMakeLists.txt | 3 +- .../libhdfspp/tools/hdfs-find/CMakeLists.txt | 27 +++ .../libhdfspp/tools/hdfs-find/hdfs-find.cc | 193 ++++++++++++++++++ .../libhdfspp/tools/hdfs-find/hdfs-find.h | 96 +++++++++ .../native/libhdfspp/tools/hdfs-find/main.cc | 52 +++++ .../main/native/libhdfspp/tools/hdfs_find.cc | 146 ------------- 12 files changed, 590 insertions(+), 152 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc delete mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt index 769e5da0f1c..22d677f0a29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable(hdfs_tool_tests hdfs-mkdir-mock.cc hdfs-rm-mock.cc hdfs-get-mock.cc + hdfs-find-mock.cc main.cc) target_include_directories(hdfs_tool_tests PRIVATE ../tools @@ -56,6 +57,7 @@ target_include_directories(hdfs_tool_tests PRIVATE ../../tools/hdfs-mkdir ../../tools/hdfs-rm ../../tools/hdfs-get + ../../tools/hdfs-find ../../tools/hdfs-cat) target_link_libraries(hdfs_tool_tests PRIVATE gmock_main @@ -75,5 +77,6 @@ target_link_libraries(hdfs_tool_tests PRIVATE hdfs_mkdir_lib hdfs_rm_lib hdfs_get_lib + hdfs_find_lib hdfs_cat_lib) add_test(hdfs_tool_tests hdfs_tool_tests) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc index 323963181fc..6a4f96a83fd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc +++ 
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-create-snapshot-mock.cc @@ -48,9 +48,11 @@ void CreateSnapshotMock::SetExpectations( } if (*test_case_func == &PassNOptAndAPath) { - const auto arg1 = args[1]; - const auto arg2 = std::optional{args[0]}; - EXPECT_CALL(*this, HandleSnapshot(arg1, arg2)) + const auto opt_n = args[0]; + const auto path = args[2]; + const auto opt_n_value = std::optional{args[1]}; + ASSERT_EQ(opt_n, "-n"); + EXPECT_CALL(*this, HandleSnapshot(path, opt_n_value)) .Times(1) .WillOnce(testing::Return(true)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc new file mode 100644 index 00000000000..9fd57ec270c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.cc @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +#include "hdfs-find-mock.h" +#include "hdfs-tool-tests.h" +#include "hdfspp/hdfspp.h" + +namespace hdfs::tools::test { +FindMock::~FindMock() = default; + +void FindMock::SetExpectations( + std::function()> test_case, + const std::vector &args) const { + // Get the pointer to the function that defines the test case + const auto test_case_func = + test_case.target (*)()>(); + ASSERT_NE(test_case_func, nullptr); + + // Set the expected method calls and their corresponding arguments for each + // test case + if (*test_case_func == &CallHelp) { + EXPECT_CALL(*this, HandleHelp()).Times(1).WillOnce(testing::Return(true)); + return; + } + + if (*test_case_func == &PassAPath) { + const auto arg1 = args[0]; + EXPECT_CALL(*this, HandlePath(arg1, "*", + hdfs::FileSystem::GetDefaultFindMaxDepth())) + .Times(1) + .WillOnce(testing::Return(true)); + } + + if (*test_case_func == &PassNOptAndAPath) { + const auto arg1 = args[0]; + const auto arg2 = args[1]; + const auto arg3 = args[2]; + ASSERT_EQ(arg1, "-n"); + EXPECT_CALL(*this, HandlePath(arg3, arg2, + hdfs::FileSystem::GetDefaultFindMaxDepth())) + .Times(1) + .WillOnce(testing::Return(true)); + } + + if (*test_case_func == &PassMOptPermissionsAndAPath) { + const auto arg1 = args[0]; + const auto arg2 = args[1]; + const auto arg3 = args[2]; + ASSERT_EQ(arg1, "-m"); + EXPECT_CALL(*this, + HandlePath(arg3, "*", static_cast(std::stoi(arg2)))) + .Times(1) + .WillOnce(testing::Return(true)); + } + + if (*test_case_func == &PassNStrMNumAndAPath) { + const auto arg1 = args[0]; + const auto arg2 = args[1]; + const auto arg3 = args[2]; + const auto arg4 = args[3]; + const auto arg5 = args[4]; + ASSERT_EQ(arg1, "-n"); + ASSERT_EQ(arg3, "-m"); + EXPECT_CALL(*this, + HandlePath(arg5, arg2, static_cast(std::stoi(arg4)))) + .Times(1) + .WillOnce(testing::Return(true)); + } +} +} // namespace hdfs::tools::test diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h new file mode 100644 index 00000000000..7520ac7c0da --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-find-mock.h @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LIBHDFSPP_TOOLS_HDFS_FIND_MOCK +#define LIBHDFSPP_TOOLS_HDFS_FIND_MOCK + +#include +#include +#include +#include + +#include + +#include "hdfs-find.h" + +namespace hdfs::tools::test { +/** + * {@class FindMock} is an {@class Find} whereby it mocks the + * HandleHelp and HandlePath methods for testing their functionality. 
+ */ +class FindMock : public hdfs::tools::Find { +public: + /** + * {@inheritdoc} + */ + FindMock(const int argc, char **argv) : Find(argc, argv) {} + + // Abiding to the Rule of 5 + FindMock(const FindMock &) = delete; + FindMock(FindMock &&) = delete; + FindMock &operator=(const FindMock &) = delete; + FindMock &operator=(FindMock &&) = delete; + ~FindMock() override; + + /** + * Defines the methods and the corresponding arguments that are expected + * to be called on this instance of {@link HdfsTool} for the given test case. + * + * @param test_case An {@link std::function} object that points to the + * function defining the test case + * @param args The arguments that are passed to this test case + */ + void SetExpectations(std::function()> test_case, + const std::vector &args = {}) const; + + MOCK_METHOD(bool, HandleHelp, (), (const, override)); + + MOCK_METHOD(bool, HandlePath, + (const std::string &, const std::string &, uint32_t), + (const, override)); +}; +} // namespace hdfs::tools::test + +#endif diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc index 50d555aebda..53df82021bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.cc @@ -31,6 +31,7 @@ #include "hdfs-df-mock.h" #include "hdfs-disallow-snapshot-mock.h" #include "hdfs-du-mock.h" +#include "hdfs-find-mock.h" #include "hdfs-get-mock.h" #include "hdfs-mkdir-mock.h" #include "hdfs-move-to-local-mock.h" @@ -140,6 +141,14 @@ INSTANTIATE_TEST_SUITE_P( PassAPath, PassRecursivePath)); +INSTANTIATE_TEST_SUITE_P( + HdfsFind, HdfsToolBasicTest, + testing::Values(CallHelp, + PassAPath, + PassNStrMNumAndAPath, + PassMOptPermissionsAndAPath, + PassNOptAndAPath)); + // Negative tests INSTANTIATE_TEST_SUITE_P( HdfsAllowSnapshot, HdfsToolNegativeTestThrows, @@ -210,6 +219,17 @@ INSTANTIATE_TEST_SUITE_P( PassRecursiveOwnerAndAPath, PassMOpt)); +INSTANTIATE_TEST_SUITE_P( + HdfsFind, HdfsToolNegativeTestThrows, + testing::Values(Pass2Paths, + Pass3Paths, + PassRecursiveOwnerAndAPath, + PassRecursive, + PassRecursivePath, + PassMPOptsPermissionsAndAPath, + PassMOpt, + PassNOpt)); + INSTANTIATE_TEST_SUITE_P( HdfsRm, HdfsToolNegativeTestNoThrow, testing::Values(PassRecursive)); diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h index 12dbc6c01ce..f27a2b022da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-tests.h @@ -118,7 +118,7 @@ template std::unique_ptr PassNOptAndAPath() { static char *argv[] = {exe.data(), arg1.data(), arg2.data(), arg3.data()}; auto hdfs_tool = std::make_unique(argc, argv); - hdfs_tool->SetExpectations(PassNOptAndAPath, {arg2, arg3}); + hdfs_tool->SetExpectations(PassNOptAndAPath, {arg1, arg2, arg3}); return hdfs_tool; } @@ -271,4 +271,34 @@ template std::unique_ptr PassMPOptsPermissionsAndAPath() { return hdfs_tool; } +template std::unique_ptr PassNStrMNumAndAPath() { + constexpr auto argc = 6; + static std::string exe("hdfs_tool_name"); + static 
std::string arg1("-n"); + static std::string arg2("some_str"); + static std::string arg3("-m"); + static std::string arg4("757"); + static std::string arg5("some/path"); + + static char *argv[] = {exe.data(), arg1.data(), arg2.data(), + arg3.data(), arg4.data(), arg5.data()}; + + auto hdfs_tool = std::make_unique(argc, argv); + hdfs_tool->SetExpectations(PassNStrMNumAndAPath, + {arg1, arg2, arg3, arg4, arg5}); + return hdfs_tool; +} + +template std::unique_ptr PassNOpt() { + constexpr auto argc = 2; + static std::string exe("hdfs_tool_name"); + static std::string arg1("-n"); + + static char *argv[] = {exe.data(), arg1.data()}; + + auto hdfs_tool = std::make_unique(argc, argv); + hdfs_tool->SetExpectations(PassNOpt, {arg1}); + return hdfs_tool; +} + #endif diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt index bed78b7a577..214d7b56dcb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt @@ -41,8 +41,7 @@ add_subdirectory(hdfs-chown) add_subdirectory(hdfs-chmod) -add_executable(hdfs_find hdfs_find.cc) -target_link_libraries(hdfs_find tools_common hdfspp_static) +add_subdirectory(hdfs-find) add_subdirectory(hdfs-mkdir) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt new file mode 100644 index 00000000000..c6ce0213287 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/CMakeLists.txt @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +add_library(hdfs_find_lib STATIC $ hdfs-find.cc) +target_include_directories(hdfs_find_lib PRIVATE ../../tools ${Boost_INCLUDE_DIRS}) +target_link_libraries(hdfs_find_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static) + +add_executable(hdfs_find main.cc) +target_include_directories(hdfs_find PRIVATE ../../tools) +target_link_libraries(hdfs_find PRIVATE hdfs_find_lib) + +install(TARGETS hdfs_find RUNTIME DESTINATION bin) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc new file mode 100644 index 00000000000..a149d05c139 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.cc @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include "hdfs-find.h" +#include "tools_common.h" + +namespace hdfs::tools { +Find::Find(const int argc, char **argv) : HdfsTool(argc, argv) {} + +bool Find::Initialize() { + auto add_options = opt_desc_.add_options(); + add_options( + "help,h", + "Finds all files recursively starting from the specified PATH and prints " + "their file paths. This hdfs_find tool mimics the POSIX find."); + add_options( + "name,n", po::value(), + "If provided, all results will be matching the NAME pattern otherwise, " + "the implicit '*' will be used NAME allows wild-cards"); + add_options( + "max-depth,m", po::value(), + "If provided, the maximum depth to recurse after the end of the path is " + "reached will be limited by MAX_DEPTH otherwise, the maximum depth to " + "recurse is unbound MAX_DEPTH can be set to 0 for pure globbing and " + "ignoring the NAME option (no recursion after the end of the path)"); + add_options("path", po::value(), + "The path where we want to start the find operation"); + + // We allow only one positional argument to be passed to this tool. An + // exception is thrown if multiple arguments are passed. + pos_opt_desc_.add("path", 1); + + po::store(po::command_line_parser(argc_, argv_) + .options(opt_desc_) + .positional(pos_opt_desc_) + .run(), + opt_val_); + po::notify(opt_val_); + return true; +} + +std::string Find::GetDescription() const { + std::stringstream desc; + desc << "Usage: hdfs_find [OPTION] PATH" << std::endl + << std::endl + << "Finds all files recursively starting from the" << std::endl + << "specified PATH and prints their file paths." << std::endl + << "This hdfs_find tool mimics the POSIX find." << std::endl + << std::endl + << "Both PATH and NAME can have wild-cards." 
<< std::endl + << std::endl + << " -n NAME if provided all results will be matching the NAME " + "pattern" + << std::endl + << " otherwise, the implicit '*' will be used" + << std::endl + << " NAME allows wild-cards" << std::endl + << std::endl + << " -m MAX_DEPTH if provided the maximum depth to recurse after the " + "end of" + << std::endl + << " the path is reached will be limited by MAX_DEPTH" + << std::endl + << " otherwise, the maximum depth to recurse is unbound" + << std::endl + << " MAX_DEPTH can be set to 0 for pure globbing and " + "ignoring" + << std::endl + << " the NAME option (no recursion after the end of the " + "path)" + << std::endl + << std::endl + << " -h display this help and exit" << std::endl + << std::endl + << "Examples:" << std::endl + << "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n " + "some?file*name" + << std::endl + << "hdfs_find / -n file_name -m 3" << std::endl; + return desc.str(); +} + +bool Find::Do() { + if (!Initialize()) { + std::cerr << "Unable to initialize HDFS find tool" << std::endl; + return false; + } + + if (!ValidateConstraints()) { + std::cout << GetDescription(); + return false; + } + + if (opt_val_.count("help") > 0) { + return HandleHelp(); + } + + if (opt_val_.count("path") > 0) { + const auto path = opt_val_["path"].as(); + const auto name = + opt_val_.count("name") > 0 ? opt_val_["name"].as() : "*"; + const auto max_depth = opt_val_.count("max-depth") <= 0 + ? hdfs::FileSystem::GetDefaultFindMaxDepth() + : opt_val_["max-depth"].as(); + return HandlePath(path, name, max_depth); + } + + return false; +} + +bool Find::HandleHelp() const { + std::cout << GetDescription(); + return true; +} + +bool Find::HandlePath(const std::string &path, const std::string &name, + const uint32_t max_depth) const { + // Building a URI object from the given path + auto uri = hdfs::parse_path_or_exit(path); + + const auto fs = hdfs::doConnect(uri, true); + if (!fs) { + std::cerr << "Could not connect the file system." << std::endl; + return false; + } + + const auto promise = std::make_shared>(); + std::future future(promise->get_future()); + auto final_status = hdfs::Status::OK(); + + /** + * Keep requesting more until we get the entire listing. Set the promise + * when we have the entire listing to stop. + * + * Find guarantees that the handler will only be called once at a time, + * so we do not need any locking here. It also guarantees that the handler + * will be only called once with has_more_results set to false. 
+ */ + auto handler = [promise, + &final_status](const hdfs::Status &status, + const std::vector &stat_info, + const bool has_more_results) -> bool { + // Print result chunks as they arrive + if (!stat_info.empty()) { + for (hdfs::StatInfo const &info : stat_info) { + std::cout << info.str() << std::endl; + } + } + if (!status.ok() && final_status.ok()) { + // We make sure we set 'status' only on the first error + final_status = status; + } + if (!has_more_results) { + promise->set_value(); // Set promise + return false; // Request stop sending results + } + return true; // request more results + }; + + // Asynchronous call to Find + fs->Find(uri.get_path(), name, max_depth, handler); + + // Block until promise is set + future.get(); + if (!final_status.ok()) { + std::cerr << "Error: " << final_status.ToString() << std::endl; + return false; + } + return true; +} +} // namespace hdfs::tools diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h new file mode 100644 index 00000000000..9adde3c622b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/hdfs-find.h @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LIBHDFSPP_TOOLS_HDFS_FIND +#define LIBHDFSPP_TOOLS_HDFS_FIND + +#include + +#include + +#include "hdfs-tool.h" + +namespace hdfs::tools { +/** + * {@class Find} is an {@class HdfsTool} finds all files recursively starting + * from the specified PATH and prints their file paths. This tool mimics the + * POSIX find. + */ +class Find : public HdfsTool { +public: + /** + * {@inheritdoc} + */ + Find(int argc, char **argv); + + // Abiding to the Rule of 5 + Find(const Find &) = default; + Find(Find &&) = default; + Find &operator=(const Find &) = delete; + Find &operator=(Find &&) = delete; + ~Find() override = default; + + /** + * {@inheritdoc} + */ + [[nodiscard]] std::string GetDescription() const override; + + /** + * {@inheritdoc} + */ + [[nodiscard]] bool Do() override; + +protected: + /** + * {@inheritdoc} + */ + [[nodiscard]] bool Initialize() override; + + /** + * {@inheritdoc} + */ + [[nodiscard]] bool ValidateConstraints() const override { return argc_ > 1; } + + /** + * {@inheritdoc} + */ + [[nodiscard]] bool HandleHelp() const override; + + /** + * Handle the path argument that's passed to this tool. + * + * @param path The path to the directory to begin the find. + * @param name The pattern name of the search term. + * @param max_depth The maximum depth of the traversal while searching through + * the folders. + * + * @return A boolean indicating the result of this operation. 
+ */ + [[nodiscard]] virtual bool HandlePath(const std::string &path, + const std::string &name, + uint32_t max_depth) const; + +private: + /** + * A boost data-structure containing the description of positional arguments + * passed to the command-line. + */ + po::positional_options_description pos_opt_desc_; +}; +} // namespace hdfs::tools +#endif diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc new file mode 100644 index 00000000000..1f63aa7a2d8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-find/main.cc @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include "hdfs-find.h" + +int main(int argc, char *argv[]) { + const auto result = std::atexit([]() -> void { + // Clean up static data on exit and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + }); + if (result != 0) { + std::cerr + << "Error: Unable to schedule clean-up tasks for HDFS find tool, exiting" + << std::endl; + std::exit(EXIT_FAILURE); + } + + hdfs::tools::Find find(argc, argv); + auto success = false; + + try { + success = find.Do(); + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + } + + if (!success) { + std::exit(EXIT_FAILURE); + } + return 0; +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc deleted file mode 100644 index 348f851ad38..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_find.cc +++ /dev/null @@ -1,146 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - -#include -#include -#include -#include "tools_common.h" - -void usage(){ - std::cout << "Usage: hdfs_find [OPTION] PATH" - << std::endl - << std::endl << "Finds all files recursively starting from the" - << std::endl << "specified PATH and prints their file paths." - << std::endl << "This hdfs_find tool mimics the POSIX find." - << std::endl - << std::endl << "Both PATH and NAME can have wild-cards." - << std::endl - << std::endl << " -n NAME if provided all results will be matching the NAME pattern" - << std::endl << " otherwise, the implicit '*' will be used" - << std::endl << " NAME allows wild-cards" - << std::endl - << std::endl << " -m MAX_DEPTH if provided the maximum depth to recurse after the end of" - << std::endl << " the path is reached will be limited by MAX_DEPTH" - << std::endl << " otherwise, the maximum depth to recurse is unbound" - << std::endl << " MAX_DEPTH can be set to 0 for pure globbing and ignoring" - << std::endl << " the NAME option (no recursion after the end of the path)" - << std::endl - << std::endl << " -h display this help and exit" - << std::endl - << std::endl << "Examples:" - << std::endl << "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n some?file*name" - << std::endl << "hdfs_find / -n file_name -m 3" - << std::endl; -} - -int main(int argc, char *argv[]) { - //We should have at least 2 arguments - if (argc < 2) { - usage(); - exit(EXIT_FAILURE); - } - - int input; - //If NAME is not specified we use implicit "*" - std::string name = "*"; - //If MAX_DEPTH is not specified we use the max value of uint_32_t - uint32_t max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth(); - - //Using GetOpt to read in the values - opterr = 0; - while ((input = getopt(argc, argv, "hn:m:")) != -1) { - switch (input) - { - case 'h': - usage(); - exit(EXIT_SUCCESS); - case 'n': - name = optarg; - break; - case 'm': - max_depth = std::stoi(optarg); - break; - case '?': - if (optopt == 'n' || optopt == 'm') - std::cerr << "Option -" << (char) optopt << " requires an argument." << std::endl; - else if (isprint(optopt)) - std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; - else - std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl; - usage(); - exit(EXIT_FAILURE); - default: - exit(EXIT_FAILURE); - } - } - std::string uri_path = argv[optind]; - - //Building a URI object from the given uri_path - hdfs::URI uri = hdfs::parse_path_or_exit(uri_path); - - std::shared_ptr fs = hdfs::doConnect(uri, true); - if (!fs) { - std::cerr << "Could not connect the file system. " << std::endl; - exit(EXIT_FAILURE); - } - - std::shared_ptr> promise = std::make_shared>(); - std::future future(promise->get_future()); - hdfs::Status status = hdfs::Status::OK(); - - /** - * Keep requesting more until we get the entire listing. Set the promise - * when we have the entire listing to stop. - * - * Find guarantees that the handler will only be called once at a time, - * so we do not need any locking here. It also guarantees that the handler will be - * only called once with has_more_results set to false. - */ - auto handler = [promise, &status] - (const hdfs::Status &s, const std::vector & si, bool has_more_results) -> bool { - //Print result chunks as they arrive - if(!si.empty()) { - for (hdfs::StatInfo const& s : si) { - std::cout << s.str() << std::endl; - } - } - if(!s.ok() && status.ok()){ - //We make sure we set 'status' only on the first error. 
- status = s; - } - if (!has_more_results) { - promise->set_value(); //set promise - return false; //request stop sending results - } - return true; //request more results - }; - - //Asynchronous call to Find - fs->Find(uri.get_path(), name, max_depth, handler); - - //block until promise is set - future.get(); - if(!status.ok()) { - std::cerr << "Error: " << status.ToString() << std::endl; - } - - // Clean up static data and prevent valgrind memory leaks - google::protobuf::ShutdownProtobufLibrary(); - return 0; -}
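
For reference (outside the patch itself), a minimal standalone sketch of the boost::program_options parsing style that hdfs-find.cc adopts above. The option names mirror the tool's -h/--help, -n/--name, -m/--max-depth and the single positional PATH; main(), the UINT32_MAX default (standing in for hdfs::FileSystem::GetDefaultFindMaxDepth()), and the final printout are illustrative only and not part of the patch:

#include <cstdint>
#include <iostream>
#include <string>

#include <boost/program_options.hpp>

namespace po = boost::program_options;

int main(int argc, char *argv[]) {
  // Declare the options in the same style as Find::Initialize(): long names
  // with single-character aliases, plus a "path" option that is bound to the
  // positional argument.
  po::options_description desc("hdfs_find options");
  desc.add_options()
      ("help,h", "display this help and exit")
      ("name,n", po::value<std::string>()->default_value("*"),
       "NAME pattern to match (wild-cards allowed)")
      ("max-depth,m", po::value<uint32_t>()->default_value(UINT32_MAX),
       "maximum depth to recurse")
      ("path", po::value<std::string>(), "the path to start the find from");

  // Only one positional argument is accepted; the parser throws if more
  // than one free-standing argument is passed.
  po::positional_options_description pos;
  pos.add("path", 1);

  po::variables_map vm;
  po::store(po::command_line_parser(argc, argv)
                .options(desc)
                .positional(pos)
                .run(),
            vm);
  po::notify(vm);

  if (vm.count("help") > 0 || vm.count("path") == 0) {
    std::cout << desc << std::endl;
    return 0;
  }

  std::cout << "find " << vm["path"].as<std::string>() << " -n "
            << vm["name"].as<std::string>() << " -m "
            << vm["max-depth"].as<uint32_t>() << std::endl;
  return 0;
}

Unlike getopt, this works unchanged on Windows, and the same options_description object doubles as generated help text.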
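
Likewise, a self-contained sketch of the promise/future completion pattern that Find::HandlePath wraps around the asynchronous Find call. AsyncList, its worker thread, and the listed paths are invented stand-ins for hdfs::FileSystem::Find and its results; the real tool simply hands the handler to fs->Find(...) and does not manage any thread itself:

#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>

// Hypothetical stand-in for an asynchronous, chunked listing API shaped like
// hdfs::FileSystem::Find: results arrive on another thread in batches, and
// the final batch is delivered with has_more_results == false.
std::thread AsyncList(
    std::function<bool(const std::vector<std::string> &, bool)> handler) {
  return std::thread([handler = std::move(handler)] {
    handler({"/dir/file1", "/dir/file2"}, /*has_more_results=*/true);
    handler({"/dir/sub/file3"}, /*has_more_results=*/false);
  });
}

int main() {
  const auto promise = std::make_shared<std::promise<void>>();
  std::future<void> future(promise->get_future());

  // Print each chunk as it arrives and set the promise on the final chunk,
  // mirroring the completion handling in Find::HandlePath.
  const auto handler = [promise](const std::vector<std::string> &chunk,
                                 const bool has_more_results) -> bool {
    for (const auto &entry : chunk) {
      std::cout << entry << std::endl;
    }
    if (!has_more_results) {
      promise->set_value(); // Signal that the listing is complete
      return false;         // Request no further results
    }
    return true; // Request more results
  };

  auto worker = AsyncList(handler);
  future.get(); // Block until the handler has seen the final chunk
  worker.join();
  return 0;
}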