HDFS-16470. Make HDFS find tool cross platform (#4076)
* The source files for hdfs_find use getopt for parsing the command line arguments. getopt is a POSIX API that isn't available on every platform (for example, it is missing from the Windows/MSVC toolchain) and thus isn't cross platform.
* Thus, we need to replace getopt with boost::program_options to make hdfs_find cross platform.
This commit is contained in:
parent
da9970dd69
commit
a631f45a99
|
@ -36,6 +36,7 @@ add_executable(hdfs_tool_tests
|
||||||
hdfs-mkdir-mock.cc
|
hdfs-mkdir-mock.cc
|
||||||
hdfs-rm-mock.cc
|
hdfs-rm-mock.cc
|
||||||
hdfs-get-mock.cc
|
hdfs-get-mock.cc
|
||||||
|
hdfs-find-mock.cc
|
||||||
main.cc)
|
main.cc)
|
||||||
target_include_directories(hdfs_tool_tests PRIVATE
|
target_include_directories(hdfs_tool_tests PRIVATE
|
||||||
../tools
|
../tools
|
||||||
|
@ -56,6 +57,7 @@ target_include_directories(hdfs_tool_tests PRIVATE
|
||||||
../../tools/hdfs-mkdir
|
../../tools/hdfs-mkdir
|
||||||
../../tools/hdfs-rm
|
../../tools/hdfs-rm
|
||||||
../../tools/hdfs-get
|
../../tools/hdfs-get
|
||||||
|
../../tools/hdfs-find
|
||||||
../../tools/hdfs-cat)
|
../../tools/hdfs-cat)
|
||||||
target_link_libraries(hdfs_tool_tests PRIVATE
|
target_link_libraries(hdfs_tool_tests PRIVATE
|
||||||
gmock_main
|
gmock_main
|
||||||
|
@ -75,5 +77,6 @@ target_link_libraries(hdfs_tool_tests PRIVATE
|
||||||
hdfs_mkdir_lib
|
hdfs_mkdir_lib
|
||||||
hdfs_rm_lib
|
hdfs_rm_lib
|
||||||
hdfs_get_lib
|
hdfs_get_lib
|
||||||
|
hdfs_find_lib
|
||||||
hdfs_cat_lib)
|
hdfs_cat_lib)
|
||||||
add_test(hdfs_tool_tests hdfs_tool_tests)
|
add_test(hdfs_tool_tests hdfs_tool_tests)
|
||||||
|
|
|
@ -48,9 +48,11 @@ void CreateSnapshotMock::SetExpectations(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*test_case_func == &PassNOptAndAPath<CreateSnapshotMock>) {
|
if (*test_case_func == &PassNOptAndAPath<CreateSnapshotMock>) {
|
||||||
const auto arg1 = args[1];
|
const auto opt_n = args[0];
|
||||||
const auto arg2 = std::optional{args[0]};
|
const auto path = args[2];
|
||||||
EXPECT_CALL(*this, HandleSnapshot(arg1, arg2))
|
const auto opt_n_value = std::optional{args[1]};
|
||||||
|
ASSERT_EQ(opt_n, "-n");
|
||||||
|
EXPECT_CALL(*this, HandleSnapshot(path, opt_n_value))
|
||||||
.Times(1)
|
.Times(1)
|
||||||
.WillOnce(testing::Return(true));
|
.WillOnce(testing::Return(true));
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <gmock/gmock.h>
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include "hdfs-find-mock.h"
|
||||||
|
#include "hdfs-tool-tests.h"
|
||||||
|
#include "hdfspp/hdfspp.h"
|
||||||
|
|
||||||
|
namespace hdfs::tools::test {
|
||||||
|
// Out-of-line definition of the destructor declared in hdfs-find-mock.h; the
// compiler-generated behaviour is used.
FindMock::~FindMock() = default;
|
||||||
|
|
||||||
|
void FindMock::SetExpectations(
|
||||||
|
std::function<std::unique_ptr<FindMock>()> test_case,
|
||||||
|
const std::vector<std::string> &args) const {
|
||||||
|
// Get the pointer to the function that defines the test case
|
||||||
|
const auto test_case_func =
|
||||||
|
test_case.target<std::unique_ptr<FindMock> (*)()>();
|
||||||
|
ASSERT_NE(test_case_func, nullptr);
|
||||||
|
|
||||||
|
// Set the expected method calls and their corresponding arguments for each
|
||||||
|
// test case
|
||||||
|
if (*test_case_func == &CallHelp<FindMock>) {
|
||||||
|
EXPECT_CALL(*this, HandleHelp()).Times(1).WillOnce(testing::Return(true));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*test_case_func == &PassAPath<FindMock>) {
|
||||||
|
const auto arg1 = args[0];
|
||||||
|
EXPECT_CALL(*this, HandlePath(arg1, "*",
|
||||||
|
hdfs::FileSystem::GetDefaultFindMaxDepth()))
|
||||||
|
.Times(1)
|
||||||
|
.WillOnce(testing::Return(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*test_case_func == &PassNOptAndAPath<FindMock>) {
|
||||||
|
const auto arg1 = args[0];
|
||||||
|
const auto arg2 = args[1];
|
||||||
|
const auto arg3 = args[2];
|
||||||
|
ASSERT_EQ(arg1, "-n");
|
||||||
|
EXPECT_CALL(*this, HandlePath(arg3, arg2,
|
||||||
|
hdfs::FileSystem::GetDefaultFindMaxDepth()))
|
||||||
|
.Times(1)
|
||||||
|
.WillOnce(testing::Return(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*test_case_func == &PassMOptPermissionsAndAPath<FindMock>) {
|
||||||
|
const auto arg1 = args[0];
|
||||||
|
const auto arg2 = args[1];
|
||||||
|
const auto arg3 = args[2];
|
||||||
|
ASSERT_EQ(arg1, "-m");
|
||||||
|
EXPECT_CALL(*this,
|
||||||
|
HandlePath(arg3, "*", static_cast<uint32_t>(std::stoi(arg2))))
|
||||||
|
.Times(1)
|
||||||
|
.WillOnce(testing::Return(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*test_case_func == &PassNStrMNumAndAPath<FindMock>) {
|
||||||
|
const auto arg1 = args[0];
|
||||||
|
const auto arg2 = args[1];
|
||||||
|
const auto arg3 = args[2];
|
||||||
|
const auto arg4 = args[3];
|
||||||
|
const auto arg5 = args[4];
|
||||||
|
ASSERT_EQ(arg1, "-n");
|
||||||
|
ASSERT_EQ(arg3, "-m");
|
||||||
|
EXPECT_CALL(*this,
|
||||||
|
HandlePath(arg5, arg2, static_cast<uint32_t>(std::stoi(arg4))))
|
||||||
|
.Times(1)
|
||||||
|
.WillOnce(testing::Return(true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace hdfs::tools::test
|
|
@ -0,0 +1,69 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIBHDFSPP_TOOLS_HDFS_FIND_MOCK
|
||||||
|
#define LIBHDFSPP_TOOLS_HDFS_FIND_MOCK
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <gmock/gmock.h>
|
||||||
|
|
||||||
|
#include "hdfs-find.h"
|
||||||
|
|
||||||
|
namespace hdfs::tools::test {
|
||||||
|
/**
 * {@class FindMock} derives from {@class Find} and replaces its HandleHelp
 * and HandlePath methods with gmock mock methods, so that tests can check
 * which handler gets called, and with which arguments, without running the
 * real handlers.
 */
class FindMock : public hdfs::tools::Find {
public:
  /**
   * {@inheritdoc}
   *
   * Simply forwards argc and argv to the {@class Find} constructor.
   */
  FindMock(const int argc, char **argv) : Find(argc, argv) {}

  // Abiding to the Rule of 5 - a mock instance is neither copyable nor movable
  FindMock(const FindMock &) = delete;
  FindMock(FindMock &&) = delete;
  FindMock &operator=(const FindMock &) = delete;
  FindMock &operator=(FindMock &&) = delete;
  ~FindMock() override;

  /**
   * Defines the methods and the corresponding arguments that are expected
   * to be called on this instance of {@link HdfsTool} for the given test case.
   *
   * @param test_case An {@link std::function} object that points to the
   * function defining the test case
   * @param args The arguments that are passed to this test case. Defaults to
   * an empty list.
   */
  void SetExpectations(std::function<std::unique_ptr<FindMock>()> test_case,
                       const std::vector<std::string> &args = {}) const;

  // Mock of Find::HandleHelp
  MOCK_METHOD(bool, HandleHelp, (), (const, override));

  // Mock of Find::HandlePath(path, name, max_depth)
  MOCK_METHOD(bool, HandlePath,
              (const std::string &, const std::string &, uint32_t),
              (const, override));
};
|
||||||
|
} // namespace hdfs::tools::test
|
||||||
|
|
||||||
|
#endif
|
|
@ -31,6 +31,7 @@
|
||||||
#include "hdfs-df-mock.h"
|
#include "hdfs-df-mock.h"
|
||||||
#include "hdfs-disallow-snapshot-mock.h"
|
#include "hdfs-disallow-snapshot-mock.h"
|
||||||
#include "hdfs-du-mock.h"
|
#include "hdfs-du-mock.h"
|
||||||
|
#include "hdfs-find-mock.h"
|
||||||
#include "hdfs-get-mock.h"
|
#include "hdfs-get-mock.h"
|
||||||
#include "hdfs-mkdir-mock.h"
|
#include "hdfs-mkdir-mock.h"
|
||||||
#include "hdfs-move-to-local-mock.h"
|
#include "hdfs-move-to-local-mock.h"
|
||||||
|
@ -140,6 +141,14 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
PassAPath<hdfs::tools::test::RmMock>,
|
PassAPath<hdfs::tools::test::RmMock>,
|
||||||
PassRecursivePath<hdfs::tools::test::RmMock>));
|
PassRecursivePath<hdfs::tools::test::RmMock>));
|
||||||
|
|
||||||
|
// hdfs_find argument combinations that are run through the HdfsToolBasicTest
// suite. The mock calls expected for each of them are registered by
// FindMock::SetExpectations.
INSTANTIATE_TEST_SUITE_P(
    HdfsFind, HdfsToolBasicTest,
    testing::Values(CallHelp<hdfs::tools::test::FindMock>,
                    PassAPath<hdfs::tools::test::FindMock>,
                    PassNStrMNumAndAPath<hdfs::tools::test::FindMock>,
                    PassMOptPermissionsAndAPath<hdfs::tools::test::FindMock>,
                    PassNOptAndAPath<hdfs::tools::test::FindMock>));
|
||||||
|
|
||||||
// Negative tests
|
// Negative tests
|
||||||
INSTANTIATE_TEST_SUITE_P(
|
INSTANTIATE_TEST_SUITE_P(
|
||||||
HdfsAllowSnapshot, HdfsToolNegativeTestThrows,
|
HdfsAllowSnapshot, HdfsToolNegativeTestThrows,
|
||||||
|
@ -210,6 +219,17 @@ INSTANTIATE_TEST_SUITE_P(
|
||||||
PassRecursiveOwnerAndAPath<hdfs::tools::test::RmMock>,
|
PassRecursiveOwnerAndAPath<hdfs::tools::test::RmMock>,
|
||||||
PassMOpt<hdfs::tools::test::RmMock>));
|
PassMOpt<hdfs::tools::test::RmMock>));
|
||||||
|
|
||||||
|
// hdfs_find argument combinations that are run through the
// HdfsToolNegativeTestThrows suite. FindMock::SetExpectations registers no
// expected handler call for any of these cases.
// NOTE(review): judging by the suite name, the tool is presumably expected to
// throw for each of them - confirm against the fixture definition.
INSTANTIATE_TEST_SUITE_P(
    HdfsFind, HdfsToolNegativeTestThrows,
    testing::Values(Pass2Paths<hdfs::tools::test::FindMock>,
                    Pass3Paths<hdfs::tools::test::FindMock>,
                    PassRecursiveOwnerAndAPath<hdfs::tools::test::FindMock>,
                    PassRecursive<hdfs::tools::test::FindMock>,
                    PassRecursivePath<hdfs::tools::test::FindMock>,
                    PassMPOptsPermissionsAndAPath<hdfs::tools::test::FindMock>,
                    PassMOpt<hdfs::tools::test::FindMock>,
                    PassNOpt<hdfs::tools::test::FindMock>));
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(
|
INSTANTIATE_TEST_SUITE_P(
|
||||||
HdfsRm, HdfsToolNegativeTestNoThrow,
|
HdfsRm, HdfsToolNegativeTestNoThrow,
|
||||||
testing::Values(PassRecursive<hdfs::tools::test::RmMock>));
|
testing::Values(PassRecursive<hdfs::tools::test::RmMock>));
|
||||||
|
|
|
@ -118,7 +118,7 @@ template <class T> std::unique_ptr<T> PassNOptAndAPath() {
|
||||||
static char *argv[] = {exe.data(), arg1.data(), arg2.data(), arg3.data()};
|
static char *argv[] = {exe.data(), arg1.data(), arg2.data(), arg3.data()};
|
||||||
|
|
||||||
auto hdfs_tool = std::make_unique<T>(argc, argv);
|
auto hdfs_tool = std::make_unique<T>(argc, argv);
|
||||||
hdfs_tool->SetExpectations(PassNOptAndAPath<T>, {arg2, arg3});
|
hdfs_tool->SetExpectations(PassNOptAndAPath<T>, {arg1, arg2, arg3});
|
||||||
return hdfs_tool;
|
return hdfs_tool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,4 +271,34 @@ template <class T> std::unique_ptr<T> PassMPOptsPermissionsAndAPath() {
|
||||||
return hdfs_tool;
|
return hdfs_tool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a tool of type T that is invoked as
 * "hdfs_tool_name -n some_str -m 757 some/path" and registers the
 * expectations for this test case on it.
 *
 * @return The tool instance, with its expectations already set.
 */
template <class T> std::unique_ptr<T> PassNStrMNumAndAPath() {
  // The strings are static because argv keeps pointers into their buffers.
  static std::string program = "hdfs_tool_name";
  static std::string name_flag = "-n";
  static std::string name_value = "some_str";
  static std::string depth_flag = "-m";
  static std::string depth_value = "757";
  static std::string path = "some/path";

  static char *cli[] = {program.data(),    name_flag.data(),
                        name_value.data(), depth_flag.data(),
                        depth_value.data(), path.data()};
  constexpr auto cli_count = 6;

  auto tool = std::make_unique<T>(cli_count, cli);
  // The executable name is not part of the arguments handed to the mock.
  tool->SetExpectations(
      PassNStrMNumAndAPath<T>,
      {name_flag, name_value, depth_flag, depth_value, path});
  return tool;
}
|
||||||
|
|
||||||
|
/**
 * Builds a tool of type T that is invoked as "hdfs_tool_name -n", i.e. with
 * the -n option but without a value for it, and registers the expectations
 * for this test case on it.
 *
 * @return The tool instance, with its expectations already set.
 */
template <class T> std::unique_ptr<T> PassNOpt() {
  // The strings are static because argv keeps pointers into their buffers.
  static std::string program = "hdfs_tool_name";
  static std::string name_flag = "-n";

  static char *cli[] = {program.data(), name_flag.data()};
  constexpr auto cli_count = 2;

  auto tool = std::make_unique<T>(cli_count, cli);
  tool->SetExpectations(PassNOpt<T>, {name_flag});
  return tool;
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -41,8 +41,7 @@ add_subdirectory(hdfs-chown)
|
||||||
|
|
||||||
add_subdirectory(hdfs-chmod)
|
add_subdirectory(hdfs-chmod)
|
||||||
|
|
||||||
add_executable(hdfs_find hdfs_find.cc)
|
add_subdirectory(hdfs-find)
|
||||||
target_link_libraries(hdfs_find tools_common hdfspp_static)
|
|
||||||
|
|
||||||
add_subdirectory(hdfs-mkdir)
|
add_subdirectory(hdfs-mkdir)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
#
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
add_library(hdfs_find_lib STATIC $<TARGET_OBJECTS:hdfs_tool_obj> hdfs-find.cc)
|
||||||
|
target_include_directories(hdfs_find_lib PRIVATE ../../tools ${Boost_INCLUDE_DIRS})
|
||||||
|
target_link_libraries(hdfs_find_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static)
|
||||||
|
|
||||||
|
add_executable(hdfs_find main.cc)
|
||||||
|
target_include_directories(hdfs_find PRIVATE ../../tools)
|
||||||
|
target_link_libraries(hdfs_find PRIVATE hdfs_find_lib)
|
||||||
|
|
||||||
|
install(TARGETS hdfs_find RUNTIME DESTINATION bin)
|
|
@ -0,0 +1,193 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <future>
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
#include <ostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "hdfs-find.h"
|
||||||
|
#include "tools_common.h"
|
||||||
|
|
||||||
|
namespace hdfs::tools {
|
||||||
|
// Forwards the raw command-line arguments to the HdfsTool base class.
Find::Find(const int argc, char **argv) : HdfsTool(argc, argv) {}
|
||||||
|
|
||||||
|
bool Find::Initialize() {
|
||||||
|
auto add_options = opt_desc_.add_options();
|
||||||
|
add_options(
|
||||||
|
"help,h",
|
||||||
|
"Finds all files recursively starting from the specified PATH and prints "
|
||||||
|
"their file paths. This hdfs_find tool mimics the POSIX find.");
|
||||||
|
add_options(
|
||||||
|
"name,n", po::value<std::string>(),
|
||||||
|
"If provided, all results will be matching the NAME pattern otherwise, "
|
||||||
|
"the implicit '*' will be used NAME allows wild-cards");
|
||||||
|
add_options(
|
||||||
|
"max-depth,m", po::value<u_int32_t>(),
|
||||||
|
"If provided, the maximum depth to recurse after the end of the path is "
|
||||||
|
"reached will be limited by MAX_DEPTH otherwise, the maximum depth to "
|
||||||
|
"recurse is unbound MAX_DEPTH can be set to 0 for pure globbing and "
|
||||||
|
"ignoring the NAME option (no recursion after the end of the path)");
|
||||||
|
add_options("path", po::value<std::string>(),
|
||||||
|
"The path where we want to start the find operation");
|
||||||
|
|
||||||
|
// We allow only one positional argument to be passed to this tool. An
|
||||||
|
// exception is thrown if multiple arguments are passed.
|
||||||
|
pos_opt_desc_.add("path", 1);
|
||||||
|
|
||||||
|
po::store(po::command_line_parser(argc_, argv_)
|
||||||
|
.options(opt_desc_)
|
||||||
|
.positional(pos_opt_desc_)
|
||||||
|
.run(),
|
||||||
|
opt_val_);
|
||||||
|
po::notify(opt_val_);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the usage/help text of hdfs_find.
 *
 * @return The complete help text; every line is terminated by a newline.
 */
std::string Find::GetDescription() const {
  // One entry per output line; an empty entry produces a blank line.
  static constexpr const char *kUsageLines[] = {
      "Usage: hdfs_find [OPTION] PATH",
      "",
      "Finds all files recursively starting from the",
      "specified PATH and prints their file paths.",
      "This hdfs_find tool mimics the POSIX find.",
      "",
      "Both PATH and NAME can have wild-cards.",
      "",
      " -n NAME if provided all results will be matching the NAME pattern",
      " otherwise, the implicit '*' will be used",
      " NAME allows wild-cards",
      "",
      " -m MAX_DEPTH if provided the maximum depth to recurse after the end of",
      " the path is reached will be limited by MAX_DEPTH",
      " otherwise, the maximum depth to recurse is unbound",
      " MAX_DEPTH can be set to 0 for pure globbing and ignoring",
      " the NAME option (no recursion after the end of the path)",
      "",
      " -h display this help and exit",
      "",
      "Examples:",
      "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n "
      "some?file*name",
      "hdfs_find / -n file_name -m 3",
  };

  std::stringstream text;
  for (const auto *line : kUsageLines) {
    text << line << '\n';
  }
  return text.str();
}
|
||||||
|
|
||||||
|
bool Find::Do() {
|
||||||
|
if (!Initialize()) {
|
||||||
|
std::cerr << "Unable to initialize HDFS find tool" << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ValidateConstraints()) {
|
||||||
|
std::cout << GetDescription();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opt_val_.count("help") > 0) {
|
||||||
|
return HandleHelp();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opt_val_.count("path") > 0) {
|
||||||
|
const auto path = opt_val_["path"].as<std::string>();
|
||||||
|
const auto name =
|
||||||
|
opt_val_.count("name") > 0 ? opt_val_["name"].as<std::string>() : "*";
|
||||||
|
const auto max_depth = opt_val_.count("max-depth") <= 0
|
||||||
|
? hdfs::FileSystem::GetDefaultFindMaxDepth()
|
||||||
|
: opt_val_["max-depth"].as<uint32_t>();
|
||||||
|
return HandlePath(path, name, max_depth);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the usage text returned by GetDescription() to stdout and reports
// success.
bool Find::HandleHelp() const {
  std::cout << GetDescription();
  return true;
}
|
||||||
|
|
||||||
|
bool Find::HandlePath(const std::string &path, const std::string &name,
|
||||||
|
const uint32_t max_depth) const {
|
||||||
|
// Building a URI object from the given path
|
||||||
|
auto uri = hdfs::parse_path_or_exit(path);
|
||||||
|
|
||||||
|
const auto fs = hdfs::doConnect(uri, true);
|
||||||
|
if (!fs) {
|
||||||
|
std::cerr << "Could not connect the file system." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto promise = std::make_shared<std::promise<void>>();
|
||||||
|
std::future<void> future(promise->get_future());
|
||||||
|
auto final_status = hdfs::Status::OK();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Keep requesting more until we get the entire listing. Set the promise
|
||||||
|
* when we have the entire listing to stop.
|
||||||
|
*
|
||||||
|
* Find guarantees that the handler will only be called once at a time,
|
||||||
|
* so we do not need any locking here. It also guarantees that the handler
|
||||||
|
* will be only called once with has_more_results set to false.
|
||||||
|
*/
|
||||||
|
auto handler = [promise,
|
||||||
|
&final_status](const hdfs::Status &status,
|
||||||
|
const std::vector<hdfs::StatInfo> &stat_info,
|
||||||
|
const bool has_more_results) -> bool {
|
||||||
|
// Print result chunks as they arrive
|
||||||
|
if (!stat_info.empty()) {
|
||||||
|
for (hdfs::StatInfo const &info : stat_info) {
|
||||||
|
std::cout << info.str() << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!status.ok() && final_status.ok()) {
|
||||||
|
// We make sure we set 'status' only on the first error
|
||||||
|
final_status = status;
|
||||||
|
}
|
||||||
|
if (!has_more_results) {
|
||||||
|
promise->set_value(); // Set promise
|
||||||
|
return false; // Request stop sending results
|
||||||
|
}
|
||||||
|
return true; // request more results
|
||||||
|
};
|
||||||
|
|
||||||
|
// Asynchronous call to Find
|
||||||
|
fs->Find(uri.get_path(), name, max_depth, handler);
|
||||||
|
|
||||||
|
// Block until promise is set
|
||||||
|
future.get();
|
||||||
|
if (!final_status.ok()) {
|
||||||
|
std::cerr << "Error: " << final_status.ToString() << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} // namespace hdfs::tools
|
|
@ -0,0 +1,96 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LIBHDFSPP_TOOLS_HDFS_FIND
|
||||||
|
#define LIBHDFSPP_TOOLS_HDFS_FIND
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <boost/program_options.hpp>
|
||||||
|
|
||||||
|
#include "hdfs-tool.h"
|
||||||
|
|
||||||
|
namespace hdfs::tools {
|
||||||
|
/**
 * {@class Find} is an {@class HdfsTool} that finds all files recursively,
 * starting from the specified PATH, and prints their file paths. This tool
 * mimics the POSIX find.
 */
class Find : public HdfsTool {
public:
  /**
   * {@inheritdoc}
   */
  Find(int argc, char **argv);

  // Abiding to the Rule of 5 - copy/move construction is allowed, assignment
  // is not
  Find(const Find &) = default;
  Find(Find &&) = default;
  Find &operator=(const Find &) = delete;
  Find &operator=(Find &&) = delete;
  ~Find() override = default;

  /**
   * {@inheritdoc}
   */
  [[nodiscard]] std::string GetDescription() const override;

  /**
   * {@inheritdoc}
   */
  [[nodiscard]] bool Do() override;

protected:
  /**
   * {@inheritdoc}
   */
  [[nodiscard]] bool Initialize() override;

  /**
   * {@inheritdoc}
   *
   * The only constraint is that at least one argument besides the executable
   * name has been passed.
   */
  [[nodiscard]] bool ValidateConstraints() const override { return argc_ > 1; }

  /**
   * {@inheritdoc}
   */
  [[nodiscard]] bool HandleHelp() const override;

  /**
   * Handle the path argument that's passed to this tool.
   *
   * @param path The path to the directory to begin the find.
   * @param name The pattern name of the search term.
   * @param max_depth The maximum depth of the traversal while searching through
   * the folders.
   *
   * @return A boolean indicating the result of this operation.
   */
  [[nodiscard]] virtual bool HandlePath(const std::string &path,
                                        const std::string &name,
                                        uint32_t max_depth) const;

private:
  /**
   * A boost data-structure containing the description of positional arguments
   * passed to the command-line.
   */
  po::positional_options_description pos_opt_desc_;
};
|
||||||
|
} // namespace hdfs::tools
|
||||||
|
#endif
|
|
@ -0,0 +1,52 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <exception>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <google/protobuf/stubs/common.h>
|
||||||
|
|
||||||
|
#include "hdfs-find.h"
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
const auto result = std::atexit([]() -> void {
|
||||||
|
// Clean up static data on exit and prevent valgrind memory leaks
|
||||||
|
google::protobuf::ShutdownProtobufLibrary();
|
||||||
|
});
|
||||||
|
if (result != 0) {
|
||||||
|
std::cerr
|
||||||
|
<< "Error: Unable to schedule clean-up tasks for HDFS find tool, exiting"
|
||||||
|
<< std::endl;
|
||||||
|
std::exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
hdfs::tools::Find find(argc, argv);
|
||||||
|
auto success = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
success = find.Do();
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
std::cerr << "Error: " << e.what() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!success) {
|
||||||
|
std::exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -1,146 +0,0 @@
|
||||||
/*
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
or more contributor license agreements. See the NOTICE file
|
|
||||||
distributed with this work for additional information
|
|
||||||
regarding copyright ownership. The ASF licenses this file
|
|
||||||
to you under the Apache License, Version 2.0 (the
|
|
||||||
"License"); you may not use this file except in compliance
|
|
||||||
with the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing,
|
|
||||||
software distributed under the License is distributed on an
|
|
||||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
KIND, either express or implied. See the License for the
|
|
||||||
specific language governing permissions and limitations
|
|
||||||
under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <google/protobuf/stubs/common.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <future>
|
|
||||||
#include "tools_common.h"
|
|
||||||
|
|
||||||
void usage(){
|
|
||||||
std::cout << "Usage: hdfs_find [OPTION] PATH"
|
|
||||||
<< std::endl
|
|
||||||
<< std::endl << "Finds all files recursively starting from the"
|
|
||||||
<< std::endl << "specified PATH and prints their file paths."
|
|
||||||
<< std::endl << "This hdfs_find tool mimics the POSIX find."
|
|
||||||
<< std::endl
|
|
||||||
<< std::endl << "Both PATH and NAME can have wild-cards."
|
|
||||||
<< std::endl
|
|
||||||
<< std::endl << " -n NAME if provided all results will be matching the NAME pattern"
|
|
||||||
<< std::endl << " otherwise, the implicit '*' will be used"
|
|
||||||
<< std::endl << " NAME allows wild-cards"
|
|
||||||
<< std::endl
|
|
||||||
<< std::endl << " -m MAX_DEPTH if provided the maximum depth to recurse after the end of"
|
|
||||||
<< std::endl << " the path is reached will be limited by MAX_DEPTH"
|
|
||||||
<< std::endl << " otherwise, the maximum depth to recurse is unbound"
|
|
||||||
<< std::endl << " MAX_DEPTH can be set to 0 for pure globbing and ignoring"
|
|
||||||
<< std::endl << " the NAME option (no recursion after the end of the path)"
|
|
||||||
<< std::endl
|
|
||||||
<< std::endl << " -h display this help and exit"
|
|
||||||
<< std::endl
|
|
||||||
<< std::endl << "Examples:"
|
|
||||||
<< std::endl << "hdfs_find hdfs://localhost.localdomain:8020/dir?/tree* -n some?file*name"
|
|
||||||
<< std::endl << "hdfs_find / -n file_name -m 3"
|
|
||||||
<< std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
|
||||||
//We should have at least 2 arguments
|
|
||||||
if (argc < 2) {
|
|
||||||
usage();
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
int input;
|
|
||||||
//If NAME is not specified we use implicit "*"
|
|
||||||
std::string name = "*";
|
|
||||||
//If MAX_DEPTH is not specified we use the max value of uint_32_t
|
|
||||||
uint32_t max_depth = hdfs::FileSystem::GetDefaultFindMaxDepth();
|
|
||||||
|
|
||||||
//Using GetOpt to read in the values
|
|
||||||
opterr = 0;
|
|
||||||
while ((input = getopt(argc, argv, "hn:m:")) != -1) {
|
|
||||||
switch (input)
|
|
||||||
{
|
|
||||||
case 'h':
|
|
||||||
usage();
|
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
case 'n':
|
|
||||||
name = optarg;
|
|
||||||
break;
|
|
||||||
case 'm':
|
|
||||||
max_depth = std::stoi(optarg);
|
|
||||||
break;
|
|
||||||
case '?':
|
|
||||||
if (optopt == 'n' || optopt == 'm')
|
|
||||||
std::cerr << "Option -" << (char) optopt << " requires an argument." << std::endl;
|
|
||||||
else if (isprint(optopt))
|
|
||||||
std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl;
|
|
||||||
else
|
|
||||||
std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl;
|
|
||||||
usage();
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
default:
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::string uri_path = argv[optind];
|
|
||||||
|
|
||||||
//Building a URI object from the given uri_path
|
|
||||||
hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);
|
|
||||||
|
|
||||||
std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
|
|
||||||
if (!fs) {
|
|
||||||
std::cerr << "Could not connect the file system. " << std::endl;
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
|
|
||||||
std::future<void> future(promise->get_future());
|
|
||||||
hdfs::Status status = hdfs::Status::OK();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Keep requesting more until we get the entire listing. Set the promise
|
|
||||||
* when we have the entire listing to stop.
|
|
||||||
*
|
|
||||||
* Find guarantees that the handler will only be called once at a time,
|
|
||||||
* so we do not need any locking here. It also guarantees that the handler will be
|
|
||||||
* only called once with has_more_results set to false.
|
|
||||||
*/
|
|
||||||
auto handler = [promise, &status]
|
|
||||||
(const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
|
|
||||||
//Print result chunks as they arrive
|
|
||||||
if(!si.empty()) {
|
|
||||||
for (hdfs::StatInfo const& s : si) {
|
|
||||||
std::cout << s.str() << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(!s.ok() && status.ok()){
|
|
||||||
//We make sure we set 'status' only on the first error.
|
|
||||||
status = s;
|
|
||||||
}
|
|
||||||
if (!has_more_results) {
|
|
||||||
promise->set_value(); //set promise
|
|
||||||
return false; //request stop sending results
|
|
||||||
}
|
|
||||||
return true; //request more results
|
|
||||||
};
|
|
||||||
|
|
||||||
//Asynchronous call to Find
|
|
||||||
fs->Find(uri.get_path(), name, max_depth, handler);
|
|
||||||
|
|
||||||
//block until promise is set
|
|
||||||
future.get();
|
|
||||||
if(!status.ok()) {
|
|
||||||
std::cerr << "Error: " << status.ToString() << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up static data and prevent valgrind memory leaks
|
|
||||||
google::protobuf::ShutdownProtobufLibrary();
|
|
||||||
return 0;
|
|
||||||
}
|
|
Loading…
Reference in New Issue