HDFS-16471. Make HDFS ls tool cross platform (#4086)

* The source files for hdfs_ls
   use getopt for parsing the
   command line arguments.
* getopt is available only on
   Linux and thus, isn't cross
   platform.
* Thus, we need to replace
   getopt with
   boost::program_options to
   make this tool cross platform.
This commit is contained in:
Gautham B A 2022-03-22 23:12:00 +05:30 committed by GitHub
parent 59d07bdcc3
commit 81879eb9cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 485 additions and 132 deletions

View File

@ -37,6 +37,7 @@ add_executable(hdfs_tool_tests
hdfs-rm-mock.cc hdfs-rm-mock.cc
hdfs-get-mock.cc hdfs-get-mock.cc
hdfs-find-mock.cc hdfs-find-mock.cc
hdfs-ls-mock.cc
main.cc) main.cc)
target_include_directories(hdfs_tool_tests PRIVATE target_include_directories(hdfs_tool_tests PRIVATE
../tools ../tools
@ -58,6 +59,7 @@ target_include_directories(hdfs_tool_tests PRIVATE
../../tools/hdfs-rm ../../tools/hdfs-rm
../../tools/hdfs-get ../../tools/hdfs-get
../../tools/hdfs-find ../../tools/hdfs-find
../../tools/hdfs-ls
../../tools/hdfs-cat) ../../tools/hdfs-cat)
target_link_libraries(hdfs_tool_tests PRIVATE target_link_libraries(hdfs_tool_tests PRIVATE
gmock_main gmock_main
@ -78,5 +80,6 @@ target_link_libraries(hdfs_tool_tests PRIVATE
hdfs_rm_lib hdfs_rm_lib
hdfs_get_lib hdfs_get_lib
hdfs_find_lib hdfs_find_lib
hdfs_ls_lib
hdfs_cat_lib) hdfs_cat_lib)
add_test(hdfs_tool_tests hdfs_tool_tests) add_test(hdfs_tool_tests hdfs_tool_tests)

View File

@ -0,0 +1,67 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "hdfs-ls-mock.h"
#include "hdfs-tool-tests.h"
namespace hdfs::tools::test {
// Out-of-line definition of the destructor declared in hdfs-ls-mock.h; the
// compiler-generated behaviour is used.
LsMock::~LsMock() = default;
void LsMock::SetExpectations(std::function<std::unique_ptr<LsMock>()> test_case,
const std::vector<std::string> &args) const {
// Get the pointer to the function that defines the test case
const auto test_case_func = test_case.target<std::unique_ptr<LsMock> (*)()>();
ASSERT_NE(test_case_func, nullptr);
// Set the expected method calls and their corresponding arguments for each
// test case
if (*test_case_func == &CallHelp<LsMock>) {
EXPECT_CALL(*this, HandleHelp()).Times(1).WillOnce(testing::Return(true));
return;
}
if (*test_case_func == &PassAPath<LsMock>) {
const auto arg1 = args[0];
EXPECT_CALL(*this, HandlePath(arg1, false))
.Times(1)
.WillOnce(testing::Return(true));
}
if (*test_case_func == &PassRecursivePath<LsMock>) {
const auto arg1 = args[0];
const auto arg2 = args[1];
ASSERT_EQ(arg1, "-R");
EXPECT_CALL(*this, HandlePath(arg2, true))
.Times(1)
.WillOnce(testing::Return(true));
}
if (*test_case_func == &PassRecursive<LsMock>) {
const auto arg1 = args[0];
ASSERT_EQ(arg1, "-R");
}
}
} // namespace hdfs::tools::test

View File

@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIBHDFSPP_TOOLS_HDFS_LS_MOCK
#define LIBHDFSPP_TOOLS_HDFS_LS_MOCK
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include <gmock/gmock.h>
#include "hdfs-ls.h"
namespace hdfs::tools::test {
/**
* {@class LsMock} derives from {@class Ls} and replaces the HandleHelp and
* HandlePath methods with gmock mock methods, so that tests can set
* expectations on which handler is invoked and with which arguments.
*/
class LsMock : public hdfs::tools::Ls {
public:
/**
* Forwards the argument count and argument vector unchanged to the
* {@class Ls} constructor.
*
* @param argc Count of the arguments in argv
* @param argv The command-line arguments
*/
LsMock(const int argc, char **argv) : Ls(argc, argv) {}
// Abiding to the Rule of 5: copying and moving a mock are both disabled, and
// the destructor is only declared here (it is defined out of line).
LsMock(const LsMock &) = delete;
LsMock(LsMock &&) = delete;
LsMock &operator=(const LsMock &) = delete;
LsMock &operator=(LsMock &&) = delete;
~LsMock() override;
/**
* Defines the methods and the corresponding arguments that are expected
* to be called on this instance of {@link HdfsTool} for the given test case.
*
* @param test_case An {@link std::function} object that points to the
* function defining the test case
* @param args The arguments that are passed to this test case. Defaults to
* an empty vector.
*/
void SetExpectations(std::function<std::unique_ptr<LsMock>()> test_case,
const std::vector<std::string> &args = {}) const;
// Mocked overrides of the two handlers declared by Ls.
MOCK_METHOD(bool, HandleHelp, (), (const, override));
MOCK_METHOD(bool, HandlePath, (const std::string &, const bool),
(const, override));
};
} // namespace hdfs::tools::test
#endif

View File

@ -33,6 +33,7 @@
#include "hdfs-du-mock.h" #include "hdfs-du-mock.h"
#include "hdfs-find-mock.h" #include "hdfs-find-mock.h"
#include "hdfs-get-mock.h" #include "hdfs-get-mock.h"
#include "hdfs-ls-mock.h"
#include "hdfs-mkdir-mock.h" #include "hdfs-mkdir-mock.h"
#include "hdfs-move-to-local-mock.h" #include "hdfs-move-to-local-mock.h"
#include "hdfs-rename-snapshot-mock.h" #include "hdfs-rename-snapshot-mock.h"
@ -81,6 +82,12 @@ INSTANTIATE_TEST_SUITE_P(
CallHelp<hdfs::tools::test::DuMock>, CallHelp<hdfs::tools::test::DuMock>,
PassRecursivePath<hdfs::tools::test::DuMock>)); PassRecursivePath<hdfs::tools::test::DuMock>));
// Registers the ls tool with the HdfsToolBasicTest suite using the PassAPath,
// CallHelp and PassRecursivePath test cases, all instantiated for LsMock.
INSTANTIATE_TEST_SUITE_P(
HdfsLs, HdfsToolBasicTest,
testing::Values(PassAPath<hdfs::tools::test::LsMock>,
CallHelp<hdfs::tools::test::LsMock>,
PassRecursivePath<hdfs::tools::test::LsMock>));
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
HdfsDeleteSnapshot, HdfsToolBasicTest, HdfsDeleteSnapshot, HdfsToolBasicTest,
testing::Values(CallHelp<hdfs::tools::test::DeleteSnapshotMock>, testing::Values(CallHelp<hdfs::tools::test::DeleteSnapshotMock>,
@ -180,6 +187,14 @@ INSTANTIATE_TEST_SUITE_P(
PassOwnerAndAPath<hdfs::tools::test::DuMock>, PassOwnerAndAPath<hdfs::tools::test::DuMock>,
PassPermissionsAndAPath<hdfs::tools::test::DuMock>)); PassPermissionsAndAPath<hdfs::tools::test::DuMock>));
// Registers the ls tool with the HdfsToolNegativeTestThrows suite for the
// argument combinations listed below.
// NOTE(review): the suite name suggests each case is expected to make the
// tool throw - confirm against the fixture definition.
INSTANTIATE_TEST_SUITE_P(
HdfsLs, HdfsToolNegativeTestThrows,
testing::Values(Pass2Paths<hdfs::tools::test::LsMock>,
Pass3Paths<hdfs::tools::test::LsMock>,
PassNOptAndAPath<hdfs::tools::test::LsMock>,
PassOwnerAndAPath<hdfs::tools::test::LsMock>,
PassPermissionsAndAPath<hdfs::tools::test::LsMock>));
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
HdfsCat, HdfsToolNegativeTestThrows, HdfsCat, HdfsToolNegativeTestThrows,
testing::Values(Pass2Paths<hdfs::tools::test::CatMock>)); testing::Values(Pass2Paths<hdfs::tools::test::CatMock>));
@ -262,6 +277,10 @@ INSTANTIATE_TEST_SUITE_P(
HdfsDu, HdfsToolNegativeTestNoThrow, HdfsDu, HdfsToolNegativeTestNoThrow,
testing::Values(PassRecursive<hdfs::tools::test::DuMock>)); testing::Values(PassRecursive<hdfs::tools::test::DuMock>));
// Registers the ls tool with the HdfsToolNegativeTestNoThrow suite for the
// PassRecursive test case.
// NOTE(review): presumably this covers a failure that is reported without an
// exception - confirm against the fixture definition.
INSTANTIATE_TEST_SUITE_P(
HdfsLs, HdfsToolNegativeTestNoThrow,
testing::Values(PassRecursive<hdfs::tools::test::LsMock>));
INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(
HdfsChown, HdfsToolNegativeTestNoThrow, HdfsChown, HdfsToolNegativeTestNoThrow,
testing::Values(PassAPath<hdfs::tools::test::ChownMock>)); testing::Values(PassAPath<hdfs::tools::test::ChownMock>));

View File

@ -47,8 +47,7 @@ add_subdirectory(hdfs-mkdir)
add_subdirectory(hdfs-rm) add_subdirectory(hdfs-rm)
add_executable(hdfs_ls hdfs_ls.cc) add_subdirectory(hdfs-ls)
target_link_libraries(hdfs_ls tools_common hdfspp_static)
add_executable(hdfs_stat hdfs_stat.cc) add_executable(hdfs_stat hdfs_stat.cc)
target_link_libraries(hdfs_stat tools_common hdfspp_static) target_link_libraries(hdfs_stat tools_common hdfspp_static)

View File

@ -0,0 +1,27 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Static library that combines the ls tool implementation (hdfs-ls.cc) with
# the object files of the hdfs_tool_obj target.
add_library(hdfs_ls_lib STATIC $<TARGET_OBJECTS:hdfs_tool_obj> hdfs-ls.cc)
target_include_directories(hdfs_ls_lib PRIVATE ../../tools ${Boost_INCLUDE_DIRS})
target_link_libraries(hdfs_ls_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static)
# The hdfs_ls executable is built from main.cc alone and links against the
# library above.
add_executable(hdfs_ls main.cc)
target_include_directories(hdfs_ls PRIVATE ../../tools)
target_link_libraries(hdfs_ls PRIVATE hdfs_ls_lib)
# Install the executable into the bin directory.
install(TARGETS hdfs_ls RUNTIME DESTINATION bin)

View File

@ -0,0 +1,156 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <future>
#include <iostream>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include "hdfs-ls.h"
#include "tools_common.h"
namespace hdfs::tools {
// Forwards the argument count and argument vector to the HdfsTool base class.
Ls::Ls(const int argc, char **argv) : HdfsTool(argc, argv) {}
/**
 * Declares the options understood by the ls tool, parses the command line
 * held in argc_/argv_ and stores the parsed values in opt_val_.
 *
 * @return Always true once parsing has completed.
 */
bool Ls::Initialize() {
  opt_desc_.add_options()
      ("help,h", "List information about the files")
      ("recursive,R", "Operate on files and directories recursively")
      ("path", po::value<std::string>(),
       "The path for which we need to do ls");

  // We allow only one positional argument to be passed to this tool. An
  // exception is thrown if multiple arguments are passed.
  pos_opt_desc_.add("path", 1);

  const auto parsed_options = po::command_line_parser(argc_, argv_)
                                  .options(opt_desc_)
                                  .positional(pos_opt_desc_)
                                  .run();
  po::store(parsed_options, opt_val_);
  po::notify(opt_val_);
  return true;
}
/**
 * Builds the usage text of the ls tool.
 *
 * @return The usage text, one entry of the table below per line, each line
 * terminated by a newline character.
 */
std::string Ls::GetDescription() const {
  static const char *const kUsageLines[] = {
      "Usage: hdfs_ls [OPTION] FILE",
      "",
      "List information about the FILEs.",
      "",
      " -R list subdirectories recursively",
      " -h display this help and exit",
      "",
      "Examples:",
      "hdfs_ls hdfs://localhost.localdomain:8020/dir",
      "hdfs_ls -R /dir1/dir2",
  };

  std::string usage;
  for (const char *const line : kUsageLines) {
    usage += line;
    usage += '\n';
  }
  return usage;
}
/**
 * Runs the ls tool: initializes the option parser, validates the constraints
 * and then dispatches to the help handler or the path handler.
 *
 * @return The result of the selected handler, or false when initialization or
 * validation fails, or when neither the help option nor a path was given.
 */
bool Ls::Do() {
  if (!Initialize()) {
    std::cerr << "Unable to initialize HDFS ls tool" << std::endl;
    return false;
  }

  if (!ValidateConstraints()) {
    std::cout << GetDescription();
    return false;
  }

  // The help option takes precedence over any path that was supplied
  const bool help_requested = opt_val_.count("help") > 0;
  if (help_requested) {
    return HandleHelp();
  }

  const bool path_given = opt_val_.count("path") > 0;
  if (!path_given) {
    return false;
  }

  const bool recursive = opt_val_.count("recursive") > 0;
  return HandlePath(opt_val_["path"].as<std::string>(), recursive);
}
// Writes the text returned by GetDescription() to standard output and
// returns true.
bool Ls::HandleHelp() const {
std::cout << GetDescription();
return true;
}
/**
 * Lists the given path, printing every entry to standard output as the
 * result chunks arrive.
 *
 * @param path The path to list.
 * @param recursive When true the listing is obtained through Find, otherwise
 * through GetListing.
 *
 * @return false when the file system connection could not be obtained or when
 * any result chunk carried a non-OK status, true otherwise.
 */
bool Ls::HandlePath(const std::string &path, const bool recursive) const {
  // Building a URI object from the given path
  auto uri = hdfs::parse_path_or_exit(path);

  const auto fs = hdfs::doConnect(uri, true);
  if (!fs) {
    std::cerr << "Could not connect the file system. " << std::endl;
    return false;
  }

  const auto listing_done = std::make_shared<std::promise<void>>();
  auto listing_finished = listing_done->get_future();
  auto first_error = hdfs::Status::OK();

  /*
   * Keep requesting more until we get the entire listing. Set the promise
   * when we have the entire listing to stop.
   *
   * Find and GetListing guarantee that the handler will only be called once at
   * a time, so we do not need any locking here. They also guarantee that the
   * handler will be only called once with has_more_results set to false.
   */
  auto on_chunk = [listing_done, &first_error](
                      const hdfs::Status &chunk_status,
                      const std::vector<hdfs::StatInfo> &entries,
                      const bool more_pending) -> bool {
    // Print result chunks as they arrive
    for (const auto &entry : entries) {
      std::cout << entry.str() << std::endl;
    }

    // Only the first error is remembered; later ones are ignored
    if (!chunk_status.ok() && first_error.ok()) {
      first_error = chunk_status;
    }

    if (more_pending) {
      return true; // Request more results
    }

    listing_done->set_value(); // Unblocks the wait below
    return false;              // Request to stop sending results
  };

  if (recursive) {
    // Asynchronous call to Find
    fs->Find(uri.get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(),
             on_chunk);
  } else {
    // Asynchronous call to GetListing
    fs->GetListing(uri.get_path(), on_chunk);
  }

  // Block until the handler has seen the last chunk
  listing_finished.get();

  if (first_error.ok()) {
    return true;
  }

  std::cerr << "Error: " << first_error.ToString() << std::endl;
  return false;
}
} // namespace hdfs::tools

View File

@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LIBHDFSPP_TOOLS_HDFS_LS
#define LIBHDFSPP_TOOLS_HDFS_LS
#include <string>
#include <boost/program_options.hpp>
#include "hdfs-tool.h"
namespace hdfs::tools {
/**
* {@class Ls} is an {@class HdfsTool} that lists information about the files.
* It accepts a help option, a recursive option and a single path.
*/
class Ls : public HdfsTool {
public:
/**
* Constructs the tool from the command-line arguments, which are passed on
* to {@class HdfsTool}.
*
* @param argc Count of the arguments in argv
* @param argv The command-line arguments
*/
Ls(int argc, char **argv);
// Abiding to the Rule of 5: copy and move construction are defaulted while
// copy and move assignment are deleted.
Ls(const Ls &) = default;
Ls(Ls &&) = default;
Ls &operator=(const Ls &) = delete;
Ls &operator=(Ls &&) = delete;
~Ls() override = default;
/**
* @return The usage text of the ls tool.
*/
[[nodiscard]] std::string GetDescription() const override;
/**
* Initializes the tool, validates its constraints and then runs the help
* handler or the path handler depending on the parsed options.
*
* @return A boolean indicating the result of this operation.
*/
[[nodiscard]] bool Do() override;
protected:
/**
* Declares the command-line options and parses the arguments.
*
* @return A boolean indicating whether the initialization succeeded.
*/
[[nodiscard]] bool Initialize() override;
/**
* @return true only when argc_ is greater than 1.
*/
[[nodiscard]] bool ValidateConstraints() const override { return argc_ > 1; }
/**
* Prints the usage text of the tool.
*
* @return A boolean indicating the result of this operation.
*/
[[nodiscard]] bool HandleHelp() const override;
/**
* Handle the path argument that's passed to this tool.
*
* @param path The path to the directory for which we need to ls.
* @param recursive A boolean indicating whether ls needs to be
* performed recursively for the given path.
*
* @return A boolean indicating the result of this operation.
*/
[[nodiscard]] virtual bool HandlePath(const std::string &path,
bool recursive) const;
private:
/**
* A boost data-structure containing the description of positional arguments
* passed to the command-line.
*/
po::positional_options_description pos_opt_desc_;
};
} // namespace hdfs::tools
#endif

View File

@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstdlib>
#include <exception>
#include <iostream>
#include <google/protobuf/stubs/common.h>
#include "hdfs-ls.h"
/**
 * Entry point of the hdfs_ls tool.
 *
 * Schedules the protobuf clean-up to run at exit, runs the Ls tool with the
 * given arguments and maps its outcome to the process exit status.
 *
 * @param argc Count of the arguments in argv
 * @param argv The command-line arguments
 *
 * @return EXIT_SUCCESS when the tool succeeds. EXIT_FAILURE when the clean-up
 * could not be scheduled, the tool reports failure or it throws an exception
 * derived from std::exception.
 */
int main(int argc, char *argv[]) {
  const auto result = std::atexit([]() -> void {
    // Clean up static data on exit and prevent valgrind memory leaks
    google::protobuf::ShutdownProtobufLibrary();
  });
  if (result != 0) {
    std::cerr << "Error: Unable to schedule clean-up tasks for HDFS ls tool, "
                 "exiting"
              << std::endl;
    return EXIT_FAILURE;
  }

  hdfs::tools::Ls ls(argc, argv);
  auto success = false;

  try {
    success = ls.Do();
  } catch (const std::exception &e) {
    std::cerr << "Error: " << e.what() << std::endl;
  }

  // Return from main rather than calling std::exit: std::exit does not
  // destroy objects with automatic storage duration, so `ls` would never be
  // destructed. The handler registered with std::atexit still runs after
  // main returns.
  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@ -1,130 +0,0 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/
#include <google/protobuf/stubs/common.h>
#include <unistd.h>
#include <future>
#include "tools_common.h"
// Prints the usage text of hdfs_ls to standard output, one table entry per
// line, flushing after every line.
void usage() {
  static const char *const kUsageLines[] = {
      "Usage: hdfs_ls [OPTION] FILE",
      "",
      "List information about the FILEs.",
      "",
      " -R list subdirectories recursively",
      " -h display this help and exit",
      "",
      "Examples:",
      "hdfs_ls hdfs://localhost.localdomain:8020/dir",
      "hdfs_ls -R /dir1/dir2",
  };

  for (const char *const line : kUsageLines) {
    std::cout << line << std::endl;
  }
}
/*
 * Entry point of the getopt based hdfs_ls tool.
 *
 * Parses the -R and -h options, connects to the file system named by the
 * first non-option argument and prints its listing (recursively when -R was
 * given). The exit status is EXIT_FAILURE when the arguments are invalid,
 * the connection fails or the listing reports an error, and EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, char *argv[]) {
  //We should have at least 2 arguments
  if (argc < 2) {
    usage();
    exit(EXIT_FAILURE);
  }

  bool recursive = false;
  int input;

  //Using GetOpt to read in the values
  opterr = 0;
  while ((input = getopt(argc, argv, "Rh")) != -1) {
    switch (input)
    {
    case 'R':
      recursive = true;
      break;
    case 'h':
      usage();
      exit(EXIT_SUCCESS);
    case '?':
      if (isprint(optopt))
        std::cerr << "Unknown option `-" << static_cast<char>(optopt) << "'." << std::endl;
      else
        std::cerr << "Unknown option character `" << static_cast<char>(optopt) << "'." << std::endl;
      usage();
      exit(EXIT_FAILURE);
    default:
      exit(EXIT_FAILURE);
    }
  }

  //Every argument was consumed as an option (e.g. "hdfs_ls -R"), so there is
  //no path operand and argv[optind] must not be read
  if (optind >= argc) {
    usage();
    exit(EXIT_FAILURE);
  }
  std::string uri_path = argv[optind];

  //Building a URI object from the given uri_path
  hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);

  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
  if (!fs) {
    std::cerr << "Could not connect the file system. " << std::endl;
    exit(EXIT_FAILURE);
  }

  std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
  std::future<void> future(promise->get_future());
  hdfs::Status status = hdfs::Status::OK();

  /**
   * Keep requesting more until we get the entire listing. Set the promise
   * when we have the entire listing to stop.
   *
   * Find and GetListing guarantee that the handler will only be called once at a time,
   * so we do not need any locking here. They also guarantee that the handler will be
   * only called once with has_more_results set to false.
   */
  auto handler = [promise, &status]
                  (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
    //Print result chunks as they arrive. The loop variable is named so that
    //it does not shadow the status parameter 's'.
    for (hdfs::StatInfo const& info : si) {
      std::cout << info.str() << std::endl;
    }
    if(!s.ok() && status.ok()){
      //We make sure we set 'status' only on the first error.
      status = s;
    }
    if (!has_more_results) {
      promise->set_value();  //set promise
      return false;          //request stop sending results
    }
    return true;  //request more results
  };

  if(!recursive){
    //Asynchronous call to GetListing
    fs->GetListing(uri.get_path(), handler);
  } else {
    //Asynchronous call to Find
    fs->Find(uri.get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(), handler);
  }

  //block until promise is set
  future.get();

  //Report a failed listing through the exit status as well as on stderr
  int exit_code = EXIT_SUCCESS;
  if(!status.ok()) {
    std::cerr << "Error: " << status.ToString() << std::endl;
    exit_code = EXIT_FAILURE;
  }

  // Clean up static data and prevent valgrind memory leaks
  google::protobuf::ShutdownProtobufLibrary();
  return exit_code;
}