From 6bad3badf56559e00002f0d95dee3e81480ffa44 Mon Sep 17 00:00:00 2001 From: Gautham B A Date: Thu, 7 Oct 2021 23:27:11 +0530 Subject: [PATCH] HDFS-16251. Make hdfs_cat tool cross platform (#3523) --- .../libhdfspp/tests/tools/CMakeLists.txt | 8 +- .../libhdfspp/tests/tools/hdfs-cat-mock.cc | 23 ++++ .../libhdfspp/tests/tools/hdfs-cat-mock.h | 53 +++++++++ .../libhdfspp/tests/tools/hdfs-tool-test.cc | 11 +- .../native/libhdfspp/tools/CMakeLists.txt | 7 +- .../libhdfspp/tools/hdfs-cat/CMakeLists.txt | 27 +++++ .../libhdfspp/tools/hdfs-cat/hdfs-cat.cc | 104 ++++++++++++++++++ .../libhdfspp/tools/hdfs-cat/hdfs-cat.h | 90 +++++++++++++++ .../native/libhdfspp/tools/hdfs-cat/main.cc | 52 +++++++++ .../main/native/libhdfspp/tools/hdfs_cat.cc | 83 -------------- 10 files changed, 365 insertions(+), 93 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.cc create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.h create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/CMakeLists.txt create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.cc create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.h create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/main.cc delete mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cc diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt index 421a66dbcff..e2bcc567e6a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt +++ 
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/CMakeLists.txt @@ -16,7 +16,7 @@ # limitations under the License. # -add_executable(hdfs_allowSnapshot_test hdfs-allow-snapshot-mock.cc hdfs-tool-test.cc main.cc) -target_include_directories(hdfs_allowSnapshot_test PRIVATE ../tools ../../tools ../../tools/hdfs-allow-snapshot) -target_link_libraries(hdfs_allowSnapshot_test PRIVATE gmock_main hdfs_allowSnapshot_lib) -add_test(hdfs_allowSnapshot_test hdfs_allowSnapshot_test) \ No newline at end of file +add_executable(hdfs_tool_test hdfs-allow-snapshot-mock.cc hdfs-cat-mock.cc hdfs-tool-test.cc main.cc) +target_include_directories(hdfs_tool_test PRIVATE ../tools ../../tools ../../tools/hdfs-allow-snapshot ../../tools/hdfs-cat) +target_link_libraries(hdfs_tool_test PRIVATE gmock_main hdfs_allowSnapshot_lib hdfs_cat_lib) +add_test(hdfs_tool_test hdfs_tool_test) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.cc new file mode 100644 index 00000000000..1d8f55a5d7b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.cc @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hdfs-cat-mock.h" + +namespace hdfs::tools::test { +CatMock::~CatMock() {} +} // namespace hdfs::tools::test diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.h new file mode 100644 index 00000000000..d8ae5921836 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-cat-mock.h @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LIBHDFSPP_TOOLS_HDFS_CAT_MOCK +#define LIBHDFSPP_TOOLS_HDFS_CAT_MOCK + +#include <string> + +#include <gmock/gmock.h> + +#include "hdfs-cat.h" + +namespace hdfs::tools::test { +/** + * {@class CatMock} is a {@class Cat} whereby it mocks the + * HandleHelp and HandlePath methods for testing their functionality. + */ +class CatMock : public hdfs::tools::Cat { +public: + /** + * {@inheritdoc} + */ + CatMock(const int argc, char **argv) : Cat(argc, argv) {} + + // Abiding to the Rule of 5 + CatMock(const CatMock &) = delete; + CatMock(CatMock &&) = delete; + CatMock &operator=(const CatMock &) = delete; + CatMock &operator=(CatMock &&) = delete; + ~CatMock() override; + + MOCK_METHOD(bool, HandleHelp, (), (const, override)); + + MOCK_METHOD(bool, HandlePath, (const std::string &), (const, override)); +}; +} // namespace hdfs::tools::test + +#endif diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-test.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-test.cc index 1b8cff1d107..77c06555372 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-test.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/tools/hdfs-tool-test.cc @@ -20,6 +20,7 @@ #include #include "hdfs-allow-snapshot-mock.h" +#include "hdfs-cat-mock.h" #include "hdfs-tool-test.h" HdfsToolBasicTest::~HdfsToolBasicTest() {} @@ -30,6 +31,14 @@ INSTANTIATE_TEST_SUITE_P( testing::Values(PassAPath, CallHelp)); +INSTANTIATE_TEST_SUITE_P(HdfsCat, HdfsToolBasicTest, + testing::Values(PassAPath, + CallHelp)); + INSTANTIATE_TEST_SUITE_P( HdfsAllowSnapshot, HdfsToolNegativeTest, - testing::Values(Pass2Paths)); \ No newline at end of file + testing::Values(Pass2Paths)); + +INSTANTIATE_TEST_SUITE_P( + HdfsCat, HdfsToolNegativeTest, + testing::Values(Pass2Paths)); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt index 9f4f9846ec7..4b6cda5764f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/CMakeLists.txt @@ -31,9 +31,7 @@ add_library(tools_common $) add_library(hdfs_tool_obj OBJECT hdfs-tool.cc) target_include_directories(hdfs_tool_obj PRIVATE ../tools) - -add_executable(hdfs_cat hdfs_cat.cc) -target_link_libraries(hdfs_cat tools_common hdfspp_static) +add_subdirectory(hdfs-cat) add_executable(hdfs_chgrp hdfs_chgrp.cc) target_link_libraries(hdfs_chgrp tools_common hdfspp_static) @@ -81,14 +79,13 @@ add_executable(hdfs_setrep hdfs_setrep.cc) target_link_libraries(hdfs_setrep tools_common hdfspp_static) add_library(hdfs_allowSnapshot_lib STATIC $ hdfs-allow-snapshot/hdfs-allow-snapshot.cc) -target_include_directories(hdfs_allowSnapshot_lib PRIVATE ../tools allow-snapshot ${Boost_INCLUDE_DIRS}) +target_include_directories(hdfs_allowSnapshot_lib PRIVATE ../tools hdfs-allow-snapshot ${Boost_INCLUDE_DIRS}) target_link_libraries(hdfs_allowSnapshot_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static) add_executable(hdfs_allowSnapshot hdfs-allow-snapshot/main.cc) target_include_directories(hdfs_allowSnapshot PRIVATE ../tools) target_link_libraries(hdfs_allowSnapshot PRIVATE hdfs_allowSnapshot_lib) - add_executable(hdfs_disallowSnapshot hdfs_disallowSnapshot.cc) target_link_libraries(hdfs_disallowSnapshot tools_common hdfspp_static) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/CMakeLists.txt new file mode 100644 index 00000000000..12de299d263 --- /dev/null +++ 
b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/CMakeLists.txt @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_library(hdfs_cat_lib STATIC $<TARGET_OBJECTS:hdfs_tool_obj> hdfs-cat.cc) +target_include_directories(hdfs_cat_lib PRIVATE ../../tools hdfs-cat ${Boost_INCLUDE_DIRS}) +target_link_libraries(hdfs_cat_lib PRIVATE Boost::boost Boost::program_options tools_common hdfspp_static) + +add_executable(hdfs_cat main.cc) +target_include_directories(hdfs_cat PRIVATE ../../tools) +target_link_libraries(hdfs_cat PRIVATE hdfs_cat_lib) + +install(TARGETS hdfs_cat RUNTIME DESTINATION bin) \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.cc new file mode 100644 index 00000000000..db8507ec770 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.cc @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iostream> +#include <memory> +#include <ostream> +#include <sstream> +#include <string> + +#include "hdfs-cat.h" +#include "tools_common.h" + +namespace hdfs::tools { +Cat::Cat(const int argc, char **argv) : HdfsTool(argc, argv) {} + +bool Cat::Initialize() { + opt_desc_.add_options()("help,h", "Concatenate FILE to standard output.")( + "path", po::value<std::string>(), + "The path to the file that needs to be cat-ed"); + + // We allow only one argument to be passed to this tool. An exception is + // thrown if multiple arguments are passed. + pos_opt_desc_.add("path", 1); + + po::store(po::command_line_parser(argc_, argv_) + .options(opt_desc_) + .positional(pos_opt_desc_) + .run(), + opt_val_); + po::notify(opt_val_); + return true; +} + +std::string Cat::GetDescription() const { + std::stringstream desc; + desc << "Usage: hdfs_cat [OPTION] FILE" << std::endl + << std::endl + << "Concatenate FILE to standard output." 
<< std::endl + << std::endl + << " -h display this help and exit" << std::endl + << std::endl + << "Examples:" << std::endl + << "hdfs_cat hdfs://localhost.localdomain:8020/dir/file" << std::endl + << "hdfs_cat /dir/file" << std::endl; + return desc.str(); +} + +bool Cat::Do() { + if (!Initialize()) { + std::cerr << "Unable to initialize HDFS cat tool" << std::endl; + return false; + } + + if (!ValidateConstraints()) { + std::cout << GetDescription(); + return false; + } + + if (opt_val_.count("help") > 0) { + return HandleHelp(); + } + + if (opt_val_.count("path") > 0) { + const auto path = opt_val_["path"].as<std::string>(); + return HandlePath(path); + } + + return true; +} + +bool Cat::HandleHelp() const { + std::cout << GetDescription(); + return true; +} + +bool Cat::HandlePath(const std::string &path) const { + // Building a URI object from the given path + auto uri = hdfs::parse_path_or_exit(path); + + const auto fs = hdfs::doConnect(uri, false); + if (fs == nullptr) { + std::cerr << "Could not connect the file system." << std::endl; + return false; + } + + readFile(fs, uri.get_path(), 0, stdout, false); + return true; +} +} // namespace hdfs::tools diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.h new file mode 100644 index 00000000000..ca76894ae04 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/hdfs-cat.h @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LIBHDFSPP_TOOLS_HDFS_CAT +#define LIBHDFSPP_TOOLS_HDFS_CAT + +#include <string> + +#include <boost/program_options.hpp> + +#include "hdfs-tool.h" + +namespace hdfs::tools { +/** + * {@class Cat} is an {@class HdfsTool} that reads the contents of the file + * located at an HDFS path and writes to stdout. + */ +class Cat : public HdfsTool { +public: + /** + * {@inheritdoc} + */ + Cat(int argc, char **argv); + + // Abiding to the Rule of 5 + Cat(const Cat &) = default; + Cat(Cat &&) = default; + Cat &operator=(const Cat &) = delete; + Cat &operator=(Cat &&) = delete; + ~Cat() override = default; + + /** + * {@inheritdoc} + */ + [[nodiscard]] std::string GetDescription() const override; + + /** + * {@inheritdoc} + */ + [[nodiscard]] bool Do() override; + +protected: + /** + * {@inheritdoc} + */ + [[nodiscard]] bool Initialize() override; + + /** + * {@inheritdoc} + */ + [[nodiscard]] bool ValidateConstraints() const override { return argc_ > 1; } + + /** + * {@inheritdoc} + */ + [[nodiscard]] bool HandleHelp() const override; + + /** + * Handle the path argument that's passed to this tool. + * + * @param path The path to the file whose contents are to be written to stdout. + * + * @return A boolean indicating the result of this operation. + */ + [[nodiscard]] virtual bool HandlePath(const std::string &path) const; + +private: + /** + * A boost data-structure containing the description of positional arguments + * passed to the command-line. 
+ */ + po::positional_options_description pos_opt_desc_; +}; +} // namespace hdfs::tools +#endif diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/main.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/main.cc new file mode 100644 index 00000000000..de514b06eed --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs-cat/main.cc @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <cstdlib> +#include <exception> +#include <iostream> + +#include <google/protobuf/stubs/common.h> + +#include "hdfs-cat.h" + +int main(int argc, char *argv[]) { + const auto result = std::atexit([]() -> void { + // Clean up static data on exit and prevent valgrind memory leaks + google::protobuf::ShutdownProtobufLibrary(); + }); + if (result != 0) { + std::cerr + << "Error: Unable to schedule clean-up tasks for HDFS cat tool, exiting" + << std::endl; + std::exit(EXIT_FAILURE); + } + + hdfs::tools::Cat cat(argc, argv); + auto success = false; + + try { + success = cat.Do(); + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + } + + if (!success) { + std::exit(EXIT_FAILURE); + } + return 0; +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cc deleted file mode 100644 index c89bb94cb7d..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tools/hdfs_cat.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
-*/ - -#include -#include -#include "tools_common.h" - -void usage(){ - std::cout << "Usage: hdfs_cat [OPTION] FILE" - << std::endl - << std::endl << "Concatenate FILE to standard output." - << std::endl - << std::endl << " -h display this help and exit" - << std::endl - << std::endl << "Examples:" - << std::endl << "hdfs_cat hdfs://localhost.localdomain:8020/dir/file" - << std::endl << "hdfs_cat /dir/file" - << std::endl; -} - -#define BUF_SIZE 4096 - -int main(int argc, char *argv[]) { - if (argc != 2) { - usage(); - exit(EXIT_FAILURE); - } - - int input; - - //Using GetOpt to read in the values - opterr = 0; - while ((input = getopt(argc, argv, "h")) != -1) { - switch (input) - { - case 'h': - usage(); - exit(EXIT_SUCCESS); - case '?': - if (isprint(optopt)) - std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl; - else - std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl; - usage(); - exit(EXIT_FAILURE); - default: - exit(EXIT_FAILURE); - } - } - - std::string uri_path = argv[optind]; - - //Building a URI object from the given uri_path - hdfs::URI uri = hdfs::parse_path_or_exit(uri_path); - - std::shared_ptr fs = hdfs::doConnect(uri, false); - if (!fs) { - std::cerr << "Could not connect the file system. " << std::endl; - exit(EXIT_FAILURE); - } - - readFile(fs, uri.get_path(), 0, stdout, false); - - // Clean up static data and prevent valgrind memory leaks - google::protobuf::ShutdownProtobufLibrary(); - return 0; -}