From 73b67b2df565d2466d6cda1fda0201f9abeab179 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Singh Date: Mon, 11 Feb 2019 20:08:25 +0530 Subject: [PATCH] HDDS-1040. Add blockade Tests for client failures. Contributed by Nilotpal Nandi. --- .../blockade/clusterUtils/cluster_utils.py | 117 ++++++++++++++++- .../dist/src/main/blockade/conftest.py | 25 +++- .../blockade/test_blockade_client_failure.py | 124 ++++++++++++++++++ .../test_blockade_datanode_isolation.py | 1 + .../src/main/blockade/test_blockade_flaky.py | 1 + .../blockade/test_blockade_mixed_failure.py | 1 + ...ckade_mixed_failure_three_nodes_isolate.py | 1 + .../test_blockade_mixed_failure_two_nodes.py | 1 + .../blockade/test_blockade_scm_isolation.py | 1 + .../compose/ozoneblockade/docker-compose.yaml | 9 ++ 10 files changed, 272 insertions(+), 9 deletions(-) create mode 100644 hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py diff --git a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py index f590f7724d2..bf0b28fd8c4 100644 --- a/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py +++ b/hadoop-ozone/dist/src/main/blockade/clusterUtils/cluster_utils.py @@ -22,6 +22,7 @@ import logging import time import re import yaml +import os logger = logging.getLogger(__name__) @@ -64,17 +65,18 @@ class ClusterUtils(object): @classmethod def run_freon(cls, docker_compose_file, num_volumes, num_buckets, - num_keys, key_size, replication_type, replication_factor): + num_keys, key_size, replication_type, replication_factor, + freon_client='ozoneManager'): # run freon cmd = "docker-compose -f %s " \ - "exec ozoneManager /opt/hadoop/bin/ozone " \ + "exec %s /opt/hadoop/bin/ozone " \ "freon rk " \ "--numOfVolumes %s " \ "--numOfBuckets %s " \ "--numOfKeys %s " \ "--keySize %s " \ "--replicationType %s " \ - "--factor %s" % (docker_compose_file, num_volumes, + "--factor %s" % (docker_compose_file, freon_client, 
num_volumes, num_buckets, num_keys, key_size, replication_type, replication_factor) exit_code, output = cls.run_cmd(cmd) @@ -188,4 +190,111 @@ class ClusterUtils(object): logger.info("All datanodes container status: %s", ' '.join(all_datanode_container_status)) - return all_datanode_container_status \ No newline at end of file + return all_datanode_container_status + + @classmethod + def create_volume(cls, docker_compose_file, volume_name): + command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh volume create /%s --user root" % \ + (docker_compose_file, volume_name) + logger.info("Creating Volume %s", volume_name) + exit_code, output = cls.run_cmd(command) + assert exit_code == 0, "Ozone volume create failed with output=[%s]" \ + % output + + @classmethod + def delete_volume(cls, docker_compose_file, volume_name): + command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh volume delete /%s" % (docker_compose_file, volume_name) + logger.info("Deleting Volume %s", volume_name) + exit_code, output = cls.run_cmd(command) + return exit_code, output + + @classmethod + def create_bucket(cls, docker_compose_file, bucket_name, volume_name): + command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh bucket create /%s/%s" % (docker_compose_file, + volume_name, bucket_name) + logger.info("Creating Bucket %s in volume %s", + bucket_name, volume_name) + exit_code, output = cls.run_cmd(command) + assert exit_code == 0, "Ozone bucket create failed with output=[%s]" \ + % output + + @classmethod + def delete_bucket(cls, docker_compose_file, bucket_name, volume_name): + command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh bucket delete /%s/%s" % (docker_compose_file, + volume_name, bucket_name) + logger.info("Running delete bucket of %s/%s", volume_name, bucket_name) + exit_code, output = cls.run_cmd(command) + return exit_code, output + + 
@classmethod + def put_key(cls, docker_compose_file, bucket_name, volume_name, + filepath, key_name=None, replication_factor=None): + command = "docker-compose -f %s " \ + "exec ozone_client ls %s" % (docker_compose_file, filepath) + exit_code, output = cls.run_cmd(command) + assert exit_code == 0, "%s does not exist" % filepath + if key_name is None: + key_name = os.path.basename(filepath) + command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh key put /%s/%s/%s %s" % (docker_compose_file, + volume_name, bucket_name, + key_name, filepath) + if replication_factor: + command = "%s --replication=%s" % (command, replication_factor) + logger.info("Creating key %s in %s/%s", key_name, + volume_name, bucket_name) + exit_code, output = cls.run_cmd(command) + assert exit_code == 0, "Ozone put Key failed with output=[%s]" % output + + @classmethod + def delete_key(cls, docker_compose_file, bucket_name, volume_name, + key_name): + command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh key delete /%s/%s/%s" \ + % (docker_compose_file, volume_name, bucket_name, key_name) + logger.info("Running delete key %s in %s/%s", + key_name, volume_name, bucket_name) + exit_code, output = cls.run_cmd(command) + return exit_code, output + + @classmethod + def get_key(cls, docker_compose_file, bucket_name, volume_name, + key_name, filepath=None): + if filepath is None: + filepath = '.' 
+ command = "docker-compose -f %s " \ + "exec ozone_client /opt/hadoop/bin/ozone " \ + "sh key get /%s/%s/%s %s" % (docker_compose_file, + volume_name, bucket_name, + key_name, filepath) + logger.info("Running get key %s in %s/%s", key_name, + volume_name, bucket_name) + exit_code, output = cls.run_cmd(command) + assert exit_code == 0, "Ozone get Key failed with output=[%s]" % output + + @classmethod + def find_checksum(cls, docker_compose_file, filepath): + command = "docker-compose -f %s " \ + "exec ozone_client md5sum %s" % (docker_compose_file, filepath) + exit_code, output = cls.run_cmd(command) + assert exit_code == 0, "Cant find checksum" + myoutput = output.split("\n") + finaloutput = "" + for line in myoutput: + if line.find("Warning") >= 0 or line.find("is not a tty") >= 0: + logger.info("skip this line: %s", line) + else: + finaloutput = finaloutput + line + checksum = finaloutput.split(" ") + logger.info("Checksum of %s is : %s", filepath, checksum[0]) + return checksum[0] \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/blockade/conftest.py b/hadoop-ozone/dist/src/main/blockade/conftest.py index 31e2ccd7385..ff5bfc7fc0c 100644 --- a/hadoop-ozone/dist/src/main/blockade/conftest.py +++ b/hadoop-ozone/dist/src/main/blockade/conftest.py @@ -15,8 +15,10 @@ import logging import os +import time +import subprocess - +EPOCH_TIME = int(time.time()) def pytest_addoption(parser): parser.addoption("--output-dir", action="store", @@ -40,13 +42,14 @@ def pytest_addoption(parser): def pytest_configure(config): + global OUTPUT_DIR os.environ["CONTAINER_STATUS_SLEEP"] = config.option.containerStatusSleep - outputdir = config.option.output_dir + OUTPUT_DIR = "%s/%s" % (config.option.output_dir, EPOCH_TIME) try: - os.makedirs(outputdir) + os.makedirs(OUTPUT_DIR) except OSError, e: raise Exception(e.strerror + ": " + e.filename) - log_file = os.path.join(outputdir, "output.log") + log_file = os.path.join(OUTPUT_DIR, "output.log") if 
config.option.log_level == "trace": loglevel = eval("logging.DEBUG") @@ -74,8 +77,20 @@ def pytest_report_teststatus(report): elif report.when == 'call': logger.info("TEST \"%s\" %s in %3.2f seconds" % (name, report.outcome.upper(), report.duration)) + log_file_path = "%s/%s_all_docker.log" % \ + (OUTPUT_DIR, name) + gather_docker_logs(log_file_path) def pytest_sessionfinish(session): logger = logging.getLogger('main') - logger.info("ALL TESTS FINISHED") \ No newline at end of file + logger.info("ALL TESTS FINISHED") + logger.info("ALL logs present in following directory: %s", OUTPUT_DIR) + + +def gather_docker_logs(log_file_path): + docker_compose_file = os.environ["DOCKER_COMPOSE_FILE"] + output = subprocess.check_output(["docker-compose", "-f", + docker_compose_file, "logs"]) + with open(log_file_path, "w") as text_file: + text_file.write(output) diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py new file mode 100644 index 00000000000..b8ecf01c2d0 --- /dev/null +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py @@ -0,0 +1,124 @@ +#!/usr/bin/python + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re +import time +import logging +from blockadeUtils.blockade import Blockade +from clusterUtils.cluster_utils import ClusterUtils + + +logger = logging.getLogger(__name__) +parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +FILE = os.path.join(parent_dir, "compose", "ozoneblockade", + "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE +SCALE = 3 +CONTAINER_LIST = [] +OM = [] +SCM = [] +DATANODES = [] +CLIENT = [] + + +def setup(): + global CONTAINER_LIST, OM, SCM, DATANODES, CLIENT, ORIG_CHECKSUM, \ + TEST_VOLUME_NAME, TEST_BUCKET_NAME + epoch_time = int(time.time()) + TEST_VOLUME_NAME = "%s%s" % ("volume", epoch_time) + TEST_BUCKET_NAME = "%s%s" % ("bucket", epoch_time) + Blockade.blockade_destroy() + CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE) + exit_code, output = Blockade.blockade_status() + assert exit_code == 0, "blockade status command failed with output=[%s]" % \ + output + OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST) + SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST) + DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST))) + CLIENT = filter(lambda x: 'ozone_client' in x, CONTAINER_LIST) + + exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", + "THREE", "ozone_client") + assert exit_code == 0, "freon run failed with output=[%s]" % output + ClusterUtils.create_volume(FILE, TEST_VOLUME_NAME) + ClusterUtils.create_bucket(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME) + ORIG_CHECKSUM = ClusterUtils.find_checksum(FILE, "/etc/passwd") + + +def teardown(): + logger.info("Inside teardown") + Blockade.blockade_destroy() + + +def teardown_module(): + ClusterUtils.cluster_destroy(FILE) + + +def test_client_failure_isolate_two_datanodes(): + """ + In this test, all datanodes are isolated from each other. + two of the datanodes cannot communicate with any other node in the cluster. + Expectation : + Write should fail. 
+ Keys written before partition created can be read. + """ + test_key_name = "testkey1" + ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME, + "/etc/passwd", key_name=test_key_name, + replication_factor='THREE') + first_set = [OM[0], SCM[0], DATANODES[0], CLIENT[0]] + second_set = [DATANODES[1]] + third_set = [DATANODES[2]] + Blockade.blockade_create_partition(first_set, second_set, third_set) + Blockade.blockade_status() + exit_code, output = \ + ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE") + assert re.search( + "Allocate block failed, error:INTERNAL_ERROR", + output) is not None + ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME, + test_key_name, "/tmp/") + key_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % test_key_name) + + assert key_checksum == ORIG_CHECKSUM + + +def test_client_failure_isolate_one_datanode(): + """ + In this test, one of the datanodes is isolated from all other nodes. + Expectation : + Write should pass. + Keys written before partition created can be read. 
+ """ + test_key_name = "testkey2" + ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME, + "/etc/passwd", key_name=test_key_name, + replication_factor='THREE') + first_set = [OM[0], SCM[0], DATANODES[0], DATANODES[1], CLIENT[0]] + second_set = [DATANODES[2]] + Blockade.blockade_create_partition(first_set, second_set) + Blockade.blockade_status() + exit_code, output = \ + ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE") + assert re.search("3 way commit failed", output) is not None + assert re.search("Status: Success", output) is not None + ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME, + test_key_name, "/tmp/") + key_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % test_key_name) + + assert key_checksum == ORIG_CHECKSUM diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py index eecc7ea393d..becc635494f 100644 --- a/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_datanode_isolation.py @@ -26,6 +26,7 @@ logger = logging.getLogger(__name__) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) FILE = os.path.join(parent_dir, "compose", "ozoneblockade", "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE SCALE = 3 CONTAINER_LIST = [] OM = [] diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py index 3da7164d7f9..312960027db 100644 --- a/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_flaky.py @@ -27,6 +27,7 @@ logger = logging.getLogger(__name__) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) FILE = os.path.join(parent_dir, "compose", "ozoneblockade", "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE SCALE = 6 
CONTAINER_LIST = [] diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py index 69c865c490f..59755e01563 100644 --- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure.py @@ -26,6 +26,7 @@ logger = logging.getLogger(__name__) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) FILE = os.path.join(parent_dir, "compose", "ozoneblockade", "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE SCALE = 3 CONTAINER_LIST = [] OM = [] diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py index 255a6863159..ee4d031087d 100644 --- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py @@ -26,6 +26,7 @@ logger = logging.getLogger(__name__) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) FILE = os.path.join(parent_dir, "compose", "ozoneblockade", "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE SCALE = 3 CONTAINER_LIST = [] OM = [] diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py index 634299b4eb1..a8a6f9b72c0 100644 --- a/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py @@ -26,6 +26,7 @@ logger = logging.getLogger(__name__) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) FILE = os.path.join(parent_dir, "compose", "ozoneblockade", "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE SCALE = 3 
CONTAINER_LIST = [] OM = [] diff --git a/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py index 0af97456510..d2dd29a5e01 100644 --- a/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py +++ b/hadoop-ozone/dist/src/main/blockade/test_blockade_scm_isolation.py @@ -26,6 +26,7 @@ logger = logging.getLogger(__name__) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) FILE = os.path.join(parent_dir, "compose", "ozoneblockade", "docker-compose.yaml") +os.environ["DOCKER_COMPOSE_FILE"] = FILE SCALE = 3 CONTAINER_LIST = [] OM = [] diff --git a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml index 0a6a9d80280..75f4bf03901 100644 --- a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-compose.yaml @@ -47,3 +47,12 @@ services: environment: ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION command: ["/opt/hadoop/bin/ozone","scm"] + ozone_client: + image: apache/hadoop-runner + volumes: + - ../..:/opt/hadoop + ports: + - 9869 + command: ["tail", "-f","/etc/passwd"] + env_file: + - ./docker-config