HDDS-1040. Add blockade Tests for client failures. Contributed by Nilotpal Nandi.

This commit is contained in:
Mukul Kumar Singh 2019-02-11 20:08:25 +05:30
parent e7d1ae52d2
commit 73b67b2df5
10 changed files with 272 additions and 9 deletions

View File

@ -22,6 +22,7 @@ import logging
import time import time
import re import re
import yaml import yaml
import os
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -64,17 +65,18 @@ class ClusterUtils(object):
@classmethod @classmethod
def run_freon(cls, docker_compose_file, num_volumes, num_buckets, def run_freon(cls, docker_compose_file, num_volumes, num_buckets,
num_keys, key_size, replication_type, replication_factor): num_keys, key_size, replication_type, replication_factor,
freon_client='ozoneManager'):
# run freon # run freon
cmd = "docker-compose -f %s " \ cmd = "docker-compose -f %s " \
"exec ozoneManager /opt/hadoop/bin/ozone " \ "exec %s /opt/hadoop/bin/ozone " \
"freon rk " \ "freon rk " \
"--numOfVolumes %s " \ "--numOfVolumes %s " \
"--numOfBuckets %s " \ "--numOfBuckets %s " \
"--numOfKeys %s " \ "--numOfKeys %s " \
"--keySize %s " \ "--keySize %s " \
"--replicationType %s " \ "--replicationType %s " \
"--factor %s" % (docker_compose_file, num_volumes, "--factor %s" % (docker_compose_file, freon_client, num_volumes,
num_buckets, num_keys, key_size, num_buckets, num_keys, key_size,
replication_type, replication_factor) replication_type, replication_factor)
exit_code, output = cls.run_cmd(cmd) exit_code, output = cls.run_cmd(cmd)
@ -189,3 +191,110 @@ class ClusterUtils(object):
' '.join(all_datanode_container_status)) ' '.join(all_datanode_container_status))
return all_datanode_container_status return all_datanode_container_status
@classmethod
def create_volume(cls, docker_compose_file, volume_name):
    """
    Create an Ozone volume for user root by running the ozone shell
    inside the ozone_client container.

    docker_compose_file: compose file handed to "docker-compose -f".
    volume_name: name of the volume; it is created as "/<volume_name>".

    Raises AssertionError when the command's exit code is not 0.
    """
    cmd_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh volume create /%s --user root")
    create_cmd = cmd_template % (docker_compose_file, volume_name)
    logger.info("Creating Volume %s", volume_name)
    status, cmd_output = cls.run_cmd(create_cmd)
    assert status == 0, \
        "Ozone volume create failed with output=[%s]" % cmd_output
@classmethod
def delete_volume(cls, docker_compose_file, volume_name):
    """
    Delete the Ozone volume "/<volume_name>" via the ozone shell inside
    the ozone_client container.

    Returns the (exit_code, output) pair of the command; the exit code is
    not checked here, so the caller decides what counts as a failure.
    """
    cmd_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh volume delete /%s")
    delete_cmd = cmd_template % (docker_compose_file, volume_name)
    logger.info("Deleting Volume %s", volume_name)
    status, cmd_output = cls.run_cmd(delete_cmd)
    return status, cmd_output
@classmethod
def create_bucket(cls, docker_compose_file, bucket_name, volume_name):
    """
    Create the bucket "/<volume_name>/<bucket_name>" via the ozone shell
    inside the ozone_client container.

    Raises AssertionError when the command's exit code is not 0.
    """
    cmd_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh bucket create /%s/%s")
    create_cmd = cmd_template % (docker_compose_file,
                                 volume_name, bucket_name)
    logger.info("Creating Bucket %s in volume %s",
                bucket_name, volume_name)
    status, cmd_output = cls.run_cmd(create_cmd)
    assert status == 0, \
        "Ozone bucket create failed with output=[%s]" % cmd_output
@classmethod
def delete_bucket(cls, docker_compose_file, bucket_name, volume_name):
    """
    Delete the bucket "/<volume_name>/<bucket_name>" via the ozone shell
    inside the ozone_client container.

    Returns the (exit_code, output) pair of the command without checking
    the exit code.
    """
    cmd_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh bucket delete /%s/%s")
    delete_cmd = cmd_template % (docker_compose_file,
                                 volume_name, bucket_name)
    logger.info("Running delete bucket of %s/%s", volume_name, bucket_name)
    status, cmd_output = cls.run_cmd(delete_cmd)
    return status, cmd_output
@classmethod
def put_key(cls, docker_compose_file, bucket_name, volume_name,
            filepath, key_name=None, replication_factor=None):
    """
    Upload a file that lives inside the ozone_client container as a key
    in "/<volume_name>/<bucket_name>".

    filepath: path of the source file inside the ozone_client container;
        its presence is checked first with "ls".
    key_name: key to create; defaults to the basename of filepath.
    replication_factor: when truthy, appended to the command as
        "--replication=<replication_factor>".

    Raises AssertionError when the "ls" check or the put command exits
    with a non-zero code.
    """
    # Verify the source file is visible from inside the client container.
    ls_cmd = ("docker-compose -f %s "
              "exec ozone_client ls %s") % (docker_compose_file, filepath)
    ls_status, _ = cls.run_cmd(ls_cmd)
    assert ls_status == 0, "%s does not exist" % filepath

    if key_name is not None:
        target_key = key_name
    else:
        target_key = os.path.basename(filepath)

    put_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh key put /%s/%s/%s %s")
    put_cmd = put_template % (docker_compose_file, volume_name,
                              bucket_name, target_key, filepath)
    if replication_factor:
        put_cmd = "%s --replication=%s" % (put_cmd, replication_factor)

    logger.info("Creating key %s in %s/%s", target_key,
                volume_name, bucket_name)
    put_status, put_output = cls.run_cmd(put_cmd)
    assert put_status == 0, \
        "Ozone put Key failed with output=[%s]" % put_output
@classmethod
def delete_key(cls, docker_compose_file, bucket_name, volume_name,
               key_name):
    """
    Delete the key "/<volume_name>/<bucket_name>/<key_name>" via the
    ozone shell inside the ozone_client container.

    Returns the (exit_code, output) pair of the command without checking
    the exit code.
    """
    cmd_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh key delete /%s/%s/%s")
    delete_cmd = cmd_template % (docker_compose_file, volume_name,
                                 bucket_name, key_name)
    logger.info("Running delete key %s in %s/%s",
                key_name, volume_name, bucket_name)
    status, cmd_output = cls.run_cmd(delete_cmd)
    return status, cmd_output
@classmethod
def get_key(cls, docker_compose_file, bucket_name, volume_name,
            key_name, filepath=None):
    """
    Download the key "/<volume_name>/<bucket_name>/<key_name>" via the
    ozone shell inside the ozone_client container.

    filepath: destination passed to "sh key get"; defaults to '.'.

    Raises AssertionError when the command's exit code is not 0.
    """
    destination = '.' if filepath is None else filepath
    cmd_template = ("docker-compose -f %s "
                    "exec ozone_client /opt/hadoop/bin/ozone "
                    "sh key get /%s/%s/%s %s")
    get_cmd = cmd_template % (docker_compose_file, volume_name,
                              bucket_name, key_name, destination)
    logger.info("Running get key %s in %s/%s", key_name,
                volume_name, bucket_name)
    status, cmd_output = cls.run_cmd(get_cmd)
    assert status == 0, \
        "Ozone get Key failed with output=[%s]" % cmd_output
@classmethod
def find_checksum(cls, docker_compose_file, filepath):
    """
    Return the md5 checksum of a file inside the ozone_client container.

    Runs "md5sum <filepath>" through docker-compose exec, drops output
    lines containing "Warning" or "is not a tty", and returns the first
    whitespace-separated token of what remains.

    docker_compose_file: compose file handed to "docker-compose -f".
    filepath: path of the file inside the ozone_client container.

    Raises AssertionError when the command exits with a non-zero code or
    when no checksum token is left after filtering. Failing here keeps an
    empty result from comparing equal to another empty result in callers.
    """
    command = "docker-compose -f %s " \
              "exec ozone_client md5sum %s" % (docker_compose_file, filepath)
    exit_code, output = cls.run_cmd(command)
    assert exit_code == 0, "Cant find checksum"

    payload_lines = []
    for line in output.split("\n"):
        if "Warning" in line or "is not a tty" in line:
            logger.info("skip this line: %s", line)
        else:
            payload_lines.append(line)

    # split() without an argument also copes with leading whitespace and a
    # trailing carriage return, which split(" ") would turn into an empty
    # or polluted first token.
    tokens = "".join(payload_lines).split()
    assert tokens, "No checksum found in md5sum output=[%s]" % output
    checksum = tokens[0]
    logger.info("Checksum of %s is : %s", filepath, checksum)
    return checksum

View File

@ -15,8 +15,10 @@
import logging import logging
import os import os
import time
import subprocess
EPOCH_TIME = int(time.time())
def pytest_addoption(parser): def pytest_addoption(parser):
parser.addoption("--output-dir", parser.addoption("--output-dir",
action="store", action="store",
@ -40,13 +42,14 @@ def pytest_addoption(parser):
def pytest_configure(config): def pytest_configure(config):
global OUTPUT_DIR
os.environ["CONTAINER_STATUS_SLEEP"] = config.option.containerStatusSleep os.environ["CONTAINER_STATUS_SLEEP"] = config.option.containerStatusSleep
outputdir = config.option.output_dir OUTPUT_DIR = "%s/%s" % (config.option.output_dir, EPOCH_TIME)
try: try:
os.makedirs(outputdir) os.makedirs(OUTPUT_DIR)
except OSError, e: except OSError, e:
raise Exception(e.strerror + ": " + e.filename) raise Exception(e.strerror + ": " + e.filename)
log_file = os.path.join(outputdir, "output.log") log_file = os.path.join(OUTPUT_DIR, "output.log")
if config.option.log_level == "trace": if config.option.log_level == "trace":
loglevel = eval("logging.DEBUG") loglevel = eval("logging.DEBUG")
@ -74,8 +77,20 @@ def pytest_report_teststatus(report):
elif report.when == 'call': elif report.when == 'call':
logger.info("TEST \"%s\" %s in %3.2f seconds" % logger.info("TEST \"%s\" %s in %3.2f seconds" %
(name, report.outcome.upper(), report.duration)) (name, report.outcome.upper(), report.duration))
log_file_path = "%s/%s_all_docker.log" % \
(OUTPUT_DIR, name)
gather_docker_logs(log_file_path)
def pytest_sessionfinish(session):
    """
    Log that the test session is over and where its logs were written.

    session: unused by this function.
    """
    main_log = logging.getLogger('main')
    main_log.info("ALL TESTS FINISHED")
    main_log.info("ALL logs present in following directory: %s", OUTPUT_DIR)
def gather_docker_logs(log_file_path):
    """
    Dump the output of "docker-compose logs" into log_file_path.

    The compose file is taken from the DOCKER_COMPOSE_FILE environment
    variable. Log collection is best effort: when the variable is unset,
    or docker-compose cannot be started or exits with an error, a warning
    is logged and nothing is written, so a reporting hook that calls this
    helper is not aborted by a log-gathering problem.

    log_file_path: file that receives the collected logs (overwritten).
    """
    collector_log = logging.getLogger('main')
    docker_compose_file = os.environ.get("DOCKER_COMPOSE_FILE")
    if not docker_compose_file:
        collector_log.warning(
            "DOCKER_COMPOSE_FILE is not set, skipping docker logs for %s",
            log_file_path)
        return
    try:
        output = subprocess.check_output(["docker-compose", "-f",
                                          docker_compose_file, "logs"])
    except (subprocess.CalledProcessError, OSError) as err:
        collector_log.warning("Could not gather docker logs: %s", err)
        return
    # check_output returns raw bytes, so write in binary mode; this behaves
    # the same on Python 2 and also works on Python 3.
    with open(log_file_path, "wb") as log_file:
        log_file.write(output)

View File

@ -0,0 +1,124 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import time
import logging
from blockadeUtils.blockade import Blockade
from clusterUtils.cluster_utils import ClusterUtils
# Module-level logger named after this test module.
logger = logging.getLogger(__name__)
# Directory two levels above this file's resolved location.
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
# Compose file describing the cluster used by these tests.
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml")
# Exported through the environment; gather_docker_logs() reads this
# variable to know which compose file to collect logs from.
os.environ["DOCKER_COMPOSE_FILE"] = FILE
# Passed to ClusterUtils.cluster_setup() in setup(); presumably the number
# of datanodes to scale to -- TODO confirm against cluster_setup.
SCALE = 3
# Container name lists; all of them are (re)populated by setup().
CONTAINER_LIST = []
OM = []
SCM = []
DATANODES = []
CLIENT = []
def setup():
    """
    Bring up a fresh cluster and prepare the shared test fixtures.

    Destroys any existing blockade, starts the cluster described by FILE
    with SCALE, checks "blockade status", and fills the module globals:
    CONTAINER_LIST with the containers returned by cluster_setup, and
    OM / SCM / DATANODES / CLIENT with the container names that contain
    'ozoneManager' / 'scm' / 'datanode' / 'ozone_client' (DATANODES
    sorted). It then runs one freon pass from the ozone_client container,
    creates a volume and a bucket named with the current epoch time, and
    records the checksum of /etc/passwd in ORIG_CHECKSUM.

    Raises AssertionError when the blockade status or freon command exits
    with a non-zero code.
    """
    global CONTAINER_LIST, OM, SCM, DATANODES, CLIENT, ORIG_CHECKSUM, \
        TEST_VOLUME_NAME, TEST_BUCKET_NAME
    epoch_time = int(time.time())
    TEST_VOLUME_NAME = "volume%s" % epoch_time
    TEST_BUCKET_NAME = "bucket%s" % epoch_time
    Blockade.blockade_destroy()
    CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
    exit_code, output = Blockade.blockade_status()
    assert exit_code == 0, \
        "blockade status command failed with output=[%s]" % output
    # Build real lists: the tests index these (OM[0], CLIENT[0], ...),
    # which does not work on the lazy object filter() returns on Python 3.
    OM = [name for name in CONTAINER_LIST if 'ozoneManager' in name]
    SCM = [name for name in CONTAINER_LIST if 'scm' in name]
    DATANODES = sorted(name for name in CONTAINER_LIST
                       if 'datanode' in name)
    CLIENT = [name for name in CONTAINER_LIST if 'ozone_client' in name]
    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
                                               "THREE", "ozone_client")
    assert exit_code == 0, "freon run failed with output=[%s]" % output
    ClusterUtils.create_volume(FILE, TEST_VOLUME_NAME)
    ClusterUtils.create_bucket(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME)
    ORIG_CHECKSUM = ClusterUtils.find_checksum(FILE, "/etc/passwd")
def teardown():
    """
    Log entry into teardown, then destroy the blockade.
    """
    logger.info("Inside teardown")
    Blockade.blockade_destroy()
def teardown_module():
    """
    Destroy the cluster described by the compose file FILE.
    """
    ClusterUtils.cluster_destroy(FILE)
def test_client_failure_isolate_two_datanodes():
    """
    In this test, the cluster is split into three partitions:
    OM, SCM, the client and the first datanode stay together, while each
    of the other two datanodes is isolated in a partition of its own.
    Expectation :
    Write should fail.
    Keys written before partition created can be read.
    """
    key = "testkey1"
    # Written before the partition exists, so it must stay readable.
    ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         "/etc/passwd", key_name=key,
                         replication_factor='THREE')

    majority = [OM[0], SCM[0], DATANODES[0], CLIENT[0]]
    isolated_a = [DATANODES[1]]
    isolated_b = [DATANODES[2]]
    Blockade.blockade_create_partition(majority, isolated_a, isolated_b)
    Blockade.blockade_status()

    _, freon_output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240,
                                             "RATIS", "THREE")
    failure = re.search("Allocate block failed, error:INTERNAL_ERROR",
                        freon_output)
    assert failure is not None

    ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         key, "/tmp/")
    downloaded_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % key)
    assert downloaded_checksum == ORIG_CHECKSUM
def test_client_failure_isolate_one_datanode():
    """
    In this test, the last datanode is isolated from all other nodes;
    OM, SCM, the client and the other two datanodes stay together.
    Expectation :
    Write should pass, with the freon output reporting both
    "3 way commit failed" and "Status: Success".
    Keys written before partition created can be read.
    """
    key = "testkey2"
    # Written before the partition exists, so it must stay readable.
    ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         "/etc/passwd", key_name=key,
                         replication_factor='THREE')

    majority = [OM[0], SCM[0], DATANODES[0], DATANODES[1], CLIENT[0]]
    isolated = [DATANODES[2]]
    Blockade.blockade_create_partition(majority, isolated)
    Blockade.blockade_status()

    _, freon_output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240,
                                             "RATIS", "THREE")
    commit_warning = re.search("3 way commit failed", freon_output)
    assert commit_warning is not None
    success_line = re.search("Status: Success", freon_output)
    assert success_line is not None

    ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         key, "/tmp/")
    downloaded_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % key)
    assert downloaded_checksum == ORIG_CHECKSUM

View File

@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade", FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml") "docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3 SCALE = 3
CONTAINER_LIST = [] CONTAINER_LIST = []
OM = [] OM = []

View File

@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade", FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml") "docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 6 SCALE = 6
CONTAINER_LIST = [] CONTAINER_LIST = []

View File

@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade", FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml") "docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3 SCALE = 3
CONTAINER_LIST = [] CONTAINER_LIST = []
OM = [] OM = []

View File

@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade", FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml") "docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3 SCALE = 3
CONTAINER_LIST = [] CONTAINER_LIST = []
OM = [] OM = []

View File

@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade", FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml") "docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3 SCALE = 3
CONTAINER_LIST = [] CONTAINER_LIST = []
OM = [] OM = []

View File

@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(parent_dir, "compose", "ozoneblockade", FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
"docker-compose.yaml") "docker-compose.yaml")
os.environ["DOCKER_COMPOSE_FILE"] = FILE
SCALE = 3 SCALE = 3
CONTAINER_LIST = [] CONTAINER_LIST = []
OM = [] OM = []

View File

@ -47,3 +47,12 @@ services:
environment: environment:
ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
command: ["/opt/hadoop/bin/ozone","scm"] command: ["/opt/hadoop/bin/ozone","scm"]
# Client container for the blockade tests; the test utilities run their
# commands in it via "docker-compose exec ozone_client ...".
ozone_client:
image: apache/hadoop-runner
volumes:
# Mounts the directory two levels up at /opt/hadoop inside the container.
- ../..:/opt/hadoop
ports:
- 9869
# NOTE(review): "tail -f" presumably only keeps the container running so
# that commands can be exec'd into it -- confirm.
command: ["tail", "-f","/etc/passwd"]
env_file:
- ./docker-config