HDDS-1027. Add blockade Tests for datanode isolation and scm failures. Contributed by Nilotpal Nandi.
This commit is contained in:
parent
49ddd8a6ed
commit
911790cc26
143
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
vendored
Normal file
143
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
vendored
Normal file
|
@ -0,0 +1,143 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from blockadeUtils.blockade import Blockade
|
||||
from clusterUtils.cluster_utils import ClusterUtils
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
SCM = []
|
||||
DATANODES = []
|
||||
|
||||
|
||||
def setup():
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
|
||||
SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
|
||||
DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
|
||||
|
||||
exit_code, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
|
||||
|
||||
def teardown():
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
|
||||
|
||||
def teardown_module():
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
|
||||
|
||||
def test_three_dns_isolate_onescmfailure():
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other.
|
||||
One of the datanodes (third datanode) cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be closed.
|
||||
The container replica state in second datanode should be closed.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], SCM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'CLOSED'
|
||||
assert second_datanode_status == 'CLOSED'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
|
||||
def test_three_dns_isolate_twoscmfailure():
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other.
|
||||
two datanodes cannot communicate with SCM (second datanode and third
|
||||
datanode)
|
||||
Expectation :
|
||||
The container replica state in first datanode should be quasi-closed.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'QUASI_CLOSED'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
|
||||
def test_three_dns_isolate_threescmfailure():
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other and also cannot
|
||||
communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be open.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'OPEN'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
120
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
vendored
Normal file
120
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
vendored
Normal file
|
@ -0,0 +1,120 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from blockadeUtils.blockade import Blockade
|
||||
from clusterUtils.cluster_utils import ClusterUtils
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
SCM = []
|
||||
DATANODES = []
|
||||
|
||||
|
||||
def setup():
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
|
||||
SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
|
||||
DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
|
||||
|
||||
exit_code, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
|
||||
|
||||
def teardown():
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
|
||||
|
||||
def teardown_module():
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
|
||||
|
||||
def test_two_dns_isolate_scm_same_partition():
|
||||
"""
|
||||
In this test, one of the datanodes (first datanode) cannot communicate
|
||||
with other two datanodes.
|
||||
Two datanodes (second datanode and third datanode), on same network
|
||||
parition, cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be quasi-closed.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
second_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
Blockade.blockade_create_partition(first_set, second_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'QUASI_CLOSED'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
|
||||
def test_two_dns_isolate_scm_different_partition():
|
||||
"""
|
||||
In this test, one of the datanodes (first datanode) cannot communicate with
|
||||
other two datanodes.
|
||||
Two datanodes (first datanode and second datanode),
|
||||
on different network paritions, cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be open.
|
||||
The container replica states can be either 'closed'
|
||||
in both second and third datanode, or,
|
||||
'open' in second datanode and 'quasi-closed' in third datanode.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
third_set = [SCM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'OPEN'
|
||||
assert (second_datanode_status == 'CLOSED' and
|
||||
third_datanode_status == 'CLOSED') or \
|
||||
(second_datanode_status == 'OPEN' and
|
||||
third_datanode_status == 'QUASI_CLOSED')
|
Loading…
Reference in New Issue