HDDS-1786 : Datanodes takeSnapshot should delete previously created snapshots (#1163)
This commit is contained in:
parent
06ad6540b3
commit
8024818d33
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hdds.conf.Config;
|
||||||
|
import org.apache.hadoop.hdds.conf.ConfigGroup;
|
||||||
|
import org.apache.hadoop.hdds.conf.ConfigTag;
|
||||||
|
import org.apache.hadoop.hdds.conf.ConfigType;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds configuration items for Ratis/Raft server.
|
||||||
|
*/
|
||||||
|
@ConfigGroup(prefix = "hdds.ratis.server")
|
||||||
|
public class RatisServerConfiguration {
|
||||||
|
|
||||||
|
private int numSnapshotsRetained;
|
||||||
|
|
||||||
|
@Config(key = "num.snapshots.retained",
|
||||||
|
type = ConfigType.INT,
|
||||||
|
defaultValue = "5",
|
||||||
|
tags = {ConfigTag.STORAGE},
|
||||||
|
description = "Config parameter to specify number of old snapshots " +
|
||||||
|
"retained at the Ratis leader.")
|
||||||
|
public void setNumSnapshotsRetained(int numSnapshotsRetained) {
|
||||||
|
this.numSnapshotsRetained = numSnapshotsRetained;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getNumSnapshotsRetained() {
|
||||||
|
return numSnapshotsRetained;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -19,8 +19,8 @@
|
||||||
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.conf.StorageUnit;
|
import org.apache.hadoop.conf.StorageUnit;
|
||||||
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
|
||||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
|
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
|
||||||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
|
||||||
|
@ -101,7 +101,7 @@ public final class XceiverServerRatis extends XceiverServer {
|
||||||
private final long cacheEntryExpiryInteval;
|
private final long cacheEntryExpiryInteval;
|
||||||
private boolean isStarted = false;
|
private boolean isStarted = false;
|
||||||
private DatanodeDetails datanodeDetails;
|
private DatanodeDetails datanodeDetails;
|
||||||
private final Configuration conf;
|
private final OzoneConfiguration conf;
|
||||||
// TODO: Remove the gids set when Ratis supports an api to query active
|
// TODO: Remove the gids set when Ratis supports an api to query active
|
||||||
// pipelines
|
// pipelines
|
||||||
private final Set<RaftGroupId> raftGids = new HashSet<>();
|
private final Set<RaftGroupId> raftGids = new HashSet<>();
|
||||||
|
@ -110,7 +110,7 @@ public final class XceiverServerRatis extends XceiverServer {
|
||||||
private XceiverServerRatis(DatanodeDetails dd, int port,
|
private XceiverServerRatis(DatanodeDetails dd, int port,
|
||||||
ContainerDispatcher dispatcher, ContainerController containerController,
|
ContainerDispatcher dispatcher, ContainerController containerController,
|
||||||
StateContext context, GrpcTlsConfig tlsConfig, CertificateClient caClient,
|
StateContext context, GrpcTlsConfig tlsConfig, CertificateClient caClient,
|
||||||
Configuration conf)
|
OzoneConfiguration conf)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
super(conf, caClient);
|
super(conf, caClient);
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
|
@ -255,6 +255,13 @@ public final class XceiverServerRatis extends XceiverServer {
|
||||||
OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT);
|
OzoneConfigKeys.DFS_CONTAINER_RATIS_LOG_PURGE_GAP_DEFAULT);
|
||||||
RaftServerConfigKeys.Log.setPurgeGap(properties, purgeGap);
|
RaftServerConfigKeys.Log.setPurgeGap(properties, purgeGap);
|
||||||
|
|
||||||
|
//Set the number of Snapshots Retained.
|
||||||
|
RatisServerConfiguration ratisServerConfiguration =
|
||||||
|
conf.getObject(RatisServerConfiguration.class);
|
||||||
|
int numSnapshotsRetained =
|
||||||
|
ratisServerConfiguration.getNumSnapshotsRetained();
|
||||||
|
RaftServerConfigKeys.Snapshot.setSnapshotRetentionPolicy(properties,
|
||||||
|
numSnapshotsRetained);
|
||||||
return properties;
|
return properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,7 +384,7 @@ public final class XceiverServerRatis extends XceiverServer {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static XceiverServerRatis newXceiverServerRatis(
|
public static XceiverServerRatis newXceiverServerRatis(
|
||||||
DatanodeDetails datanodeDetails, Configuration ozoneConf,
|
DatanodeDetails datanodeDetails, OzoneConfiguration ozoneConf,
|
||||||
ContainerDispatcher dispatcher, ContainerController containerController,
|
ContainerDispatcher dispatcher, ContainerController containerController,
|
||||||
CertificateClient caClient, StateContext context) throws IOException {
|
CertificateClient caClient, StateContext context) throws IOException {
|
||||||
int localPort = ozoneConf.getInt(
|
int localPort = ozoneConf.getInt(
|
||||||
|
|
|
@ -23,15 +23,20 @@ import org.apache.hadoop.hdds.client.ReplicationType;
|
||||||
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
||||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
|
||||||
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
||||||
|
import org.apache.hadoop.ozone.OzoneConfigKeys;
|
||||||
import org.apache.hadoop.ozone.client.CertificateClientTestImpl;
|
import org.apache.hadoop.ozone.client.CertificateClientTestImpl;
|
||||||
import org.apache.hadoop.ozone.client.ObjectStore;
|
import org.apache.hadoop.ozone.client.ObjectStore;
|
||||||
import org.apache.hadoop.ozone.client.OzoneClient;
|
import org.apache.hadoop.ozone.client.OzoneClient;
|
||||||
import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
||||||
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
|
import org.apache.hadoop.ozone.client.io.KeyOutputStream;
|
||||||
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
|
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
|
||||||
|
import org.apache.hadoop.ozone.container.ContainerTestHelper;
|
||||||
|
import org.apache.hadoop.ozone.container.common.transport.server.ratis.ContainerStateMachine;
|
||||||
|
import org.apache.hadoop.ozone.container.common.transport.server.ratis.RatisServerConfiguration;
|
||||||
import org.apache.hadoop.ozone.om.OzoneManager;
|
import org.apache.hadoop.ozone.om.OzoneManager;
|
||||||
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
|
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
|
import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
@ -39,6 +44,7 @@ import org.junit.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Path;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
@ -85,6 +91,7 @@ public class TestContainerStateMachine {
|
||||||
conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS);
|
conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS);
|
||||||
conf.setQuietMode(false);
|
conf.setQuietMode(false);
|
||||||
OzoneManager.setTestSecureOmFlag(true);
|
OzoneManager.setTestSecureOmFlag(true);
|
||||||
|
conf.setLong(OzoneConfigKeys.DFS_RATIS_SNAPSHOT_THRESHOLD_KEY, 1);
|
||||||
// conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.toString());
|
// conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.toString());
|
||||||
cluster =
|
cluster =
|
||||||
MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1)
|
MiniOzoneCluster.newBuilder(conf).setNumDatanodes(1)
|
||||||
|
@ -148,4 +155,57 @@ public class TestContainerStateMachine {
|
||||||
.getContainerState()
|
.getContainerState()
|
||||||
== ContainerProtos.ContainerDataProto.State.UNHEALTHY);
|
== ContainerProtos.ContainerDataProto.State.UNHEALTHY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRatisSnapshotRetention() throws Exception {
|
||||||
|
|
||||||
|
ContainerStateMachine stateMachine =
|
||||||
|
(ContainerStateMachine) ContainerTestHelper.getStateMachine(cluster);
|
||||||
|
SimpleStateMachineStorage storage =
|
||||||
|
(SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
|
||||||
|
Assert.assertNull(storage.findLatestSnapshot());
|
||||||
|
|
||||||
|
// Write 10 keys. Num snapshots should be equal to config value.
|
||||||
|
for (int i = 1; i <= 10; i++) {
|
||||||
|
OzoneOutputStream key =
|
||||||
|
objectStore.getVolume(volumeName).getBucket(bucketName)
|
||||||
|
.createKey(("ratis" + i), 1024, ReplicationType.RATIS,
|
||||||
|
ReplicationFactor.ONE, new HashMap<>());
|
||||||
|
// First write and flush creates a container in the datanode
|
||||||
|
key.write(("ratis" + i).getBytes());
|
||||||
|
key.flush();
|
||||||
|
key.write(("ratis" + i).getBytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
RatisServerConfiguration ratisServerConfiguration =
|
||||||
|
conf.getObject(RatisServerConfiguration.class);
|
||||||
|
|
||||||
|
stateMachine =
|
||||||
|
(ContainerStateMachine) ContainerTestHelper.getStateMachine(cluster);
|
||||||
|
storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
|
||||||
|
Path parentPath = storage.findLatestSnapshot().getFile().getPath();
|
||||||
|
int numSnapshots = parentPath.getParent().toFile().listFiles().length;
|
||||||
|
Assert.assertTrue(Math.abs(ratisServerConfiguration
|
||||||
|
.getNumSnapshotsRetained() - numSnapshots) <= 1);
|
||||||
|
|
||||||
|
// Write 10 more keys. Num Snapshots should remain the same.
|
||||||
|
for (int i = 11; i <= 20; i++) {
|
||||||
|
OzoneOutputStream key =
|
||||||
|
objectStore.getVolume(volumeName).getBucket(bucketName)
|
||||||
|
.createKey(("ratis" + i), 1024, ReplicationType.RATIS,
|
||||||
|
ReplicationFactor.ONE, new HashMap<>());
|
||||||
|
// First write and flush creates a container in the datanode
|
||||||
|
key.write(("ratis" + i).getBytes());
|
||||||
|
key.flush();
|
||||||
|
key.write(("ratis" + i).getBytes());
|
||||||
|
}
|
||||||
|
stateMachine =
|
||||||
|
(ContainerStateMachine) ContainerTestHelper.getStateMachine(cluster);
|
||||||
|
storage = (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
|
||||||
|
parentPath = storage.findLatestSnapshot().getFile().getPath();
|
||||||
|
numSnapshots = parentPath.getParent().toFile().listFiles().length;
|
||||||
|
Assert.assertTrue(Math.abs(ratisServerConfiguration
|
||||||
|
.getNumSnapshotsRetained() - numSnapshots) <= 1);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
Loading…
Reference in New Issue