YARN-10442. RM should make sure node label file highly available. (#2390)

* YARN-10442. RM should make sure node label file highly available.

* YARN-10442. Corrected method name.
            Added license header for file TestNodeLabelFileReplication.java
            Added Test code.

* YARN-10442. Changed property name and log.

* YARN-10442. Changed default replication for FS store file.
This commit is contained in:
Surendra Singh Lilhore 2020-10-30 23:05:16 +05:30 committed by GitHub
parent c47c9fd65d
commit 7169ec4509
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 1 deletions

View File

@ -4076,6 +4076,11 @@ public class YarnConfiguration extends Configuration {
public static final String FS_NODE_LABELS_STORE_ROOT_DIR = NODE_LABELS_PREFIX
+ "fs-store.root-dir";
/** FS store file replication. */
public static final String FS_STORE_FILE_REPLICATION = YARN_PREFIX
+ "fs-store.file.replication";
public static final int DEFAULT_FS_STORE_FILE_REPLICATION = 0;
/**
* Node-attribute configurations.
*/

View File

@ -27,6 +27,7 @@ import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.yarn.nodelabels.store.op.FSNodeStoreLogOp;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.store.FSStoreOpHandler.StoreType;
import java.io.EOFException;
@ -47,6 +48,7 @@ public abstract class AbstractFSNodeStore<M> {
private FSDataOutputStream editlogOs;
private Path editLogPath;
private int replication;
private StoreSchema schema;
protected M manager;
@ -65,6 +67,8 @@ public abstract class AbstractFSNodeStore<M> {
initFileSystem(conf);
// mkdir of root dir path
fs.mkdirs(fsWorkingPath);
this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION,
YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION);
LOG.info("Created store directory :" + fsWorkingPath);
}
@ -162,6 +166,7 @@ public abstract class AbstractFSNodeStore<M> {
StoreOp op = FSStoreOpHandler.getMirrorOp(storeType);
op.write(os, manager);
}
checkAvailability(writingMirrorPath);
// Move mirror to mirror.old
if (fs.exists(mirrorPath)) {
fs.delete(oldMirrorPath, false);
@ -178,11 +183,27 @@ public abstract class AbstractFSNodeStore<M> {
// create a new editlog file
editlogOs = fs.create(editLogPath, true);
editlogOs.close();
checkAvailability(editLogPath);
LOG.info("Finished write mirror at:" + mirrorPath.toString());
LOG.info("Finished create editlog file at:" + editLogPath.toString());
}
/**
* Make sure replica is highly available. It will avoid setting replication,
* if the value configured for
* {@link YarnConfiguration#FS_STORE_FILE_REPLICATION} is 0.
*/
private void checkAvailability(Path file) throws IOException {
try {
if (replication != 0
&& fs.getFileStatus(file).getReplication() < replication) {
fs.setReplication(file, (short) replication);
}
} catch (UnsupportedOperationException e) {
LOG.error("Failed set replication for a file : {}", file);
}
}
protected void loadManagerFromEditLog(Path editPath) throws IOException {
if (!fs.exists(editPath)) {
return;

View File

@ -3804,6 +3804,14 @@
<value>org.apache.hadoop.yarn.nodelabels.FileSystemNodeLabelsStore</value>
</property>
<property>
<description>The replication factor for the FS store
files. Default value is 0, means it will use file system
default replication.</description>
<name>yarn.fs-store.file.replication</name>
<value>0</value>
</property>
<property>
<description>
Enable the CSRF filter for the RM web app

View File

@ -0,0 +1,64 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.nodelabels;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.junit.Assert;
import org.junit.Test;
public class TestNodeLabelFileReplication {
@Test
public void testNodeLabelFileReplication() throws IOException {
int expectedReplication = 10;
Configuration conf = new Configuration();
conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
conf.setInt(YarnConfiguration.FS_STORE_FILE_REPLICATION,
expectedReplication);
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1)
.build();
FileSystem fs = cluster.getFileSystem();
String nodeLabelDir = fs.getUri().toString() + "/nodelabel";
conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, nodeLabelDir);
CommonNodeLabelsManager manager = new CommonNodeLabelsManager();
manager.init(conf);
manager.start();
int fileReplication = fs
.getFileStatus(new Path(nodeLabelDir, "nodelabel.mirror"))
.getReplication();
Assert.assertEquals(
"Node label file replication should be " + expectedReplication,
expectedReplication, fileReplication);
manager.close();
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
}