YARN-11463. Node Labels root directory creation doesn't have a retry logic (#5562)
Co-authored-by: Ashutosh Gupta <ashugpt@amazon.com>
This commit is contained in:
parent
9e24ed2196
commit
964c1902c8
|
@ -217,6 +217,16 @@ public class YarnConfiguration extends Configuration {
|
||||||
|
|
||||||
public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100;
|
public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100;
|
||||||
|
|
||||||
|
public static final String NODE_STORE_ROOT_DIR_NUM_RETRIES =
|
||||||
|
RM_PREFIX + "nodestore-rootdir.num-retries";
|
||||||
|
|
||||||
|
public static final int NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES = 1000;
|
||||||
|
|
||||||
|
public static final String NODE_STORE_ROOT_DIR_RETRY_INTERVAL =
|
||||||
|
RM_PREFIX + "nodestore-rootdir.retry-interval-ms";
|
||||||
|
|
||||||
|
public static final int NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL = 1000;
|
||||||
|
|
||||||
public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS =
|
public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS =
|
||||||
RM_PREFIX + "application-master-service.processors";
|
RM_PREFIX + "application-master-service.processors";
|
||||||
|
|
||||||
|
|
|
@ -65,8 +65,32 @@ public abstract class AbstractFSNodeStore<M> {
|
||||||
this.fsWorkingPath = fsStorePath;
|
this.fsWorkingPath = fsStorePath;
|
||||||
this.manager = mgr;
|
this.manager = mgr;
|
||||||
initFileSystem(conf);
|
initFileSystem(conf);
|
||||||
// mkdir of root dir path
|
// mkdir of root dir path with retry logic
|
||||||
fs.mkdirs(fsWorkingPath);
|
// Create the root directory, retrying on failure. One initial attempt is
// always made, followed by at most maxRetries further attempts. A failure
// is either an IOException or a "false" result from mkdirs; both count
// against the retry budget and both wait for the retry interval, so the
// loop is always bounded and never spins.
int maxRetries = Math.max(0, conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_RETRIES,
    YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES));
// Read the interval once; it does not change between attempts.
long retryIntervalMs = Math.max(0, conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_INTERVAL,
    YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL));
boolean success = false;
IOException lastFailure = null;
for (int attempt = 0; attempt <= maxRetries && !success; attempt++) {
  if (attempt > 0) {
    // Back off before every retry, whatever the cause of the last failure.
    try {
      Thread.sleep(retryIntervalMs);
    } catch (InterruptedException ie) {
      // Restore the interrupt flag so callers further up can observe it.
      Thread.currentThread().interrupt();
      throw new RuntimeException(ie);
    }
  }
  try {
    // The first attempt calls mkdirs directly; retries first check whether
    // the directory has appeared in the meantime.
    success = (attempt > 0 && fs.exists(fsWorkingPath)) || fs.mkdirs(fsWorkingPath);
    lastFailure = null;
  } catch (IOException e) {
    lastFailure = e;
  }
}
if (!success) {
  // All attempts are exhausted: surface the last I/O error if there was
  // one, otherwise report that mkdirs kept returning false.
  if (lastFailure != null) {
    throw lastFailure;
  }
  throw new IOException("Failed to create store directory " + fsWorkingPath
      + " after " + (maxRetries + 1) + " attempts");
}
|
||||||
this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION,
|
this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION,
|
||||||
YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION);
|
YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION);
|
||||||
LOG.info("Created store directory :" + fsWorkingPath);
|
LOG.info("Created store directory :" + fsWorkingPath);
|
||||||
|
|
|
@ -5177,4 +5177,20 @@
|
||||||
<value>1</value>
|
<value>1</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>
|
||||||
|
Number of retries when creating the root directory for the node store.
|
||||||
|
</description>
|
||||||
|
<name>yarn.resourcemanager.nodestore-rootdir.num-retries</name>
|
||||||
|
<value>1000</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>
|
||||||
|
Interval in milliseconds between retries when creating the root directory for the node store.
|
||||||
|
</description>
|
||||||
|
<name>yarn.resourcemanager.nodestore-rootdir.retry-interval-ms</name>
|
||||||
|
<value>1000</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
</configuration>
|
</configuration>
|
||||||
|
|
|
@ -359,9 +359,6 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase {
|
||||||
|
|
||||||
mockStore.setFs(mockFs);
|
mockStore.setFs(mockFs);
|
||||||
verifyMkdirsCount(mockStore, true, 1);
|
verifyMkdirsCount(mockStore, true, 1);
|
||||||
verifyMkdirsCount(mockStore, false, 2);
|
|
||||||
verifyMkdirsCount(mockStore, true, 3);
|
|
||||||
verifyMkdirsCount(mockStore, false, 4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyMkdirsCount(FileSystemNodeLabelsStore store,
|
private void verifyMkdirsCount(FileSystemNodeLabelsStore store,
|
||||||
|
|
Loading…
Reference in New Issue