HADOOP-15450. Avoid fsync storm triggered by DiskChecker and handle disk full situation. Contributed by Arpit Agarwal.

(cherry picked from commit bcc8e76bad)
This commit is contained in:
Kihwal Lee 2018-05-22 11:22:22 -05:00
parent 9788c8c01e
commit 6b8a5af6ef
3 changed files with 217 additions and 104 deletions

View File

@ -74,12 +74,30 @@ public class DiskChecker {
* @throws DiskErrorException * @throws DiskErrorException
*/ */
public static void checkDir(File dir) throws DiskErrorException { public static void checkDir(File dir) throws DiskErrorException {
checkDirInternal(dir);
}
/**
* Create the directory if it doesn't exist and check that dir is
* readable, writable and executable. Perform some disk IO to
* ensure that the disk is usable for writes.
*
* @param dir
* @throws DiskErrorException
*/
public static void checkDirWithDiskIo(File dir)
throws DiskErrorException {
checkDirInternal(dir);
doDiskIo(dir);
}
private static void checkDirInternal(File dir)
throws DiskErrorException {
if (!mkdirsWithExistsCheck(dir)) { if (!mkdirsWithExistsCheck(dir)) {
throw new DiskErrorException("Cannot create directory: " throw new DiskErrorException("Cannot create directory: "
+ dir.toString()); + dir.toString());
} }
checkAccessByFileMethods(dir); checkAccessByFileMethods(dir);
doDiskIo(dir);
} }
/** /**
@ -94,10 +112,34 @@ public class DiskChecker {
*/ */
public static void checkDir(LocalFileSystem localFS, Path dir, public static void checkDir(LocalFileSystem localFS, Path dir,
FsPermission expected) FsPermission expected)
throws DiskErrorException, IOException {
checkDirInternal(localFS, dir, expected);
}
/**
* Create the local directory if necessary, also ensure permissions
* allow it to be read from and written into. Perform some diskIO
* to ensure that the disk is usable for writes.
*
* @param localFS local filesystem
* @param dir directory
* @param expected permission
* @throws DiskErrorException
* @throws IOException
*/
public static void checkDirWithDiskIo(LocalFileSystem localFS, Path dir,
FsPermission expected)
throws DiskErrorException, IOException {
checkDirInternal(localFS, dir, expected);
doDiskIo(localFS.pathToFile(dir));
}
private static void checkDirInternal(LocalFileSystem localFS, Path dir,
FsPermission expected)
throws DiskErrorException, IOException { throws DiskErrorException, IOException {
mkdirsWithExistsAndPermissionCheck(localFS, dir, expected); mkdirsWithExistsAndPermissionCheck(localFS, dir, expected);
checkAccessByFileMethods(localFS.pathToFile(dir)); checkAccessByFileMethods(localFS.pathToFile(dir));
doDiskIo(localFS.pathToFile(dir));
} }
/** /**

View File

@ -19,7 +19,6 @@ package org.apache.hadoop.util;
import java.io.*; import java.io.*;
import java.nio.file.Files; import java.nio.file.Files;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.util.DiskChecker.FileIoProvider; import org.apache.hadoop.util.DiskChecker.FileIoProvider;
import org.junit.After; import org.junit.After;
@ -214,105 +213,4 @@ public class TestDiskChecker {
} }
localDir.delete(); localDir.delete();
} }
/**
* Verify DiskChecker ignores at least 2 transient file creation errors.
*/
@Test(timeout = 30000)
public void testDiskIoIgnoresTransientCreateErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
DiskChecker.DISK_IO_MAX_ITERATIONS - 1, 0));
checkDirs(true, "755", true);
}
/**
* Verify DiskChecker bails after 3 file creation errors.
*/
@Test(timeout = 30000)
public void testDiskIoDetectsCreateErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
DiskChecker.DISK_IO_MAX_ITERATIONS, 0));
checkDirs(true, "755", false);
}
/**
* Verify DiskChecker ignores at least 2 transient file write errors.
*/
@Test(timeout = 30000)
public void testDiskIoIgnoresTransientWriteErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
0, DiskChecker.DISK_IO_MAX_ITERATIONS - 1));
checkDirs(true, "755", true);
}
/**
* Verify DiskChecker bails after 3 file write errors.
*/
@Test(timeout = 30000)
public void testDiskIoDetectsWriteErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
0, DiskChecker.DISK_IO_MAX_ITERATIONS));
checkDirs(true, "755", false);
}
/**
* Verify DiskChecker's test file naming scheme.
*/
@Test(timeout = 30000)
public void testDiskIoFileNaming() throws Throwable {
final File rootDir = new File("/");
assertTrue(".001".matches("\\.00\\d$"));
for (int i = 1; i < DiskChecker.DISK_IO_MAX_ITERATIONS; ++i) {
final File file = DiskChecker.getFileNameForDiskIoCheck(rootDir, i);
assertTrue(
"File name does not match expected pattern: " + file,
file.toString().matches("^.*\\.[0-9]+$"));
}
final File guidFile = DiskChecker.getFileNameForDiskIoCheck(
rootDir, DiskChecker.DISK_IO_MAX_ITERATIONS);
assertTrue(
"File name does not match expected pattern: " + guidFile,
guidFile.toString().matches("^.*\\.[A-Za-z0-9-]+$"));
}
/**
* A dummy {@link DiskChecker#FileIoProvider} that can throw a programmable
* number of times.
*/
private static class TestFileIoProvider implements FileIoProvider {
private final AtomicInteger numCreateCalls = new AtomicInteger(0);
private final AtomicInteger numWriteCalls = new AtomicInteger(0);
private final int numTimesToThrowOnCreate;
private final int numTimesToThrowOnWrite;
public TestFileIoProvider(
int numTimesToThrowOnCreate, int numTimesToThrowOnWrite) {
this.numTimesToThrowOnCreate = numTimesToThrowOnCreate;
this.numTimesToThrowOnWrite = numTimesToThrowOnWrite;
}
/**
* {@inheritDoc}
*/
@Override
public FileOutputStream get(File f) throws FileNotFoundException {
if (numCreateCalls.getAndIncrement() < numTimesToThrowOnCreate) {
throw new FileNotFoundException("Dummy exception for testing");
}
// Can't mock final class FileOutputStream.
return new FileOutputStream(f);
}
/**
* {@inheritDoc}
*/
@Override
public void write(FileOutputStream fos, byte[] data) throws IOException {
if (numWriteCalls.getAndIncrement() < numTimesToThrowOnWrite) {
throw new IOException("Dummy exception for testing");
}
fos.write(data);
}
}
} }

View File

@ -0,0 +1,173 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.DiskChecker.FileIoProvider;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.Assert.assertTrue;
/**
* Verify {@link DiskChecker} validation routines that perform
* Disk IO.
*/
public final class TestDiskCheckerWithDiskIo {
@Rule
public Timeout testTimeout = new Timeout(30_000);
/**
* Verify DiskChecker ignores at least 2 transient file creation errors.
*/
@Test
public final void testDiskIoIgnoresTransientCreateErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
DiskChecker.DISK_IO_MAX_ITERATIONS - 1, 0));
checkDirs(true);
}
/**
* Verify DiskChecker bails after 3 file creation errors.
*/
@Test(expected = DiskErrorException.class)
public final void testDiskIoDetectsCreateErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
DiskChecker.DISK_IO_MAX_ITERATIONS, 0));
checkDirs(false);
}
/**
* Verify DiskChecker ignores at least 2 transient file write errors.
*/
@Test
public final void testDiskIoIgnoresTransientWriteErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
0, DiskChecker.DISK_IO_MAX_ITERATIONS - 1));
checkDirs(true);
}
/**
* Verify DiskChecker bails after 3 file write errors.
*/
@Test(expected = DiskErrorException.class)
public final void testDiskIoDetectsWriteErrors() throws Throwable {
DiskChecker.replaceFileOutputStreamProvider(new TestFileIoProvider(
0, DiskChecker.DISK_IO_MAX_ITERATIONS));
checkDirs(false);
}
/**
* Verify DiskChecker's test file naming scheme.
*/
@Test
public void testDiskIoFileNaming() {
final File rootDir = new File("/");
assertTrue(".001".matches("\\.00\\d$"));
for (int i = 1; i < DiskChecker.DISK_IO_MAX_ITERATIONS; ++i) {
final File file = DiskChecker.getFileNameForDiskIoCheck(rootDir, i);
assertTrue(
"File name does not match expected pattern: " + file,
file.toString().matches("^.*\\.[0-9]+$"));
}
final File guidFile = DiskChecker.getFileNameForDiskIoCheck(
rootDir, DiskChecker.DISK_IO_MAX_ITERATIONS);
assertTrue(
"File name does not match expected pattern: " + guidFile,
guidFile.toString().matches("^.*\\.[A-Za-z0-9-]+$"));
}
/**
* A dummy {@link DiskChecker#FileIoProvider} that can throw a programmable
* number of times.
*/
private static class TestFileIoProvider implements FileIoProvider {
private final AtomicInteger numCreateCalls = new AtomicInteger(0);
private final AtomicInteger numWriteCalls = new AtomicInteger(0);
private final int numTimesToThrowOnCreate;
private final int numTimesToThrowOnWrite;
public TestFileIoProvider(
int numTimesToThrowOnCreate, int numTimesToThrowOnWrite) {
this.numTimesToThrowOnCreate = numTimesToThrowOnCreate;
this.numTimesToThrowOnWrite = numTimesToThrowOnWrite;
}
/**
* {@inheritDoc}
*/
@Override
public FileOutputStream get(File f) throws FileNotFoundException {
if (numCreateCalls.getAndIncrement() < numTimesToThrowOnCreate) {
throw new FileNotFoundException("Dummy exception for testing");
}
// Can't mock final class FileOutputStream.
return new FileOutputStream(f);
}
/**
* {@inheritDoc}
*/
@Override
public void write(FileOutputStream fos, byte[] data) throws IOException {
if (numWriteCalls.getAndIncrement() < numTimesToThrowOnWrite) {
throw new IOException("Dummy exception for testing");
}
fos.write(data);
}
}
private void checkDirs(boolean success)
throws Throwable {
File localDir = createTempDir();
try {
DiskChecker.checkDirWithDiskIo(localDir);
} finally {
localDir.delete();
}
}
/**
* Create an empty directory with a random name under test directory
* with Posix permissions "0755".
*
* @return the created directory
* @throws java.io.IOException if any
*/
private File createTempDir() throws java.io.IOException {
final File testDir = new File(System.getProperty("test.build.data"));
return Files.createTempDirectory(testDir.toPath(), "test",
PosixFilePermissions.asFileAttribute(
PosixFilePermissions.fromString("rwxr-xr-x"))).toFile();
}
}