Revert MAPREDUCE-3868. Reenable Raid.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1363572 13f79535-47bb-0310-9956-ffa450edef68
Eli Collins 2012-07-19 22:30:36 +00:00
parent 4c51dacd52
commit 370c65f282
69 changed files with 0 additions and 16688 deletions


@@ -1,60 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>hadoop-raid-dist</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<!-- Configuration files -->
<fileSet>
<directory>${basedir}/src/main/conf</directory>
<outputDirectory>/etc/hadoop</outputDirectory>
<includes>
<include>*</include>
</includes>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/sbin</directory>
<outputDirectory>/sbin</outputDirectory>
<includes>
<include>*</include>
</includes>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/libexec</directory>
<outputDirectory>/libexec</outputDirectory>
<includes>
<include>*</include>
</includes>
<fileMode>0755</fileMode>
</fileSet>
<!-- Documentation -->
<fileSet>
<directory>${project.build.directory}/site</directory>
<outputDirectory>/share/doc/hadoop/raid</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<outputDirectory>/share/hadoop/${hadoop.component}/lib</outputDirectory>
<unpack>false</unpack>
<scope>runtime</scope>
<useProjectArtifact>true</useProjectArtifact>
</dependencySet>
</dependencySets>
</assembly>


@@ -52,11 +52,6 @@
<artifactId>hadoop-yarn-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-raid</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
@@ -125,7 +120,6 @@
run cp -r $ROOT/hadoop-common-project/hadoop-common/target/hadoop-common-${project.version}/* .
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* .
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* .
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-raid/target/hadoop-hdfs-raid-${project.version}/* .
run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* .
run cp -r $ROOT/hadoop-tools/hadoop-tools-dist/target/hadoop-tools-dist-${project.version}/* .
echo


@@ -1,170 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project>
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project-dist</artifactId>
<version>3.0.0-SNAPSHOT</version>
<relativePath>../../hadoop-project-dist</relativePath>
</parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-raid</artifactId>
<version>3.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Apache Hadoop HDFS Raid</name>
<description>Apache Hadoop HDFS Raid</description>
<properties>
<hadoop.component>raid</hadoop.component>
<is.hadoop.component>false</is.hadoop.component>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-archives</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>create-mrapp-generated-classpath</id>
<phase>generate-test-resources</phase>
<goals>
<goal>build-classpath</goal>
</goals>
<configuration>
<!--
This is needed to run the unit tests. It generates the required classpath
that is required in the env of the launch container in the mini mr/yarn cluster.
-->
<outputFile>${project.build.directory}/test-classes/mrapp-generated-classpath</outputFile>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<excludeFilterFile combine.self="override"></excludeFilterFile>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>docs</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<executions>
<execution>
<id>docs</id>
<phase>prepare-package</phase>
<goals>
<goal>site</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<finalName>${project.artifactId}-${project.version}</finalName>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<descriptorRefs>
<descriptorRef>hadoop-raid-dist</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>


@@ -1,58 +0,0 @@
<configuration>
<srcPath prefix="hdfs://dfs1.xxx.com:8000/user/dhruba/">
<policy name = "dhruba">
<property>
<name>srcReplication</name>
<value>3</value>
<description> pick files for RAID only if their replication factor is
greater than or equal to this value.
</description>
</property>
<property>
<name>targetReplication</name>
<value>2</value>
<description> after RAIDing, decrease the replication factor of a file to
this value.
</description>
</property>
<property>
<name>metaReplication</name>
<value>2</value>
<description> the replication factor of the RAID meta file
</description>
</property>
<property>
<name>modTimePeriod</name>
<value>3600000</value>
<description> time (milliseconds) after a file is modified to make it a
candidate for RAIDing
</description>
</property>
</policy>
</srcPath>
<srcPath prefix="hdfs://dfs1.xxx.com:9000/warehouse/table1">
<policy name = "table1">
<property>
<name>targetReplication</name>
<value>1</value>
<description> after RAIDing, decrease the replication factor of a file to
this value.
</description>
</property>
<property>
<name>metaReplication</name>
<value>2</value>
<description> the replication factor of the RAID meta file
</description>
</property>
<property>
<name>modTimePeriod</name>
<value>3600000</value>
<description> time (milliseconds) after a file is modified to make it a
candidate for RAIDing
</description>
</property>
</policy>
</srcPath>
</configuration>
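
As a reading aid for the policy properties above, a minimal sketch of the selection rule they describe (not the RaidNode's actual code): a file qualifies for RAIDing only if its replication factor is at least srcReplication and it was last modified more than modTimePeriod milliseconds ago; after RAIDing its replication is lowered to targetReplication. All names in the snippet are illustrative.

import org.apache.hadoop.fs.FileStatus;

public class RaidPolicySketch {
  // Mirrors the srcReplication and modTimePeriod descriptions above; hypothetical helper.
  static boolean isRaidCandidate(FileStatus stat, short srcReplication,
                                 long modTimePeriodMs, long nowMs) {
    return stat.getReplication() >= srcReplication
        && (nowMs - stat.getModificationTime()) > modTimePeriodMs;
  }
}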


@@ -1,509 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.raid.Decoder;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.ReedSolomonDecoder;
import org.apache.hadoop.raid.XORDecoder;
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
import org.apache.hadoop.util.ReflectionUtils;
/**
* This is an implementation of the Hadoop RAID Filesystem. This FileSystem
* wraps an instance of the DistributedFileSystem.
* If a file is corrupted, this FileSystem uses the parity blocks to
* regenerate the bad block.
*/
public class DistributedRaidFileSystem extends FilterFileSystem {
// these are alternate locations that can be used for read-only access
DecodeInfo[] alternates;
Configuration conf;
int stripeLength;
DistributedRaidFileSystem() throws IOException {
}
DistributedRaidFileSystem(FileSystem fs) throws IOException {
super(fs);
alternates = null;
stripeLength = 0;
}
// Information required for decoding a source file
static private class DecodeInfo {
final Path destPath;
final ErasureCodeType type;
final Configuration conf;
final int stripeLength;
private DecodeInfo(Configuration conf, ErasureCodeType type, Path destPath) {
this.conf = conf;
this.type = type;
this.destPath = destPath;
this.stripeLength = RaidNode.getStripeLength(conf);
}
Decoder createDecoder() {
if (this.type == ErasureCodeType.XOR) {
return new XORDecoder(conf, stripeLength);
} else if (this.type == ErasureCodeType.RS) {
return new ReedSolomonDecoder(conf, stripeLength,
RaidNode.rsParityLength(conf));
}
return null;
}
}
/* Initialize a Raid FileSystem
*/
public void initialize(URI name, Configuration conf) throws IOException {
this.conf = conf;
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl",
DistributedFileSystem.class);
if (clazz == null) {
throw new IOException("No FileSystem for fs.raid.underlyingfs.impl.");
}
this.fs = (FileSystem)ReflectionUtils.newInstance(clazz, null);
super.initialize(name, conf);
// find stripe length configured
stripeLength = RaidNode.getStripeLength(conf);
if (stripeLength == 0) {
LOG.info("dfs.raid.stripeLength is incorrectly defined to be " +
stripeLength + " Ignoring...");
return;
}
// Put XOR and RS in alternates
alternates= new DecodeInfo[2];
Path xorPath = RaidNode.xorDestinationPath(conf, fs);
alternates[0] = new DecodeInfo(conf, ErasureCodeType.XOR, xorPath);
Path rsPath = RaidNode.rsDestinationPath(conf, fs);
alternates[1] = new DecodeInfo(conf, ErasureCodeType.RS, rsPath);
}
/*
* Returns the underlying filesystem
*/
public FileSystem getFileSystem() throws IOException {
return fs;
}
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
ExtFSDataInputStream fd = new ExtFSDataInputStream(conf, this, alternates, f,
stripeLength, bufferSize);
return fd;
}
public void close() throws IOException {
if (fs != null) {
try {
fs.close();
} catch(IOException ie) {
//this might already be closed, ignore
}
}
super.close();
}
/**
* Layered filesystem input stream. This input stream tries reading
* from alternate locations if it encounters read errors in the primary location.
*/
private static class ExtFSDataInputStream extends FSDataInputStream {
private static class UnderlyingBlock {
// File that holds this block. Need not be the same as the outer file.
public Path path;
// Offset within path where this block starts.
public long actualFileOffset;
// Offset within the outer file where this block starts.
public long originalFileOffset;
// Length of the block (length <= blk sz of outer file).
public long length;
public UnderlyingBlock(Path path, long actualFileOffset,
long originalFileOffset, long length) {
this.path = path;
this.actualFileOffset = actualFileOffset;
this.originalFileOffset = originalFileOffset;
this.length = length;
}
}
/**
* Create an input stream that wraps all the reads/positions/seeking.
*/
private static class ExtFsInputStream extends FSInputStream {
// Extents of "good" underlying data that can be read.
private UnderlyingBlock[] underlyingBlocks;
private long currentOffset;
private FSDataInputStream currentStream;
private UnderlyingBlock currentBlock;
private byte[] oneBytebuff = new byte[1];
private int nextLocation;
private DistributedRaidFileSystem lfs;
private Path path;
private FileStatus stat;
private final DecodeInfo[] alternates;
private final int buffersize;
private final Configuration conf;
private final int stripeLength;
ExtFsInputStream(Configuration conf, DistributedRaidFileSystem lfs,
DecodeInfo[] alternates, Path path, int stripeLength, int buffersize)
throws IOException {
this.path = path;
this.nextLocation = 0;
// Construct array of blocks in file.
this.stat = lfs.getFileStatus(path);
long numBlocks = (this.stat.getLen() % this.stat.getBlockSize() == 0) ?
this.stat.getLen() / this.stat.getBlockSize() :
1 + this.stat.getLen() / this.stat.getBlockSize();
this.underlyingBlocks = new UnderlyingBlock[(int)numBlocks];
for (int i = 0; i < numBlocks; i++) {
long actualFileOffset = i * stat.getBlockSize();
long originalFileOffset = i * stat.getBlockSize();
long length = Math.min(
stat.getBlockSize(), stat.getLen() - originalFileOffset);
this.underlyingBlocks[i] = new UnderlyingBlock(
path, actualFileOffset, originalFileOffset, length);
}
this.currentOffset = 0;
this.currentBlock = null;
this.alternates = alternates;
this.buffersize = buffersize;
this.conf = conf;
this.lfs = lfs;
this.stripeLength = stripeLength;
// Open a stream to the first block.
openCurrentStream();
}
private void closeCurrentStream() throws IOException {
if (currentStream != null) {
currentStream.close();
currentStream = null;
}
}
/**
* Open a stream to the file containing the current block
* and seek to the appropriate offset
*/
private void openCurrentStream() throws IOException {
int blockIdx = (int)(currentOffset/stat.getBlockSize());
UnderlyingBlock block = underlyingBlocks[blockIdx];
// If the current path is the same as we want.
if (currentBlock == block ||
currentBlock != null && currentBlock.path == block.path) {
// If we have a valid stream, nothing to do.
if (currentStream != null) {
currentBlock = block;
return;
}
} else {
closeCurrentStream();
}
currentBlock = block;
currentStream = lfs.fs.open(currentBlock.path, buffersize);
long offset = block.actualFileOffset +
(currentOffset - block.originalFileOffset);
currentStream.seek(offset);
}
/**
* Returns the number of bytes available in the current block.
*/
private int blockAvailable() {
return (int) (currentBlock.length -
(currentOffset - currentBlock.originalFileOffset));
}
@Override
public synchronized int available() throws IOException {
// Application should not assume that any bytes are buffered here.
nextLocation = 0;
return Math.min(blockAvailable(), currentStream.available());
}
@Override
public synchronized void close() throws IOException {
closeCurrentStream();
super.close();
}
@Override
public boolean markSupported() { return false; }
@Override
public void mark(int readLimit) {
// Mark and reset are not supported.
nextLocation = 0;
}
@Override
public void reset() throws IOException {
// Mark and reset are not supported.
nextLocation = 0;
}
@Override
public synchronized int read() throws IOException {
int value = read(oneBytebuff);
if (value < 0) {
return value;
} else {
return oneBytebuff[0];
}
}
@Override
public synchronized int read(byte[] b) throws IOException {
int value = read(b, 0, b.length);
nextLocation = 0;
return value;
}
@Override
public synchronized int read(byte[] b, int offset, int len)
throws IOException {
while (true) {
openCurrentStream();
try{
int limit = Math.min(blockAvailable(), len);
int value = currentStream.read(b, offset, limit);
currentOffset += value;
nextLocation = 0;
return value;
} catch (BlockMissingException e) {
setAlternateLocations(e, currentOffset);
} catch (ChecksumException e) {
setAlternateLocations(e, currentOffset);
}
}
}
@Override
public synchronized int read(long position, byte[] b, int offset, int len)
throws IOException {
long oldPos = currentOffset;
seek(position);
try {
return read(b, offset, len);
} finally {
seek(oldPos);
}
}
@Override
public synchronized long skip(long n) throws IOException {
long skipped = 0;
while (skipped < n) {
int val = read();
if (val < 0) {
break;
}
skipped++;
}
nextLocation = 0;
return skipped;
}
@Override
public synchronized long getPos() throws IOException {
nextLocation = 0;
return currentOffset;
}
@Override
public synchronized void seek(long pos) throws IOException {
if (pos != currentOffset) {
closeCurrentStream();
currentOffset = pos;
openCurrentStream();
}
nextLocation = 0;
}
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
seek(targetPos);
boolean value = currentStream.seekToNewSource(currentStream.getPos());
nextLocation = 0;
return value;
}
/**
* Read fully from the given position; the current position is restored afterwards.
*/
@Override
public void readFully(long pos, byte[] b, int offset, int length)
throws IOException {
long oldPos = currentOffset;
seek(pos);
try {
while (true) {
// This loop retries reading until successful. Unrecoverable errors
// cause exceptions.
// currentOffset is changed by read().
try {
while (length > 0) {
int n = read(b, offset, length);
if (n < 0) {
throw new IOException("Premature EOF");
}
offset += n;
length -= n;
}
nextLocation = 0;
return;
} catch (BlockMissingException e) {
setAlternateLocations(e, currentOffset);
} catch (ChecksumException e) {
setAlternateLocations(e, currentOffset);
}
}
} finally {
seek(oldPos);
}
}
@Override
public void readFully(long pos, byte[] b) throws IOException {
readFully(pos, b, 0, b.length);
nextLocation = 0;
}
/**
* Extract good block from RAID
* @throws IOException if all alternate locations are exhausted
*/
private void setAlternateLocations(IOException curexp, long offset)
throws IOException {
while (alternates != null && nextLocation < alternates.length) {
try {
int idx = nextLocation++;
// Start offset of block.
long corruptOffset =
(offset / stat.getBlockSize()) * stat.getBlockSize();
// Make sure we use DFS and not DistributedRaidFileSystem for unRaid.
Configuration clientConf = new Configuration(conf);
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl",
DistributedFileSystem.class);
clientConf.set("fs.hdfs.impl", clazz.getName());
// Disable caching so that a previously cached RaidDfs is not used.
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
Path npath = RaidNode.unRaidCorruptBlock(clientConf, path,
alternates[idx].destPath,
alternates[idx].createDecoder(),
stripeLength, corruptOffset);
if (npath == null)
continue;
try {
String outdir = conf.get("fs.raid.recoverylogdir");
if (outdir != null) {
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
java.util.Date date = new java.util.Date();
String fname = path.getName() + dateFormat.format(date) +
(new Random()).nextInt() + ".txt";
Path outputunraid = new Path(outdir, fname);
FileSystem fs = outputunraid.getFileSystem(conf);
FSDataOutputStream dout = fs.create(outputunraid);
PrintStream ps = new PrintStream(dout);
ps.println("Recovery attempt log");
ps.println("Source path : " + path );
ps.println("Alternate path : " + alternates[idx].destPath);
ps.println("Stripe lentgh : " + stripeLength);
ps.println("Corrupt offset : " + corruptOffset);
String output = (npath==null) ? "UNSUCCESSFUL" : npath.toString();
ps.println("Output from unRaid : " + output);
ps.close();
}
} catch (Exception exc) {
LOG.info("Error while creating recovery log: " + exc);
}
closeCurrentStream();
LOG.info("Using block at offset " + corruptOffset + " from " +
npath);
currentBlock.path = npath;
currentBlock.actualFileOffset = 0; // Single block in file.
// Don't change currentOffset, in case the user had done a seek?
openCurrentStream();
return;
} catch (Exception e) {
LOG.info("Error in using alternate path " + path + ". " + e +
" Ignoring...");
}
}
throw curexp;
}
/**
* The name of the file system that is immediately below the
* DistributedRaidFileSystem. This is specified by the
* configuration parameter called fs.raid.underlyingfs.impl.
* If this parameter is not specified in the configuration, then
* the default class DistributedFileSystem is returned.
* @param conf the configuration object
* @return the filesystem object immediately below DistributedRaidFileSystem
* @throws IOException if all alternate locations are exhausted
*/
private FileSystem getUnderlyingFileSystem(Configuration conf) {
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl", DistributedFileSystem.class);
FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
return fs;
}
}
/**
* constructor for ext input stream.
* @param fs the underlying filesystem
* @param p the path in the underlying file system
* @param buffersize the size of IO
* @throws IOException
*/
public ExtFSDataInputStream(Configuration conf, DistributedRaidFileSystem lfs,
DecodeInfo[] alternates, Path p, int stripeLength, int buffersize) throws IOException {
super(new ExtFsInputStream(conf, lfs, alternates, p, stripeLength, buffersize));
}
}
}
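
A hedged client-side sketch of reading through this class. The "fs.raid.underlyingfs.impl" and "fs.raid.recoverylogdir" keys come from the code above; mapping hdfs:// URIs to DistributedRaidFileSystem through "fs.hdfs.impl" mirrors the key the class itself rewrites in setAlternateLocations(), but wiring it this way on the client is an assumption, and the file name is made up.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RaidReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Route hdfs:// through the RAID wrapper (assumed client-side wiring).
    conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
    // Keep the real DFS underneath; same key initialize() reads.
    conf.set("fs.raid.underlyingfs.impl",
             "org.apache.hadoop.hdfs.DistributedFileSystem");
    // Optional recovery log directory; same key used in setAlternateLocations().
    conf.set("fs.raid.recoverylogdir", "/tmp/raid-recovery");

    FileSystem fs = FileSystem.get(URI.create("hdfs://dfs1.xxx.com:8000/"), conf);
    FSDataInputStream in = fs.open(new Path("/user/dhruba/part-00000")); // hypothetical file
    byte[] buf = new byte[4096];
    int n = in.read(buf); // falls back to parity reconstruction on missing or corrupt blocks
    in.close();
    System.out.println("read " + n + " bytes");
  }
}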


@@ -1,79 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.IOException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.LinkedList;
import java.util.List;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.tools.DFSck;
import org.apache.hadoop.util.ToolRunner;
public abstract class RaidDFSUtil {
/**
* Returns the corrupt blocks in a file.
*/
public static List<LocatedBlock> corruptBlocksInFile(
DistributedFileSystem dfs, String path, long offset, long length)
throws IOException {
List<LocatedBlock> corrupt = new LinkedList<LocatedBlock>();
LocatedBlocks locatedBlocks =
getBlockLocations(dfs, path, offset, length);
for (LocatedBlock b: locatedBlocks.getLocatedBlocks()) {
if (b.isCorrupt() ||
(b.getLocations().length == 0 && b.getBlockSize() > 0)) {
corrupt.add(b);
}
}
return corrupt;
}
public static LocatedBlocks getBlockLocations(
DistributedFileSystem dfs, String path, long offset, long length)
throws IOException {
return dfs.getClient().namenode.getBlockLocations(path, offset, length);
}
/**
* Make successive calls to listCorruptFileBlocks to obtain all
* corrupt files.
*/
public static String[] getCorruptFiles(DistributedFileSystem dfs)
throws IOException {
Set<String> corruptFiles = new HashSet<String>();
RemoteIterator<Path> cfb = dfs.listCorruptFileBlocks(new Path("/"));
while (cfb.hasNext()) {
corruptFiles.add(cfb.next().toUri().getPath());
}
return corruptFiles.toArray(new String[corruptFiles.size()]);
}
}
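
A hedged sketch of using these helpers together; it assumes the default filesystem is a DistributedFileSystem, and the report format is illustrative.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;

public class CorruptFileReportSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
    for (String file : RaidDFSUtil.getCorruptFiles(dfs)) {
      long len = dfs.getFileStatus(new Path(file)).getLen();
      List<LocatedBlock> bad = RaidDFSUtil.corruptBlocksInFile(dfs, file, 0, len);
      System.out.println(file + ": " + bad.size() + " corrupt block(s)");
    }
  }
}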


@@ -1,632 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.blockmanagement;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.LinkedHashMap;
import java.util.ArrayList;
import java.util.Map;
import java.util.Comparator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.namenode.*;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
/**
* This BlockPlacementPolicy spreads out the group of blocks which are used by RAID
* for recovering each other. This is important for the availability
* of the blocks. This class can be used by multiple threads. It has to be
* thread safe.
*/
public class BlockPlacementPolicyRaid extends BlockPlacementPolicy {
public static final Log LOG =
LogFactory.getLog(BlockPlacementPolicyRaid.class);
Configuration conf;
private int stripeLength;
private int xorParityLength;
private int rsParityLength;
private String xorPrefix = null;
private String rsPrefix = null;
private String raidTempPrefix = null;
private String raidrsTempPrefix = null;
private String raidHarTempPrefix = null;
private String raidrsHarTempPrefix = null;
private FSNamesystem namesystem = null;
private BlockPlacementPolicyDefault defaultPolicy;
CachedLocatedBlocks cachedLocatedBlocks;
CachedFullPathNames cachedFullPathNames;
/** {@inheritDoc} */
@Override
public void initialize(Configuration conf, FSClusterStats stats,
NetworkTopology clusterMap) {
this.conf = conf;
this.stripeLength = RaidNode.getStripeLength(conf);
this.rsParityLength = RaidNode.rsParityLength(conf);
this.xorParityLength = 1;
try {
this.xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
this.rsPrefix = RaidNode.rsDestinationPath(conf).toUri().getPath();
} catch (IOException e) {
}
if (this.xorPrefix == null) {
this.xorPrefix = RaidNode.DEFAULT_RAID_LOCATION;
}
if (this.rsPrefix == null) {
this.rsPrefix = RaidNode.DEFAULT_RAIDRS_LOCATION;
}
// Throws ClassCastException if we cannot cast here.
this.namesystem = (FSNamesystem) stats;
this.cachedLocatedBlocks = new CachedLocatedBlocks(namesystem);
this.cachedFullPathNames = new CachedFullPathNames(namesystem);
this.raidTempPrefix = RaidNode.xorTempPrefix(conf);
this.raidrsTempPrefix = RaidNode.rsTempPrefix(conf);
this.raidHarTempPrefix = RaidNode.xorHarTempPrefix(conf);
this.raidrsHarTempPrefix = RaidNode.rsHarTempPrefix(conf);
defaultPolicy = new BlockPlacementPolicyDefault(conf, stats, clusterMap);
}
@Override
DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
long blocksize) {
return chooseTarget(srcPath, numOfReplicas, writer, chosenNodes,
null, blocksize);
}
@Override
public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
boolean returnChosenNodes,
HashMap<Node, Node> excludedNodes, long blocksize) {
try {
FileType type = getFileType(srcPath);
if (type == FileType.NOT_RAID) {
return defaultPolicy.chooseTarget(
srcPath, numOfReplicas, writer, chosenNodes, blocksize);
}
if (excludedNodes == null) {
excludedNodes = new HashMap<Node, Node>();
}
addExcludedNodes(srcPath, type, excludedNodes);
DatanodeDescriptor[] result =
defaultPolicy.chooseTarget(numOfReplicas, writer,
chosenNodes, returnChosenNodes, excludedNodes, blocksize);
// Add the added block locations in the block locations cache.
// So the rest of the blocks know about these locations.
cachedLocatedBlocks.get(srcPath).
add(new LocatedBlock(new ExtendedBlock(), result));
return result;
} catch (Exception e) {
LOG.debug("Error happend when choosing datanode to write:" +
StringUtils.stringifyException(e));
return defaultPolicy.chooseTarget(srcPath, numOfReplicas, writer,
chosenNodes, blocksize);
}
}
@Override
public int verifyBlockPlacement(String srcPath, LocatedBlock lBlk,
int minRacks) {
return defaultPolicy.verifyBlockPlacement(srcPath, lBlk, minRacks);
}
/** {@inheritDoc} */
@Override
public DatanodeDescriptor chooseReplicaToDelete(BlockCollection bc,
Block block, short replicationFactor,
Collection<DatanodeDescriptor> first,
Collection<DatanodeDescriptor> second) {
DatanodeDescriptor chosenNode = null;
try {
String path = cachedFullPathNames.get(bc);
FileType type = getFileType(path);
if (type == FileType.NOT_RAID) {
return defaultPolicy.chooseReplicaToDelete(
bc, block, replicationFactor, first, second);
}
List<LocatedBlock> companionBlocks =
getCompanionBlocks(path, type, block);
if (companionBlocks == null || companionBlocks.size() == 0) {
// Use the default method if it is not a valid raided or parity file
return defaultPolicy.chooseReplicaToDelete(
bc, block, replicationFactor, first, second);
}
// Delete from the first collection first
// This ensures the number of unique rack of this block is not reduced
Collection<DatanodeDescriptor> all = new HashSet<DatanodeDescriptor>();
all.addAll(first);
all.addAll(second);
chosenNode = chooseReplicaToDelete(companionBlocks, all);
if (chosenNode != null) {
return chosenNode;
}
return defaultPolicy.chooseReplicaToDelete(
bc, block, replicationFactor, first, second);
} catch (Exception e) {
LOG.debug("Error happend when choosing replica to delete" +
StringUtils.stringifyException(e));
return defaultPolicy.chooseReplicaToDelete(
bc, block, replicationFactor, first, second);
}
}
/**
* Obtain the excluded nodes for the current block that is being written
*/
void addExcludedNodes(String file, FileType type, HashMap<Node, Node> excluded)
throws IOException {
Collection<LocatedBlock> blocks = getCompanionBlocks(file, type, null);
if (blocks == null) {
return;
}
for (LocatedBlock b : blocks) {
for (Node n : b.getLocations()) {
excluded.put(n, n);
}
}
}
private DatanodeDescriptor chooseReplicaToDelete(
Collection<LocatedBlock> companionBlocks,
Collection<DatanodeDescriptor> dataNodes) throws IOException {
if (dataNodes.isEmpty()) {
return null;
}
// Count the number of replicas on each node and rack
final Map<String, Integer> nodeCompanionBlockCount =
countCompanionBlocks(companionBlocks, false);
final Map<String, Integer> rackCompanionBlockCount =
countCompanionBlocks(companionBlocks, true);
NodeComparator comparator =
new NodeComparator(nodeCompanionBlockCount, rackCompanionBlockCount);
return Collections.max(dataNodes, comparator);
}
/**
* Count how many companion blocks are on each datanode or each rack
* @param companionBlocks a collection of all the companion blocks
* @param doRackCount count the companion blocks on the racks of datanodes
* @return the map from node name to the number of companion blocks
*/
static Map<String, Integer> countCompanionBlocks(
Collection<LocatedBlock> companionBlocks, boolean doRackCount) {
Map<String, Integer> result = new HashMap<String, Integer>();
for (LocatedBlock block : companionBlocks) {
for (DatanodeInfo d : block.getLocations()) {
String name = doRackCount ? d.getParent().getName() : d.getName();
if (result.containsKey(name)) {
int count = result.get(name) + 1;
result.put(name, count);
} else {
result.put(name, 1);
}
}
}
return result;
}
/**
* Compares the datanodes based on the number of companion blocks on the same
* node and rack. If even, compare the remaining space on the datanodes.
*/
class NodeComparator implements Comparator<DatanodeDescriptor> {
private Map<String, Integer> nodeBlockCount;
private Map<String, Integer> rackBlockCount;
private NodeComparator(Map<String, Integer> nodeBlockCount,
Map<String, Integer> rackBlockCount) {
this.nodeBlockCount = nodeBlockCount;
this.rackBlockCount = rackBlockCount;
}
@Override
public int compare(DatanodeDescriptor d1, DatanodeDescriptor d2) {
int res = compareBlockCount(d1, d2, nodeBlockCount);
if (res != 0) {
return res;
}
res = compareBlockCount(d1.getParent(), d2.getParent(), rackBlockCount);
if (res != 0) {
return res;
}
if (d1.getRemaining() > d2.getRemaining()) {
return -1;
}
if (d1.getRemaining() < d2.getRemaining()) {
return 1;
}
return 0;
}
private int compareBlockCount(Node node1, Node node2,
Map<String, Integer> blockCount) {
Integer count1 = blockCount.get(node1.getName());
Integer count2 = blockCount.get(node2.getName());
count1 = count1 == null ? 0 : count1;
count2 = count2 == null ? 0 : count2;
if (count1 > count2) {
return 1;
}
if (count1 < count2) {
return -1;
}
return 0;
}
}
/**
* Obtain the companion blocks of the given block.
* Companion blocks are defined as the blocks that can help recover each
* other by using the raid decoder.
* @param path The path of the file that contains the block
* @param type The type of this file
* @param block The given block,
* or null if it is the block which is currently being written
* @return the block locations of companion blocks
*/
List<LocatedBlock> getCompanionBlocks(String path, FileType type,
Block block) throws IOException {
switch (type) {
case NOT_RAID:
return new ArrayList<LocatedBlock>();
case XOR_HAR_TEMP_PARITY:
return getCompanionBlocksForHarParityBlock(
path, xorParityLength, block);
case RS_HAR_TEMP_PARITY:
return getCompanionBlocksForHarParityBlock(
path, rsParityLength, block);
case XOR_TEMP_PARITY:
return getCompanionBlocksForParityBlock(
getSourceFile(path, raidTempPrefix), path, xorParityLength, block);
case RS_TEMP_PARITY:
return getCompanionBlocksForParityBlock(
getSourceFile(path, raidrsTempPrefix), path, rsParityLength, block);
case XOR_PARITY:
return getCompanionBlocksForParityBlock(getSourceFile(path, xorPrefix),
path, xorParityLength, block);
case RS_PARITY:
return getCompanionBlocksForParityBlock(getSourceFile(path, rsPrefix),
path, rsParityLength, block);
case XOR_SOURCE:
return getCompanionBlocksForSourceBlock(
path, getParityFile(path), xorParityLength, block);
case RS_SOURCE:
return getCompanionBlocksForSourceBlock(
path, getParityFile(path), xorParityLength, block);
}
return new ArrayList<LocatedBlock>();
}
private List<LocatedBlock> getCompanionBlocksForHarParityBlock(
String parity, int parityLength, Block block)
throws IOException {
int blockIndex = getBlockIndex(parity, block);
// consider only parity file in this case because source file block
// location is not easy to obtain
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
synchronized (parityBlocks) {
int start = Math.max(0, blockIndex - parityLength + 1);
int end = Math.min(parityBlocks.size(), blockIndex + parityLength);
result.addAll(parityBlocks.subList(start, end));
}
return result;
}
private List<LocatedBlock> getCompanionBlocksForParityBlock(
String src, String parity, int parityLength, Block block)
throws IOException {
int blockIndex = getBlockIndex(parity, block);
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
int stripeIndex = blockIndex / parityLength;
synchronized (parityBlocks) {
int parityStart = stripeIndex * parityLength;
int parityEnd = Math.min(parityStart + parityLength,
parityBlocks.size());
// for parity, always consider the neighbor blocks as companion blocks
if (parityStart < parityBlocks.size()) {
result.addAll(parityBlocks.subList(parityStart, parityEnd));
}
}
if (src == null) {
return result;
}
List<LocatedBlock> sourceBlocks = cachedLocatedBlocks.get(src);
synchronized (sourceBlocks) {
int sourceStart = stripeIndex * stripeLength;
int sourceEnd = Math.min(sourceStart + stripeLength,
sourceBlocks.size());
if (sourceStart < sourceBlocks.size()) {
result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
}
}
return result;
}
private List<LocatedBlock> getCompanionBlocksForSourceBlock(
String src, String parity, int parityLength, Block block)
throws IOException {
int blockIndex = getBlockIndex(src, block);
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
List<LocatedBlock> sourceBlocks = cachedLocatedBlocks.get(src);
int stripeIndex = blockIndex / stripeLength;
synchronized (sourceBlocks) {
int sourceStart = stripeIndex * stripeLength;
int sourceEnd = Math.min(sourceStart + stripeLength,
sourceBlocks.size());
if (sourceStart < sourceBlocks.size()) {
result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
}
}
if (parity == null) {
return result;
}
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
synchronized (parityBlocks) {
int parityStart = stripeIndex * parityLength;
int parityEnd = Math.min(parityStart + parityLength,
parityBlocks.size());
if (parityStart < parityBlocks.size()) {
result.addAll(parityBlocks.subList(parityStart, parityEnd));
}
}
return result;
}
private int getBlockIndex(String file, Block block) throws IOException {
List<LocatedBlock> blocks = cachedLocatedBlocks.get(file);
synchronized (blocks) {
// null indicates that this block is currently being added. Return size()
// as the index in this case
if (block == null) {
return blocks.size();
}
for (int i = 0; i < blocks.size(); i++) {
if (blocks.get(i).getBlock().getLocalBlock().equals(block)) {
return i;
}
}
}
throw new IOException("Cannot locate " + block + " in file " + file);
}
/**
* Cache results for getFullPathName()
*/
static class CachedFullPathNames {
FSNamesystem namesystem;
CachedFullPathNames(FSNamesystem namesystem) {
this.namesystem = namesystem;
}
private Cache<INodeWithHashCode, String> cacheInternal =
new Cache<INodeWithHashCode, String>() {
@Override
public String getDirectly(INodeWithHashCode inode) throws IOException {
namesystem.readLock();
try {
return inode.getFullPathName();
} finally {
namesystem.readUnlock();
}
}
};
static private class INodeWithHashCode {
BlockCollection bc;
INodeWithHashCode(BlockCollection bc) {
this.bc= bc;
}
@Override
public boolean equals(Object obj) {
return bc== obj;
}
@Override
public int hashCode() {
return System.identityHashCode(bc);
}
String getFullPathName() {
return bc.getName();
}
}
public String get(BlockCollection bc) throws IOException {
return cacheInternal.get(new INodeWithHashCode(bc));
}
}
/**
* Cache results for FSNamesystem.getBlockLocations()
*/
static class CachedLocatedBlocks extends Cache<String, List<LocatedBlock>> {
FSNamesystem namesystem;
CachedLocatedBlocks(FSNamesystem namesystem) {
this.namesystem = namesystem;
}
@Override
public List<LocatedBlock> getDirectly(String file) throws IOException {
long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
List<LocatedBlock> result = NameNodeRaidUtil.getBlockLocations(namesystem,
file, 0L, len, false, false).getLocatedBlocks();
if (result == null || result.isEmpty()) {
result = new ArrayList<LocatedBlock>();
}
return Collections.synchronizedList(result);
}
}
static abstract class Cache<K, V> {
private Map<K, ValueWithTime> cache;
private static final long CACHE_TIMEOUT = 300000L; // 5 minutes
// The timeout is long but the consequence of stale value is not serious
Cache() {
Map<K, ValueWithTime> map = new LinkedHashMap<K, ValueWithTime>() {
private static final long serialVersionUID = 1L;
final private int MAX_ENTRIES = 50000;
@Override
protected boolean removeEldestEntry(
Map.Entry<K, ValueWithTime> eldest) {
return size() > MAX_ENTRIES;
}
};
this.cache = Collections.synchronizedMap(map);
}
// Note that this method may hold FSNamesystem.readLock() and it may
// be called inside FSNamesystem.writeLock(). If we make this method
// synchronized, it will deadlock.
abstract protected V getDirectly(K key) throws IOException;
public V get(K key) throws IOException {
// The method is not synchronized so we may get some stale value here but
// it's OK.
ValueWithTime result = cache.get(key);
long now = Time.now();
if (result != null &&
now - result.cachedTime < CACHE_TIMEOUT) {
return result.value;
}
result = new ValueWithTime();
result.value = getDirectly(key);
result.cachedTime = now;
cache.put(key, result);
return result.value;
}
private class ValueWithTime {
V value = null;
long cachedTime = 0L;
}
}
/**
* Get path for the corresponding source file for a valid parity
* file. Returns null if it does not exist
* @param parity the toUri path of the parity file
* @return the toUri path of the source file
*/
String getSourceFile(String parity, String prefix) throws IOException {
if (isHarFile(parity)) {
return null;
}
// remove the prefix
String src = parity.substring(prefix.length());
if (NameNodeRaidUtil.getFileInfo(namesystem, src, true) == null) {
return null;
}
return src;
}
/**
* Get path for the corresponding parity file for a source file.
* Returns null if it does not exist
* @param src the toUri path of the source file
* @return the toUri path of the parity file
*/
String getParityFile(String src) throws IOException {
String xorParity = getParityFile(xorPrefix, src);
if (xorParity != null) {
return xorParity;
}
String rsParity = getParityFile(rsPrefix, src);
if (rsParity != null) {
return rsParity;
}
return null;
}
/**
* Get path for the parity file. Returns null if it does not exist
* @param parityPrefix usually "/raid/" or "/raidrs/"
* @return the toUri path of the parity file
*/
private String getParityFile(String parityPrefix, String src)
throws IOException {
String parity = parityPrefix + src;
if (NameNodeRaidUtil.getFileInfo(namesystem, parity, true) == null) {
return null;
}
return parity;
}
private boolean isHarFile(String path) {
return path.lastIndexOf(RaidNode.HAR_SUFFIX) != -1;
}
enum FileType {
NOT_RAID,
XOR_HAR_TEMP_PARITY,
XOR_TEMP_PARITY,
XOR_PARITY,
XOR_SOURCE,
RS_HAR_TEMP_PARITY,
RS_TEMP_PARITY,
RS_PARITY,
RS_SOURCE,
}
FileType getFileType(String path) throws IOException {
if (path.startsWith(raidHarTempPrefix + Path.SEPARATOR)) {
return FileType.XOR_HAR_TEMP_PARITY;
}
if (path.startsWith(raidrsHarTempPrefix + Path.SEPARATOR)) {
return FileType.RS_HAR_TEMP_PARITY;
}
if (path.startsWith(raidTempPrefix + Path.SEPARATOR)) {
return FileType.XOR_TEMP_PARITY;
}
if (path.startsWith(raidrsTempPrefix + Path.SEPARATOR)) {
return FileType.RS_TEMP_PARITY;
}
if (path.startsWith(xorPrefix + Path.SEPARATOR)) {
return FileType.XOR_PARITY;
}
if (path.startsWith(rsPrefix + Path.SEPARATOR)) {
return FileType.RS_PARITY;
}
String parity = getParityFile(path);
if (parity == null) {
return FileType.NOT_RAID;
}
if (parity.startsWith(xorPrefix + Path.SEPARATOR)) {
return FileType.XOR_SOURCE;
}
if (parity.startsWith(rsPrefix + Path.SEPARATOR)) {
return FileType.RS_SOURCE;
}
return FileType.NOT_RAID;
}
}
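
For context, a hedged sketch of how a policy like this is switched on for the NameNode. The "dfs.block.replicator.classname" key is the stock HDFS setting for choosing the BlockPlacementPolicy implementation, but it does not appear in this diff, so treat it as an assumption to verify.

import org.apache.hadoop.conf.Configuration;

public class RaidPlacementPolicySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Assumed key for selecting the NameNode's BlockPlacementPolicy implementation.
    conf.set("dfs.block.replicator.classname",
        "org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid");
    System.out.println(conf.get("dfs.block.replicator.classname"));
  }
}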


@@ -1,505 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.net.SocketOutputStream;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;
/**
* Reads a block from the disk and sends it to a recipient.
*/
public class RaidBlockSender implements java.io.Closeable {
public static final Log LOG = DataNode.LOG;
static final Log ClientTraceLog = DataNode.ClientTraceLog;
private ExtendedBlock block; // the block to read from
/** The visible length of a replica. */
private final long replicaVisibleLength;
private InputStream blockIn; // data stream
private long blockInPosition = -1; // updated while using transferTo().
private DataInputStream checksumIn; // checksum datastream
private DataChecksum checksum; // checksum stream
private long offset; // starting position to read
/** Initial position to read */
private long initialOffset;
private long endOffset; // ending position
private int chunkSize; // chunk size
private int checksumSize; // checksum size
private boolean corruptChecksumOk; // if a corrupt or missing checksum is acceptable
private boolean chunkOffsetOK; // if need to send chunk offset
private long seqno; // sequence number of packet
private boolean transferToAllowed = true;
private boolean blockReadFully; //set when the whole block is read
private boolean verifyChecksum; //if true, check is verified while reading
private final String clientTraceFmt; // format of client trace log message
/**
* Minimum buffer used while sending data to clients. Used only if
* transferTo() is enabled. 64KB is not that large. It could be larger, but
* not sure if there will be much more improvement.
*/
private static final int MIN_BUFFER_WITH_TRANSFERTO = 64*1024;
private static final int TRANSFERTO_BUFFER_SIZE = Math.max(
HdfsConstants.IO_FILE_BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO);
private volatile ChunkChecksum lastChunkChecksum = null;
public RaidBlockSender(ExtendedBlock block, long blockLength, long startOffset, long length,
boolean corruptChecksumOk, boolean chunkOffsetOK,
boolean verifyChecksum, boolean transferToAllowed,
DataInputStream metadataIn, InputStreamFactory streamFactory
) throws IOException {
this(block, blockLength, startOffset, length,
corruptChecksumOk, chunkOffsetOK,
verifyChecksum, transferToAllowed,
metadataIn, streamFactory, null);
}
public RaidBlockSender(ExtendedBlock block, long blockLength, long startOffset, long length,
boolean corruptChecksumOk, boolean chunkOffsetOK,
boolean verifyChecksum, boolean transferToAllowed,
DataInputStream metadataIn, InputStreamFactory streamFactory,
String clientTraceFmt) throws IOException {
try {
this.block = block;
this.chunkOffsetOK = chunkOffsetOK;
this.corruptChecksumOk = corruptChecksumOk;
this.verifyChecksum = verifyChecksum;
this.replicaVisibleLength = blockLength;
this.transferToAllowed = transferToAllowed;
this.clientTraceFmt = clientTraceFmt;
if ( !corruptChecksumOk || metadataIn != null) {
this.checksumIn = metadataIn;
// read and handle the common header here. For now just a version
BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
short version = header.getVersion();
if (version != BlockMetadataHeader.VERSION) {
LOG.warn("Wrong version (" + version + ") for metadata file for "
+ block + " ignoring ...");
}
checksum = header.getChecksum();
} else {
LOG.warn("Could not find metadata file for " + block);
// This only decides the buffer size. Use BUFFER_SIZE?
checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL,
16 * 1024);
}
/* If bytesPerChecksum is very large, then the metadata file
* is mostly corrupted. For now just truncate bytesPerChecksum to
* blockLength.
*/
int size = checksum.getBytesPerChecksum();
if (size > 10*1024*1024 && size > replicaVisibleLength) {
checksum = DataChecksum.newDataChecksum(checksum.getChecksumType(),
Math.max((int)replicaVisibleLength, 10*1024*1024));
size = checksum.getBytesPerChecksum();
}
chunkSize = size;
checksumSize = checksum.getChecksumSize();
if (length < 0) {
length = replicaVisibleLength;
}
endOffset = blockLength;
if (startOffset < 0 || startOffset > endOffset
|| (length + startOffset) > endOffset) {
String msg = " Offset " + startOffset + " and length " + length
+ " don't match block " + block + " ( blockLen " + endOffset + " )";
LOG.warn("sendBlock() : " + msg);
throw new IOException(msg);
}
offset = (startOffset - (startOffset % chunkSize));
if (length >= 0) {
// Make sure endOffset points to end of a checksummed chunk.
long tmpLen = startOffset + length;
if (tmpLen % chunkSize != 0) {
tmpLen += (chunkSize - tmpLen % chunkSize);
}
if (tmpLen < endOffset) {
// will use on-disk checksum here since the end is a stable chunk
endOffset = tmpLen;
}
}
// seek to the right offsets
if (offset > 0) {
long checksumSkip = (offset / chunkSize) * checksumSize;
// note blockInStream is seeked when created below
if (checksumSkip > 0) {
// Should we use seek() for checksum file as well?
IOUtils.skipFully(checksumIn, checksumSkip);
}
}
seqno = 0;
blockIn = streamFactory.createStream(offset);
} catch (IOException ioe) {
IOUtils.closeStream(this);
IOUtils.closeStream(blockIn);
throw ioe;
}
}
/**
* close opened files.
*/
public void close() throws IOException {
IOException ioe = null;
// close checksum file
if(checksumIn!=null) {
try {
checksumIn.close();
} catch (IOException e) {
ioe = e;
}
checksumIn = null;
}
// close data file
if(blockIn!=null) {
try {
blockIn.close();
} catch (IOException e) {
ioe = e;
}
blockIn = null;
}
// throw IOException if there is any
if(ioe!= null) {
throw ioe;
}
}
/**
* Converts an IOException (not subclasses) to SocketException.
* This is typically done to indicate to upper layers that the error
* was a socket error rather than often more serious exceptions like
* disk errors.
*/
private static IOException ioeToSocketException(IOException ioe) {
if (ioe.getClass().equals(IOException.class)) {
// "se" could be a new class in stead of SocketException.
IOException se = new SocketException("Original Exception : " + ioe);
se.initCause(ioe);
/* Change the stacktrace so that original trace is not truncated
* when printed.*/
se.setStackTrace(ioe.getStackTrace());
return se;
}
// otherwise just return the same exception.
return ioe;
}
/**
* @param datalen Length of data
* @return number of chunks for data of given size
*/
private int numberOfChunks(long datalen) {
return (int) ((datalen + chunkSize - 1)/chunkSize);
}
/**
* Write packet header into {@code pkt}
*/
private void writePacketHeader(ByteBuffer pkt, int dataLen, int packetLen) {
pkt.clear();
PacketHeader header = new PacketHeader(packetLen, offset, seqno,
(dataLen == 0), dataLen, false);
header.putInBuffer(pkt);
}
/**
* Read checksum into given buffer
* @param buf buffer to read the checksum into
* @param checksumOffset offset at which to write the checksum into buf
* @param checksumLen length of checksum to write
* @throws IOException on error
*/
private void readChecksum(byte[] buf, final int checksumOffset,
final int checksumLen) throws IOException {
if (checksumSize <= 0 && checksumIn == null) {
return;
}
try {
checksumIn.readFully(buf, checksumOffset, checksumLen);
} catch (IOException e) {
LOG.warn(" Could not read or failed to veirfy checksum for data"
+ " at offset " + offset + " for block " + block, e);
IOUtils.closeStream(checksumIn);
checksumIn = null;
if (corruptChecksumOk) {
if (checksumOffset < checksumLen) {
// Just fill the array with zeros.
Arrays.fill(buf, checksumOffset, checksumLen, (byte) 0);
}
} else {
throw e;
}
}
}
/**
* Sends a packet with up to maxChunks chunks of data.
*
* @param pkt buffer used for writing packet data
* @param maxChunks maximum number of chunks to send
* @param out stream to send data to
* @param transferTo use transferTo to send data
* @param throttler used for throttling data transfer bandwidth
*/
private int sendPacket(ByteBuffer pkt, int maxChunks, OutputStream out,
boolean transferTo, DataTransferThrottler throttler) throws IOException {
int dataLen = (int) Math.min(endOffset - offset,
(chunkSize * (long) maxChunks));
int numChunks = numberOfChunks(dataLen); // Number of chunks to be sent in the packet
int checksumDataLen = numChunks * checksumSize;
int packetLen = dataLen + checksumDataLen + 4;
boolean lastDataPacket = offset + dataLen == endOffset && dataLen > 0;
writePacketHeader(pkt, dataLen, packetLen);
int checksumOff = pkt.position();
byte[] buf = pkt.array();
if (checksumSize > 0 && checksumIn != null) {
readChecksum(buf, checksumOff, checksumDataLen);
// write in progress that we need to use to get last checksum
if (lastDataPacket && lastChunkChecksum != null) {
int start = checksumOff + checksumDataLen - checksumSize;
byte[] updatedChecksum = lastChunkChecksum.getChecksum();
if (updatedChecksum != null) {
System.arraycopy(updatedChecksum, 0, buf, start, checksumSize);
}
}
}
int dataOff = checksumOff + checksumDataLen;
if (!transferTo) { // normal transfer
IOUtils.readFully(blockIn, buf, dataOff, dataLen);
if (verifyChecksum) {
verifyChecksum(buf, dataOff, dataLen, numChunks, checksumOff);
}
}
try {
if (transferTo) {
SocketOutputStream sockOut = (SocketOutputStream)out;
sockOut.write(buf, 0, dataOff); // First write checksum
// no need to flush. since we know out is not a buffered stream.
sockOut.transferToFully(((FileInputStream)blockIn).getChannel(),
blockInPosition, dataLen);
blockInPosition += dataLen;
} else {
// normal transfer
out.write(buf, 0, dataOff + dataLen);
}
} catch (IOException e) {
/* Exception while writing to the client. Connection closure from
* the other end is mostly the case and we do not care much about
* it. But other things can go wrong, especially in transferTo(),
* which we do not want to ignore.
*
* The message parsing below should not be considered as a good
* coding example. NEVER do it to drive program logic. NEVER.
* It was done here because the NIO throws an IOException for EPIPE.
*/
String ioem = e.getMessage();
if (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset")) {
LOG.error("BlockSender.sendChunks() exception: ", e);
}
throw ioeToSocketException(e);
}
if (throttler != null) { // rebalancing so throttle
throttler.throttle(packetLen);
}
return dataLen;
}
/**
* Compute checksum for chunks and verify the checksum that is read from
* the metadata file is correct.
*
* @param buf buffer that has checksum and data
* @param dataOffset position where data is written in the buf
* @param datalen length of data
* @param numChunks number of chunks corresponding to data
* @param checksumOffset offset where checksum is written in the buf
* @throws ChecksumException on failed checksum verification
*/
public void verifyChecksum(final byte[] buf, final int dataOffset,
final int datalen, final int numChunks, final int checksumOffset)
throws ChecksumException {
int dOff = dataOffset;
int cOff = checksumOffset;
int dLeft = datalen;
for (int i = 0; i < numChunks; i++) {
checksum.reset();
int dLen = Math.min(dLeft, chunkSize);
checksum.update(buf, dOff, dLen);
if (!checksum.compare(buf, cOff)) {
long failedPos = offset + datalen - dLeft;
throw new ChecksumException("Checksum failed at " + failedPos,
failedPos);
}
dLeft -= dLen;
dOff += dLen;
cOff += checksumSize;
}
}
/**
* sendBlock() is used to read block and its metadata and stream the data to
* either a client or to another datanode.
*
* @param out stream to which the block is written to
* @param baseStream optional; if non-null, <code>out</code> is assumed to
* be a wrapper over this stream. This enables optimizations for
* sending the data, e.g.
* {@link SocketOutputStream#transferToFully(FileChannel,
* long, int)}.
* @return total bytes read, including CRC.
*/
public long sendBlock(DataOutputStream out, OutputStream baseStream)
throws IOException {
if (out == null) {
throw new IOException( "out stream is null" );
}
initialOffset = offset;
long totalRead = 0;
OutputStream streamForSendChunks = out;
final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
try {
int maxChunksPerPacket;
int pktSize = PacketHeader.PKT_HEADER_LEN;
boolean transferTo = transferToAllowed && !verifyChecksum
&& baseStream instanceof SocketOutputStream
&& blockIn instanceof FileInputStream;
if (transferTo) {
FileChannel fileChannel = ((FileInputStream)blockIn).getChannel();
blockInPosition = fileChannel.position();
streamForSendChunks = baseStream;
maxChunksPerPacket = numberOfChunks(TRANSFERTO_BUFFER_SIZE);
// Smaller packet size to only hold checksum when doing transferTo
pktSize += checksumSize * maxChunksPerPacket;
} else {
maxChunksPerPacket = Math.max(1,
numberOfChunks(HdfsConstants.IO_FILE_BUFFER_SIZE));
// Packet size includes both checksum and data
pktSize += (chunkSize + checksumSize) * maxChunksPerPacket;
}
ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);
while (endOffset > offset) {
long len = sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks,
transferTo, null);
offset += len;
totalRead += len + (numberOfChunks(len) * checksumSize);
seqno++;
}
try {
// send an empty packet to mark the end of the block
sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, transferTo,
null);
out.flush();
} catch (IOException e) { //socket error
throw ioeToSocketException(e);
}
blockReadFully = true;
} finally {
if (clientTraceFmt != null) {
final long endTime = System.nanoTime();
ClientTraceLog.info(String.format(clientTraceFmt, totalRead,
initialOffset, endTime - startTime));
}
close();
}
return totalRead;
}
boolean isBlockReadFully() {
return blockReadFully;
}
public static interface InputStreamFactory {
public InputStream createStream(long offset) throws IOException;
}
/**
* @return the checksum type that will be used with this block transfer.
*/
public DataChecksum getChecksum() {
return checksum;
}
private static class BlockInputStreamFactory implements InputStreamFactory {
private final ExtendedBlock block;
private final FsDatasetSpi<?> data;
private BlockInputStreamFactory(ExtendedBlock block, FsDatasetSpi<?> data) {
this.block = block;
this.data = data;
}
@Override
public InputStream createStream(long offset) throws IOException {
return data.getBlockInputStream(block, offset);
}
}
}
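For orientation, a minimal usage sketch of the sender above (mirroring the RaidBlockSender constructor call made in BlockFixer.sendFixedBlock later in this diff; `block`, `blockSize`, `metadataIn`, `blockContents`, `out`, and `baseStream` are assumed to be supplied by the caller):
// Hedged sketch: stream a locally reconstructed block to a target datanode.
RaidBlockSender blockSender =
    new RaidBlockSender(block.getBlock(), blockSize, 0, blockSize,
        false /* corruptChecksumOk */, false /* chunkOffsetOK */,
        true /* verifyChecksum */, false /* transferToAllowed */,
        metadataIn,
        new RaidBlockSender.InputStreamFactory() {
          @Override
          public InputStream createStream(long offset) throws IOException {
            return blockContents; // offset is 0 here, so the raw stream suffices
          }
        });
long bytesSent = blockSender.sendBlock(out, baseStream); // total bytes sent, including CRC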

View File

@ -1,56 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.*;
import org.apache.hadoop.classification.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.protocol.*;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.security.AccessControlException;
/** Utilities used by RAID for accessing NameNode. */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class NameNodeRaidUtil {
/** Accessing FSDirectory.getFileInfo(..) */
public static HdfsFileStatus getFileInfo(final FSDirectory dir,
final String src, final boolean resolveLink
) throws UnresolvedLinkException {
return dir.getFileInfo(src, resolveLink);
}
/** Accessing FSNamesystem.getFileInfo(..)
* @throws StandbyException */
public static HdfsFileStatus getFileInfo(final FSNamesystem namesystem,
final String src, final boolean resolveLink
) throws AccessControlException, UnresolvedLinkException, StandbyException {
return namesystem.getFileInfo(src, resolveLink);
}
/** Accessing FSNamesystem.getBlockLocations(..) */
public static LocatedBlocks getBlockLocations(final FSNamesystem namesystem,
final String src, final long offset, final long length,
final boolean doAccessTime, final boolean needBlockToken
) throws FileNotFoundException, UnresolvedLinkException, IOException {
return namesystem.getBlockLocations(src, offset, length,
doAccessTime, needBlockToken, true);
}
}
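As a brief illustration (not part of the original file), the helpers above might be invoked from RAID code roughly as follows; `namesystem` and the path are placeholders:
// Hypothetical call site: resolve file status and block locations through the utility.
HdfsFileStatus stat =
    NameNodeRaidUtil.getFileInfo(namesystem, "/user/foo/part-00000", true);
LocatedBlocks blocks =
    NameNodeRaidUtil.getBlockLocations(namesystem, "/user/foo/part-00000",
        0L, stat.getLen(), false /* doAccessTime */, false /* needBlockToken */);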

View File

@ -1,840 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.nio.channels.SocketChannel;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.datatransfer.*;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.RaidBlockSender;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.RaidUtils;
/**
* contains the core functionality of the block fixer
*
* configuration options:
* raid.blockfix.classname - the class name of the block fixer
* implementation to use
*
* raid.blockfix.interval - interval between checks for corrupt files
*
* raid.blockfix.history.interval - interval before fixing same file again
*
* raid.blockfix.read.timeout - read timeout
*
* raid.blockfix.write.timeout - write timeout
*/
public abstract class BlockFixer extends Configured implements Runnable {
public static final String BLOCKFIX_CLASSNAME = "raid.blockfix.classname";
public static final String BLOCKFIX_INTERVAL = "raid.blockfix.interval";
public static final String BLOCKFIX_HISTORY_INTERVAL =
"raid.blockfix.history.interval";
public static final String BLOCKFIX_READ_TIMEOUT =
"raid.blockfix.read.timeout";
public static final String BLOCKFIX_WRITE_TIMEOUT =
"raid.blockfix.write.timeout";
public static final long DEFAULT_BLOCKFIX_INTERVAL = 60 * 1000; // 1 min
public static final long DEFAULT_BLOCKFIX_HISTORY_INTERVAL =
60 * 60 * 1000; // 60 mins
public static BlockFixer createBlockFixer(Configuration conf)
throws ClassNotFoundException {
try {
// default to distributed block fixer
Class<?> blockFixerClass =
conf.getClass(BLOCKFIX_CLASSNAME, DistBlockFixer.class);
if (!BlockFixer.class.isAssignableFrom(blockFixerClass)) {
throw new ClassNotFoundException("not an implementation of blockfixer");
}
Constructor<?> constructor =
blockFixerClass.getConstructor(new Class[] {Configuration.class} );
return (BlockFixer) constructor.newInstance(conf);
} catch (NoSuchMethodException e) {
throw new ClassNotFoundException("cannot construct blockfixer", e);
} catch (InstantiationException e) {
throw new ClassNotFoundException("cannot construct blockfixer", e);
} catch (IllegalAccessException e) {
throw new ClassNotFoundException("cannot construct blockfixer", e);
} catch (InvocationTargetException e) {
throw new ClassNotFoundException("cannot construct blockfixer", e);
}
}
private long numFilesFixed = 0;
public volatile boolean running = true;
// interval between checks for corrupt files
protected long blockFixInterval;
// interval before fixing same file again
protected long historyInterval;
public BlockFixer(Configuration conf) {
super(conf);
blockFixInterval =
getConf().getLong(BLOCKFIX_INTERVAL, DEFAULT_BLOCKFIX_INTERVAL);
historyInterval =
getConf().getLong(BLOCKFIX_HISTORY_INTERVAL,
DEFAULT_BLOCKFIX_HISTORY_INTERVAL);
}
@Override
public abstract void run();
/**
* returns the number of files that have been fixed by this block fixer
*/
public synchronized long filesFixed() {
return numFilesFixed;
}
/**
* increments the number of files that have been fixed by this block fixer
*/
protected synchronized void incrFilesFixed() {
numFilesFixed++;
}
/**
* increments the number of files that have been fixed by this block fixer
*/
protected synchronized void incrFilesFixed(long incr) {
if (incr < 0) {
throw new IllegalArgumentException("cannot increment by negative value " +
incr);
}
numFilesFixed += incr;
}
static boolean isSourceFile(Path p, String[] destPrefixes) {
String pathStr = p.toUri().getPath();
for (String destPrefix: destPrefixes) {
if (pathStr.startsWith(destPrefix)) {
return false;
}
}
return true;
}
void filterUnfixableSourceFiles(Iterator<Path> it) throws IOException {
String xorPrefix = RaidNode.xorDestinationPath(getConf()).toUri().getPath();
if (!xorPrefix.endsWith(Path.SEPARATOR)) {
xorPrefix += Path.SEPARATOR;
}
String rsPrefix = RaidNode.rsDestinationPath(getConf()).toUri().getPath();
if (!rsPrefix.endsWith(Path.SEPARATOR)) {
rsPrefix += Path.SEPARATOR;
}
String[] destPrefixes = new String[]{xorPrefix, rsPrefix};
while (it.hasNext()) {
Path p = it.next();
if (isSourceFile(p, destPrefixes) &&
RaidNode.xorParityForSource(p, getConf()) == null &&
RaidNode.rsParityForSource(p, getConf()) == null) {
it.remove();
}
}
}
/**
* This class implements the actual fixing functionality.
* We keep it in a separate class so that
* the distributed block fixer can use it.
*/
static class BlockFixerHelper extends Configured {
public static final Log LOG = LogFactory.getLog(BlockFixer.
BlockFixerHelper.class);
private String xorPrefix;
private String rsPrefix;
private XOREncoder xorEncoder;
private XORDecoder xorDecoder;
private ReedSolomonEncoder rsEncoder;
private ReedSolomonDecoder rsDecoder;
public BlockFixerHelper(Configuration conf) throws IOException {
super(conf);
xorPrefix = RaidNode.xorDestinationPath(getConf()).toUri().getPath();
if (!xorPrefix.endsWith(Path.SEPARATOR)) {
xorPrefix += Path.SEPARATOR;
}
rsPrefix = RaidNode.rsDestinationPath(getConf()).toUri().getPath();
if (!rsPrefix.endsWith(Path.SEPARATOR)) {
rsPrefix += Path.SEPARATOR;
}
int stripeLength = RaidNode.getStripeLength(getConf());
xorEncoder = new XOREncoder(getConf(), stripeLength);
xorDecoder = new XORDecoder(getConf(), stripeLength);
int parityLength = RaidNode.rsParityLength(getConf());
rsEncoder = new ReedSolomonEncoder(getConf(), stripeLength, parityLength);
rsDecoder = new ReedSolomonDecoder(getConf(), stripeLength, parityLength);
}
/**
* checks whether file is xor parity file
*/
boolean isXorParityFile(Path p) {
String pathStr = p.toUri().getPath();
if (pathStr.contains(RaidNode.HAR_SUFFIX)) {
return false;
}
return pathStr.startsWith(xorPrefix);
}
/**
* checks whether file is rs parity file
*/
boolean isRsParityFile(Path p) {
String pathStr = p.toUri().getPath();
if (pathStr.contains(RaidNode.HAR_SUFFIX)) {
return false;
}
return pathStr.startsWith(rsPrefix);
}
/**
* Fix a file, do not report progress.
*
* @return true if file has been fixed, false if no fixing
* was necessary or possible.
*/
boolean fixFile(Path srcPath) throws IOException {
return fixFile(srcPath, new RaidUtils.DummyProgressable());
}
/**
* Fix a file, report progress.
*
* @return true if file has been fixed, false if no fixing
* was necessary or possible.
*/
boolean fixFile(Path srcPath, Progressable progress) throws IOException {
if (RaidNode.isParityHarPartFile(srcPath)) {
return processCorruptParityHarPartFile(srcPath, progress);
}
// The corrupted file is a XOR parity file
if (isXorParityFile(srcPath)) {
return processCorruptParityFile(srcPath, xorEncoder, progress);
}
// The corrupted file is a ReedSolomon parity file
if (isRsParityFile(srcPath)) {
return processCorruptParityFile(srcPath, rsEncoder, progress);
}
// The corrupted file is a source file
RaidNode.ParityFilePair ppair =
RaidNode.xorParityForSource(srcPath, getConf());
Decoder decoder = null;
if (ppair != null) {
decoder = xorDecoder;
} else {
ppair = RaidNode.rsParityForSource(srcPath, getConf());
if (ppair != null) {
decoder = rsDecoder;
}
}
// If we have a parity file, process the file and fix it.
if (ppair != null) {
return processCorruptFile(srcPath, ppair, decoder, progress);
}
// there was nothing to do
return false;
}
/**
* Sorts source files ahead of parity files.
*/
void sortCorruptFiles(List<Path> files) {
// TODO: We should first fix the files that have lost more blocks
Comparator<Path> comp = new Comparator<Path>() {
public int compare(Path p1, Path p2) {
if (isXorParityFile(p2) || isRsParityFile(p2)) {
// If p2 is a parity file, p1 is smaller.
return -1;
}
if (isXorParityFile(p1) || isRsParityFile(p1)) {
// If p1 is a parity file, p2 is smaller.
return 1;
}
// If both are source files, they are equal.
return 0;
}
};
Collections.sort(files, comp);
}
/**
* Returns a DistributedFileSystem hosting the path supplied.
*/
protected DistributedFileSystem getDFS(Path p) throws IOException {
return (DistributedFileSystem) p.getFileSystem(getConf());
}
/**
* Reads through a corrupt source file fixing corrupt blocks on the way.
* @param srcPath Path identifying the corrupt file.
* @throws IOException
* @return true if file has been fixed, false if no fixing
* was necessary or possible.
*/
boolean processCorruptFile(Path srcPath, RaidNode.ParityFilePair parityPair,
Decoder decoder, Progressable progress)
throws IOException {
LOG.info("Processing corrupt file " + srcPath);
DistributedFileSystem srcFs = getDFS(srcPath);
FileStatus srcStat = srcFs.getFileStatus(srcPath);
long blockSize = srcStat.getBlockSize();
long srcFileSize = srcStat.getLen();
String uriPath = srcPath.toUri().getPath();
int numBlocksFixed = 0;
List<LocatedBlock> corrupt =
RaidDFSUtil.corruptBlocksInFile(srcFs, uriPath, 0, srcFileSize);
if (corrupt.size() == 0) {
return false;
}
for (LocatedBlock lb: corrupt) {
ExtendedBlock corruptBlock = lb.getBlock();
long corruptOffset = lb.getStartOffset();
LOG.info("Found corrupt block " + corruptBlock +
", offset " + corruptOffset);
final long blockContentsSize =
Math.min(blockSize, srcFileSize - corruptOffset);
File localBlockFile =
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
localBlockFile.deleteOnExit();
try {
decoder.recoverBlockToFile(srcFs, srcPath, parityPair.getFileSystem(),
parityPair.getPath(), blockSize,
corruptOffset, localBlockFile,
blockContentsSize);
// We have the contents of the block, send them.
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
computeMetadataAndSendFixedBlock(datanode, localBlockFile,
lb, blockContentsSize);
numBlocksFixed++;
} finally {
localBlockFile.delete();
}
progress.progress();
}
LOG.info("Fixed " + numBlocksFixed + " blocks in " + srcPath);
return true;
}
/**
* Fixes corrupt blocks in a parity file.
* This function uses the corresponding source file to regenerate parity
* file blocks.
* @return true if file has been fixed, false if no fixing
* was necessary or possible.
*/
boolean processCorruptParityFile(Path parityPath, Encoder encoder,
Progressable progress)
throws IOException {
LOG.info("Processing corrupt file " + parityPath);
Path srcPath = sourcePathFromParityPath(parityPath);
if (srcPath == null) {
LOG.warn("Unusable parity file " + parityPath);
return false;
}
DistributedFileSystem parityFs = getDFS(parityPath);
FileStatus parityStat = parityFs.getFileStatus(parityPath);
long blockSize = parityStat.getBlockSize();
long parityFileSize = parityStat.getLen();
FileStatus srcStat = getDFS(srcPath).getFileStatus(srcPath);
long srcFileSize = srcStat.getLen();
// Check timestamp.
if (srcStat.getModificationTime() != parityStat.getModificationTime()) {
LOG.info("Mismatching timestamp for " + srcPath + " and " + parityPath +
", moving on...");
return false;
}
String uriPath = parityPath.toUri().getPath();
int numBlocksFixed = 0;
List<LocatedBlock> corrupt =
RaidDFSUtil.corruptBlocksInFile(parityFs, uriPath, 0, parityFileSize);
if (corrupt.size() == 0) {
return false;
}
for (LocatedBlock lb: corrupt) {
ExtendedBlock corruptBlock = lb.getBlock();
long corruptOffset = lb.getStartOffset();
LOG.info("Found corrupt block " + corruptBlock +
", offset " + corruptOffset);
File localBlockFile =
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
localBlockFile.deleteOnExit();
try {
encoder.recoverParityBlockToFile(parityFs, srcPath, srcFileSize,
blockSize, parityPath,
corruptOffset, localBlockFile);
// We have the contents of the block, send them.
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
computeMetadataAndSendFixedBlock(datanode, localBlockFile, lb,
blockSize);
numBlocksFixed++;
} finally {
localBlockFile.delete();
}
progress.progress();
}
LOG.info("Fixed " + numBlocksFixed + " blocks in " + parityPath);
return true;
}
/**
* Reads through a parity HAR part file, fixing corrupt blocks on the way.
* A HAR block can contain many file blocks, as long as the HAR part file
* block size is a multiple of the file block size.
* @return true if file has been fixed, false if no fixing
* was necessary or possible.
*/
boolean processCorruptParityHarPartFile(Path partFile,
Progressable progress)
throws IOException {
LOG.info("Processing corrupt file " + partFile);
// Get some basic information.
DistributedFileSystem dfs = getDFS(partFile);
FileStatus partFileStat = dfs.getFileStatus(partFile);
long partFileSize = partFileStat.getLen();
long partFileBlockSize = partFileStat.getBlockSize();
LOG.info(partFile + " has block size " + partFileBlockSize);
// Find the path to the index file.
// Parity file HARs are only one level deep, so the index file is at the
// same level as the part file.
String harDirectory = partFile.toUri().getPath(); // Temporarily.
harDirectory =
harDirectory.substring(0, harDirectory.lastIndexOf(Path.SEPARATOR));
Path indexFile = new Path(harDirectory + "/" + HarIndex.indexFileName);
FileStatus indexStat = dfs.getFileStatus(indexFile);
// Parses through the HAR index file.
HarIndex harIndex = new HarIndex(dfs.open(indexFile), indexStat.getLen());
String uriPath = partFile.toUri().getPath();
int numBlocksFixed = 0;
List<LocatedBlock> corrupt =
RaidDFSUtil.corruptBlocksInFile(dfs, uriPath, 0, partFileSize);
if (corrupt.size() == 0) {
return false;
}
for (LocatedBlock lb: corrupt) {
ExtendedBlock corruptBlock = lb.getBlock();
long corruptOffset = lb.getStartOffset();
File localBlockFile =
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
localBlockFile.deleteOnExit();
processCorruptParityHarPartBlock(dfs, partFile, corruptBlock,
corruptOffset, partFileStat, harIndex,
localBlockFile, progress);
// Now we have recovered the part file block locally, send it.
try {
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
computeMetadataAndSendFixedBlock(datanode, localBlockFile,
lb, localBlockFile.length());
numBlocksFixed++;
} finally {
localBlockFile.delete();
}
progress.progress();
}
LOG.info("Fixed " + numBlocksFixed + " blocks in " + partFile);
return true;
}
/**
* This fixes a single part file block by recovering in sequence each
* parity block in the part file block.
*/
private void processCorruptParityHarPartBlock(FileSystem dfs, Path partFile,
ExtendedBlock corruptBlock,
long corruptOffset,
FileStatus partFileStat,
HarIndex harIndex,
File localBlockFile,
Progressable progress)
throws IOException {
String partName = partFile.toUri().getPath(); // Temporarily.
partName = partName.substring(1 + partName.lastIndexOf(Path.SEPARATOR));
OutputStream out = new FileOutputStream(localBlockFile);
try {
// A HAR part file block could map to several parity files. We need to
// use all of them to recover this block.
final long corruptEnd = Math.min(corruptOffset +
partFileStat.getBlockSize(),
partFileStat.getLen());
for (long offset = corruptOffset; offset < corruptEnd; ) {
HarIndex.IndexEntry entry = harIndex.findEntry(partName, offset);
if (entry == null) {
String msg = "Corrupt index file has no matching index entry for " +
partName + ":" + offset;
LOG.warn(msg);
throw new IOException(msg);
}
Path parityFile = new Path(entry.fileName);
Encoder encoder;
if (isXorParityFile(parityFile)) {
encoder = xorEncoder;
} else if (isRsParityFile(parityFile)) {
encoder = rsEncoder;
} else {
String msg = "Could not figure out parity file correctly";
LOG.warn(msg);
throw new IOException(msg);
}
Path srcFile = sourcePathFromParityPath(parityFile);
FileStatus srcStat = dfs.getFileStatus(srcFile);
if (srcStat.getModificationTime() != entry.mtime) {
String msg = "Modification times of " + parityFile + " and " +
srcFile + " do not match.";
LOG.warn(msg);
throw new IOException(msg);
}
long corruptOffsetInParity = offset - entry.startOffset;
LOG.info(partFile + ":" + offset + " maps to " +
parityFile + ":" + corruptOffsetInParity +
" and will be recovered from " + srcFile);
encoder.recoverParityBlockToStream(dfs, srcFile, srcStat.getLen(),
srcStat.getBlockSize(), parityFile,
corruptOffsetInParity, out);
// Finished recovery of one parity block. Since a parity block has the
// same size as a source block, we can move offset by source block size.
offset += srcStat.getBlockSize();
LOG.info("Recovered " + srcStat.getBlockSize() + " part file bytes ");
if (offset > corruptEnd) {
String msg =
"Recovered block spills across part file blocks. Cannot continue.";
throw new IOException(msg);
}
progress.progress();
}
} finally {
out.close();
}
}
/**
* Choose a datanode (hostname:portnumber). The datanode is chosen at
* random from the live datanodes.
* @param locationsToAvoid locations to avoid.
* @return A datanode
* @throws IOException
*/
private DatanodeInfo chooseDatanode(DatanodeInfo[] locationsToAvoid)
throws IOException {
DistributedFileSystem dfs = getDFS(new Path("/"));
DatanodeInfo[] live =
dfs.getClient().datanodeReport(DatanodeReportType.LIVE);
LOG.info("Choosing a datanode from " + live.length +
" live nodes while avoiding " + locationsToAvoid.length);
Random rand = new Random();
DatanodeInfo chosen = null;
int maxAttempts = 1000;
for (int i = 0; i < maxAttempts && chosen == null; i++) {
int idx = rand.nextInt(live.length);
chosen = live[idx];
for (DatanodeInfo avoid: locationsToAvoid) {
if (chosen.getName().equals(avoid.getName())) {
LOG.info("Avoiding " + avoid.getName());
chosen = null;
break;
}
}
}
if (chosen == null) {
throw new IOException("Could not choose datanode");
}
LOG.info("Choosing datanode " + chosen.getName());
return chosen;
}
/**
* Reads data from the data stream provided and computes metadata.
*/
static DataInputStream computeMetadata(Configuration conf,
InputStream dataStream)
throws IOException {
ByteArrayOutputStream mdOutBase = new ByteArrayOutputStream(1024*1024);
DataOutputStream mdOut = new DataOutputStream(mdOutBase);
// First, write out the version.
mdOut.writeShort(BlockMetadataHeader.VERSION);
// Create a summer and write out its header.
int bytesPerChecksum = conf.getInt("dfs.bytes-per-checksum", 512);
DataChecksum sum =
DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
bytesPerChecksum);
sum.writeHeader(mdOut);
// Buffer to read in a chunk of data.
byte[] buf = new byte[bytesPerChecksum];
// Buffer to store the checksum bytes.
byte[] chk = new byte[sum.getChecksumSize()];
// Read data till we reach the end of the input stream.
int bytesSinceFlush = 0;
while (true) {
// Read some bytes.
int bytesRead = dataStream.read(buf, bytesSinceFlush,
bytesPerChecksum-bytesSinceFlush);
if (bytesRead == -1) {
if (bytesSinceFlush > 0) {
boolean reset = true;
sum.writeValue(chk, 0, reset); // This also resets the sum.
// Write the checksum to the stream.
mdOut.write(chk, 0, chk.length);
bytesSinceFlush = 0;
}
break;
}
// Update the checksum.
sum.update(buf, bytesSinceFlush, bytesRead);
bytesSinceFlush += bytesRead;
// Flush the checksum if necessary.
if (bytesSinceFlush == bytesPerChecksum) {
boolean reset = true;
sum.writeValue(chk, 0, reset); // This also resets the sum.
// Write the checksum to the stream.
mdOut.write(chk, 0, chk.length);
bytesSinceFlush = 0;
}
}
byte[] mdBytes = mdOutBase.toByteArray();
return new DataInputStream(new ByteArrayInputStream(mdBytes));
}
private void computeMetadataAndSendFixedBlock(DatanodeInfo datanode,
File localBlockFile,
LocatedBlock block,
long blockSize)
throws IOException {
LOG.info("Computing metdata");
InputStream blockContents = null;
DataInputStream blockMetadata = null;
try {
blockContents = new FileInputStream(localBlockFile);
blockMetadata = computeMetadata(getConf(), blockContents);
blockContents.close();
// Reopen
blockContents = new FileInputStream(localBlockFile);
sendFixedBlock(datanode, blockContents, blockMetadata, block,
blockSize);
} finally {
if (blockContents != null) {
blockContents.close();
blockContents = null;
}
if (blockMetadata != null) {
blockMetadata.close();
blockMetadata = null;
}
}
}
/**
* Send a generated block to a datanode.
* @param datanode Chosen datanode name in host:port form.
* @param blockContents Stream with the block contents.
* @param metadataIn Stream with the block metadata.
* @param block LocatedBlock identifying the block to be sent.
* @param blockSize size of the block.
* @throws IOException
*/
private void sendFixedBlock(DatanodeInfo datanode,
final InputStream blockContents,
DataInputStream metadataIn,
LocatedBlock block, long blockSize)
throws IOException {
InetSocketAddress target = NetUtils.createSocketAddr(datanode.getName());
Socket sock = SocketChannel.open().socket();
int readTimeout =
getConf().getInt(BLOCKFIX_READ_TIMEOUT,
HdfsServerConstants.READ_TIMEOUT);
NetUtils.connect(sock, target, readTimeout);
sock.setSoTimeout(readTimeout);
int writeTimeout = getConf().getInt(BLOCKFIX_WRITE_TIMEOUT,
HdfsServerConstants.WRITE_TIMEOUT);
OutputStream baseStream = NetUtils.getOutputStream(sock, writeTimeout);
DataOutputStream out =
new DataOutputStream(new BufferedOutputStream(baseStream,
HdfsConstants.
SMALL_BUFFER_SIZE));
boolean corruptChecksumOk = false;
boolean chunkOffsetOK = false;
boolean verifyChecksum = true;
boolean transferToAllowed = false;
try {
LOG.info("Sending block " + block.getBlock() +
" from " + sock.getLocalSocketAddress().toString() +
" to " + sock.getRemoteSocketAddress().toString() +
" " + blockSize + " bytes");
RaidBlockSender blockSender =
new RaidBlockSender(block.getBlock(), blockSize, 0, blockSize,
corruptChecksumOk, chunkOffsetOK, verifyChecksum,
transferToAllowed, metadataIn,
new RaidBlockSender.InputStreamFactory() {
@Override
public InputStream
createStream(long offset) throws IOException {
// we are passing 0 as the offset above,
// so we can safely ignore
// the offset passed
return blockContents;
}
});
DatanodeInfo[] nodes = new DatanodeInfo[]{datanode};
DataChecksum checksum = blockSender.getChecksum();
new Sender(out).writeBlock(block.getBlock(), block.getBlockToken(), "",
nodes, null, BlockConstructionStage.PIPELINE_SETUP_CREATE,
1, 0L, blockSize, 0L, DataChecksum.newDataChecksum(
checksum.getChecksumType(), checksum.getBytesPerChecksum()));
blockSender.sendBlock(out, baseStream);
LOG.info("Sent block " + block.getBlock() + " to " + datanode.getName());
} finally {
out.close();
}
}
/**
* returns the source file corresponding to a parity file
*/
Path sourcePathFromParityPath(Path parityPath) {
String parityPathStr = parityPath.toUri().getPath();
if (parityPathStr.startsWith(xorPrefix)) {
// Remove the prefix to get the source file.
String src = parityPathStr.replaceFirst(xorPrefix, "/");
return new Path(src);
} else if (parityPathStr.startsWith(rsPrefix)) {
// Remove the prefix to get the source file.
String src = parityPathStr.replaceFirst(rsPrefix, "/");
return new Path(src);
}
return null;
}
/**
* Returns the corrupt blocks in a file.
*/
List<LocatedBlock> corruptBlocksInFile(DistributedFileSystem fs,
String uriPath, FileStatus stat)
throws IOException {
List<LocatedBlock> corrupt = new LinkedList<LocatedBlock>();
LocatedBlocks locatedBlocks =
RaidDFSUtil.getBlockLocations(fs, uriPath, 0, stat.getLen());
for (LocatedBlock b: locatedBlocks.getLocatedBlocks()) {
if (b.isCorrupt() ||
(b.getLocations().length == 0 && b.getBlockSize() > 0)) {
corrupt.add(b);
}
}
return corrupt;
}
}
}
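A rough usage sketch of the API above (configuration keys as documented in the class javadoc; the threading and shutdown handling are illustrative only):
// Hedged sketch: instantiate the configured BlockFixer and run it in its own thread.
Configuration conf = new Configuration();
conf.setLong(BlockFixer.BLOCKFIX_INTERVAL, 30 * 1000); // check for corrupt files every 30s
conf.set(BlockFixer.BLOCKFIX_CLASSNAME, "org.apache.hadoop.raid.DistBlockFixer");
BlockFixer fixer = BlockFixer.createBlockFixer(conf); // may throw ClassNotFoundException
Thread fixerThread = new Thread(fixer, "BlockFixer");
fixerThread.start();
// ... later, on shutdown: concrete implementations are expected to observe the flag.
fixer.running = false;
fixerThread.interrupt();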

View File

@ -1,408 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.PolicyList;
/**
* Maintains the configuration xml file that is read into memory.
*/
class ConfigManager {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.ConfigManager");
/** Time to wait between checks of the config file */
public static final long RELOAD_INTERVAL = 10 * 1000;
/** Time to wait between successive runs of all policies */
public static final long RESCAN_INTERVAL = 3600 * 1000;
public static final long HAR_PARTFILE_SIZE = 10 * 1024 * 1024 * 1024L;
public static final int DISTRAID_MAX_JOBS = 10;
public static final int DISTRAID_MAX_FILES = 10000;
/**
* Time to wait after the config file has been modified before reloading it
* (this is done to prevent loading a file that hasn't been fully written).
*/
public static final long RELOAD_WAIT = 5 * 1000;
private Configuration conf; // Hadoop configuration
private String configFileName; // Path to config XML file
private long lastReloadAttempt; // Last time we tried to reload the config file
private long lastSuccessfulReload; // Last time we successfully reloaded config
private boolean lastReloadAttemptFailed = false;
private long reloadInterval = RELOAD_INTERVAL;
private long periodicity; // time between runs of all policies
private long harPartfileSize;
private int maxJobsPerPolicy; // Max no. of jobs running simultaneously for
// a policy.
private int maxFilesPerJob; // Max no. of files raided by a job.
// Reload the configuration
private boolean doReload;
private Thread reloadThread;
private volatile boolean running = false;
// Collection of all configured policies.
Collection<PolicyList> allPolicies = new ArrayList<PolicyList>();
public ConfigManager(Configuration conf) throws IOException, SAXException,
RaidConfigurationException, ClassNotFoundException, ParserConfigurationException {
this.conf = conf;
this.configFileName = conf.get("raid.config.file");
this.doReload = conf.getBoolean("raid.config.reload", true);
this.reloadInterval = conf.getLong("raid.config.reload.interval", RELOAD_INTERVAL);
this.periodicity = conf.getLong("raid.policy.rescan.interval", RESCAN_INTERVAL);
this.harPartfileSize = conf.getLong("raid.har.partfile.size", HAR_PARTFILE_SIZE);
this.maxJobsPerPolicy = conf.getInt("raid.distraid.max.jobs",
DISTRAID_MAX_JOBS);
this.maxFilesPerJob = conf.getInt("raid.distraid.max.files",
DISTRAID_MAX_FILES);
if (configFileName == null) {
String msg = "No raid.config.file given in conf - " +
"the Hadoop Raid utility cannot run. Aborting....";
LOG.warn(msg);
throw new IOException(msg);
}
reloadConfigs();
lastSuccessfulReload = RaidNode.now();
lastReloadAttempt = RaidNode.now();
running = true;
}
/**
* Reload config file if it hasn't been loaded in a while.
* Returns true if the file was reloaded.
*/
public synchronized boolean reloadConfigsIfNecessary() {
long time = RaidNode.now();
if (time > lastReloadAttempt + reloadInterval) {
lastReloadAttempt = time;
try {
File file = new File(configFileName);
long lastModified = file.lastModified();
if (lastModified > lastSuccessfulReload &&
time > lastModified + RELOAD_WAIT) {
reloadConfigs();
lastSuccessfulReload = time;
lastReloadAttemptFailed = false;
return true;
}
} catch (Exception e) {
if (!lastReloadAttemptFailed) {
LOG.error("Failed to reload config file - " +
"will use existing configuration.", e);
}
lastReloadAttemptFailed = true;
}
}
return false;
}
/**
* Updates the in-memory data structures from the config file. This file is
* expected to be in the following XML format:
*
<configuration>
<srcPath prefix="hdfs://hadoop.myhost.com:9000/user/warehouse/u_full/*">
<policy name = RaidScanWeekly>
<destPath> hdfs://dfsname.myhost.com:9000/archive/</destPath>
<parentPolicy> RaidScanMonthly</parentPolicy>
<property>
<name>targetReplication</name>
<value>2</value>
<description> after RAIDing, decrease the replication factor of the file to
this value.
</description>
</property>
<property>
<name>metaReplication</name>
<value>2</value>
<description> the replication factor of the RAID meta file
</description>
</property>
<property>
<name>stripeLength</name>
<value>10</value>
<description> the number of blocks to RAID together
</description>
</property>
</policy>
</srcPath>
</configuration>
*
* XML comments are ignored.
*
* @throws IOException if the config file cannot be read.
* @throws RaidConfigurationException if configuration entries are invalid.
* @throws ClassNotFoundException if user-defined policy classes cannot be loaded
* @throws ParserConfigurationException if XML parser is misconfigured.
* @throws SAXException if config file is malformed.
*/
void reloadConfigs() throws IOException, ParserConfigurationException,
SAXException, ClassNotFoundException, RaidConfigurationException {
if (configFileName == null) {
return;
}
File file = new File(configFileName);
if (!file.exists()) {
throw new RaidConfigurationException("Configuration file " + configFileName +
" does not exist.");
}
// Create a temporary list to hold the new policies; we only save
// it in our fields if we have parsed the entire config file successfully.
List<PolicyList> all = new ArrayList<PolicyList>();
long periodicityValue = periodicity;
// Read and parse the configuration file.
// allow include files in configuration file
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
docBuilderFactory.setIgnoringComments(true);
docBuilderFactory.setNamespaceAware(true);
try {
docBuilderFactory.setXIncludeAware(true);
} catch (UnsupportedOperationException e) {
LOG.error("Failed to set setXIncludeAware(true) for raid parser "
+ docBuilderFactory + ":" + e, e);
}
LOG.error("Reloading config file " + file);
DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
Document doc = builder.parse(file);
Element root = doc.getDocumentElement();
if (!"configuration".equalsIgnoreCase(root.getTagName()))
throw new RaidConfigurationException("Bad configuration file: " +
"top-level element not <configuration>");
NodeList elements = root.getChildNodes();
Map<String, PolicyInfo> existingPolicies =
new HashMap<String, PolicyInfo>();
// loop through all the configured source paths.
for (int i = 0; i < elements.getLength(); i++) {
Node node = elements.item(i);
if (!(node instanceof Element)) {
continue;
}
Element element = (Element)node;
String elementTagName = element.getTagName();
if ("srcPath".equalsIgnoreCase(elementTagName)) {
String srcPathPrefix = element.getAttribute("prefix");
PolicyList policyList = null;
if (srcPathPrefix != null && srcPathPrefix.length() != 0) {
// An empty srcPath has no effect, but its policies are still processed.
// This allows us to define "abstract" parent policies.
policyList = new PolicyList();
all.add(policyList);
policyList.setSrcPath(conf, srcPathPrefix);
}
// loop through all the policies for this source path
NodeList policies = element.getChildNodes();
for (int j = 0; j < policies.getLength(); j++) {
Node node1 = policies.item(j);
if (!(node1 instanceof Element)) {
continue;
}
Element policy = (Element)node1;
if (!"policy".equalsIgnoreCase(policy.getTagName())) {
throw new RaidConfigurationException("Bad configuration file: " +
"Expecting <policy> for srcPath " + srcPathPrefix);
}
String policyName = policy.getAttribute("name");
PolicyInfo curr = new PolicyInfo(policyName, conf);
if (srcPathPrefix != null && srcPathPrefix.length() > 0) {
curr.setSrcPath(srcPathPrefix);
}
// loop through all the properties of this policy
NodeList properties = policy.getChildNodes();
PolicyInfo parent = null;
for (int k = 0; k < properties.getLength(); k++) {
Node node2 = properties.item(k);
if (!(node2 instanceof Element)) {
continue;
}
Element property = (Element)node2;
String propertyName = property.getTagName();
if ("erasureCode".equalsIgnoreCase(propertyName)) {
String text = ((Text)property.getFirstChild()).getData().trim();
LOG.info(policyName + ".erasureCode = " + text);
curr.setErasureCode(text);
} else if ("description".equalsIgnoreCase(propertyName)) {
String text = ((Text)property.getFirstChild()).getData().trim();
curr.setDescription(text);
} else if ("parentPolicy".equalsIgnoreCase(propertyName)) {
String text = ((Text)property.getFirstChild()).getData().trim();
parent = existingPolicies.get(text);
} else if ("property".equalsIgnoreCase(propertyName)) {
NodeList nl = property.getChildNodes();
String pname=null,pvalue=null;
for (int l = 0; l < nl.getLength(); l++){
Node node3 = nl.item(l);
if (!(node3 instanceof Element)) {
continue;
}
Element item = (Element) node3;
String itemName = item.getTagName();
if ("name".equalsIgnoreCase(itemName)){
pname = ((Text)item.getFirstChild()).getData().trim();
} else if ("value".equalsIgnoreCase(itemName)){
pvalue = ((Text)item.getFirstChild()).getData().trim();
}
}
if (pname != null && pvalue != null) {
LOG.info(policyName + "." + pname + " = " + pvalue);
curr.setProperty(pname,pvalue);
}
} else {
LOG.warn("Found bad property " + propertyName +
" for srcPath" + srcPathPrefix +
" policy name " + policyName +
". Ignoring.");
}
} // done with all properties of this policy
PolicyInfo pinfo;
if (parent != null) {
pinfo = new PolicyInfo(policyName, conf);
pinfo.copyFrom(parent);
pinfo.copyFrom(curr);
} else {
pinfo = curr;
}
if (policyList != null) {
policyList.add(pinfo);
}
existingPolicies.put(policyName, pinfo);
} // done with all policies for this srcpath
}
} // done with all srcPaths
setAllPolicies(all);
periodicity = periodicityValue;
return;
}
public synchronized long getPeriodicity() {
return periodicity;
}
public synchronized long getHarPartfileSize() {
return harPartfileSize;
}
public synchronized int getMaxJobsPerPolicy() {
return maxJobsPerPolicy;
}
public synchronized int getMaxFilesPerJob() {
return maxFilesPerJob;
}
/**
* Get a collection of all policies
*/
public synchronized Collection<PolicyList> getAllPolicies() {
return new ArrayList<PolicyList>(allPolicies);
}
/**
* Set a collection of all policies
*/
protected synchronized void setAllPolicies(Collection<PolicyList> value) {
this.allPolicies = value;
}
/**
* Start a background thread to reload the config file
*/
void startReload() {
if (doReload) {
reloadThread = new UpdateThread();
reloadThread.start();
}
}
/**
* Stop the background thread that reloads the config file
*/
void stopReload() throws InterruptedException {
if (reloadThread != null) {
running = false;
reloadThread.interrupt();
reloadThread.join();
reloadThread = null;
}
}
/**
* A thread which reloads the config file.
*/
private class UpdateThread extends Thread {
private UpdateThread() {
super("Raid update thread");
}
public void run() {
while (running) {
try {
Thread.sleep(reloadInterval);
reloadConfigsIfNecessary();
} catch (InterruptedException e) {
// do nothing
} catch (Exception e) {
LOG.error("Failed to reload config file ", e);
}
}
}
}
}
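To tie the pieces together, a hedged sketch of how a daemon such as the RaidNode might drive this class (property names match the constructor above; checked exceptions are omitted for brevity):
// Hypothetical driver: load policies once, then keep them fresh in the background.
Configuration conf = new Configuration();
conf.set("raid.config.file", "/etc/hadoop/raid.xml"); // path is illustrative
ConfigManager configMgr = new ConfigManager(conf);    // parses the file eagerly
configMgr.startReload();                              // background UpdateThread
for (PolicyList policyList : configMgr.getAllPolicies()) {
  // schedule raiding jobs for each configured policy here
}
configMgr.stopReload();                               // join the reload thread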

View File

@ -1,213 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.BlockMissingException;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
/**
* Represents a generic decoder that can be used to read a file with
* corrupt blocks by using the parity file.
* This is an abstract class, concrete subclasses need to implement
* fixErasedBlock.
*/
public abstract class Decoder {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.Decoder");
protected Configuration conf;
protected int stripeSize;
protected int paritySize;
protected Random rand;
protected int bufSize;
protected byte[][] readBufs;
protected byte[][] writeBufs;
Decoder(Configuration conf, int stripeSize, int paritySize) {
this.conf = conf;
this.stripeSize = stripeSize;
this.paritySize = paritySize;
this.rand = new Random();
this.bufSize = conf.getInt("raid.decoder.bufsize", 1024 * 1024);
this.readBufs = new byte[stripeSize + paritySize][];
this.writeBufs = new byte[paritySize][];
allocateBuffers();
}
private void allocateBuffers() {
for (int i = 0; i < stripeSize + paritySize; i++) {
readBufs[i] = new byte[bufSize];
}
for (int i = 0; i < paritySize; i++) {
writeBufs[i] = new byte[bufSize];
}
}
private void configureBuffers(long blockSize) {
if ((long)bufSize > blockSize) {
bufSize = (int)blockSize;
allocateBuffers();
} else if (blockSize % bufSize != 0) {
bufSize = (int)(blockSize / 256L); // heuristic.
if (bufSize == 0) {
bufSize = 1024;
}
bufSize = Math.min(bufSize, 1024 * 1024);
allocateBuffers();
}
}
/**
* The interface to generate a decoded file using the good portion of the
* source file and the parity file.
* @param fs The filesystem containing the source file.
* @param srcFile The damaged source file.
* @param parityFs The filesystem containing the parity file. This could be
* different from fs in case the parity file is part of a HAR archive.
* @param parityFile The parity file.
* @param errorOffset Known location of error in the source file. There could
* be additional errors in the source file that are discovered during
* the decode process.
* @param decodedFile The decoded file. This will have the exact same contents
* as the source file on success.
*/
public void decodeFile(
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
long errorOffset, Path decodedFile) throws IOException {
LOG.info("Create " + decodedFile + " for error at " +
srcFile + ":" + errorOffset);
FileStatus srcStat = fs.getFileStatus(srcFile);
long blockSize = srcStat.getBlockSize();
configureBuffers(blockSize);
// Move the offset to the start of the block.
errorOffset = (errorOffset / blockSize) * blockSize;
// Create the decoded file.
FSDataOutputStream out = fs.create(
decodedFile, false, conf.getInt("io.file.buffer.size", 64 * 1024),
srcStat.getReplication(), srcStat.getBlockSize());
// Open the source file.
FSDataInputStream in = fs.open(
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
// Start copying data block-by-block.
for (long offset = 0; offset < srcStat.getLen(); offset += blockSize) {
long limit = Math.min(blockSize, srcStat.getLen() - offset);
long bytesAlreadyCopied = 0;
if (offset != errorOffset) {
try {
in = fs.open(
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
in.seek(offset);
RaidUtils.copyBytes(in, out, readBufs[0], limit);
assert(out.getPos() == offset + limit);
LOG.info("Copied till " + out.getPos() + " from " + srcFile);
continue;
} catch (BlockMissingException e) {
LOG.warn("Encountered BME at " + srcFile + ":" + offset);
bytesAlreadyCopied = out.getPos() - offset;
} catch (ChecksumException e) {
LOG.warn("Encountered CE at " + srcFile + ":" + offset);
bytesAlreadyCopied = out.getPos() - offset;
}
}
// If we are here, offset == errorOffset or we got an exception.
// Recover the block starting at offset.
fixErasedBlock(fs, srcFile, parityFs, parityFile, blockSize, offset,
bytesAlreadyCopied, limit, out);
}
out.close();
try {
fs.setOwner(decodedFile, srcStat.getOwner(), srcStat.getGroup());
fs.setPermission(decodedFile, srcStat.getPermission());
fs.setTimes(decodedFile, srcStat.getModificationTime(),
srcStat.getAccessTime());
} catch (Exception exc) {
LOG.warn("Didn't manage to copy meta information because of " + exc +
" Ignoring...");
}
}
/**
* Recovers a corrupt block to local file.
*
* @param srcFs The filesystem containing the source file.
* @param srcPath The damaged source file.
* @param parityFs The filesystem containing the parity file. This could be
* different from fs in case the parity file is part of a HAR archive.
* @param parityPath The parity file.
* @param blockSize The block size of the file.
* @param blockOffset Known location of error in the source file. There could
* be additional errors in the source file that are discovered during
* the decode process.
* @param localBlockFile The file to write the block to.
* @param limit The maximum number of bytes to be written out.
* This is to prevent writing beyond the end of the file.
*/
public void recoverBlockToFile(
FileSystem srcFs, Path srcPath, FileSystem parityFs, Path parityPath,
long blockSize, long blockOffset, File localBlockFile, long limit)
throws IOException {
OutputStream out = new FileOutputStream(localBlockFile);
fixErasedBlock(srcFs, srcPath, parityFs, parityPath,
blockSize, blockOffset, 0, limit, out);
out.close();
}
/**
* Implementation-specific mechanism of writing a fixed block.
* @param fs The filesystem containing the source file.
* @param srcFile The damaged source file.
* @param parityFs The filesystem containing the parity file. This could be
* different from fs in case the parity file is part of a HAR archive.
* @param parityFile The parity file.
* @param blockSize The maximum size of a block.
* @param errorOffset Known location of error in the source file. There could
* be additional errors in the source file that are discovered during
* the decode process.
* @param bytesToSkip After the block is generated, this many bytes should be
* skipped before writing to the output. This is needed because the
* output may have a portion of the block written from the source file
* before a new corruption is discovered in the block.
* @param limit The maximum number of bytes to be written out, including
* bytesToSkip. This is to prevent writing beyond the end of the file.
* @param out The output.
*/
protected abstract void fixErasedBlock(
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
long blockSize, long errorOffset, long bytesToSkip, long limit,
OutputStream out) throws IOException;
}
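For context, a hedged sketch of how a caller such as the block fixer uses a concrete decoder (an XORDecoder, as constructed in BlockFixerHelper above; the filesystems, paths, and offsets are placeholders):
// Hypothetical call: regenerate one corrupt block of a source file into a local temp file.
Decoder decoder = new XORDecoder(conf, RaidNode.getStripeLength(conf));
File localBlock = File.createTempFile("recovered", ".tmp");
decoder.recoverBlockToFile(srcFs, srcPath,      // damaged source file
    parityFs, parityPath,                       // matching XOR parity file
    blockSize,                                  // block size of the source file
    corruptOffset,                              // offset of the corrupt block
    localBlock,
    Math.min(blockSize, srcFileSize - corruptOffset)); // do not write past EOF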

View File

@ -1,323 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.Semaphore;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
/**
* Implements depth-first traversal using a Stack object. The traversal
* can be stopped at any time and the state of traversal is saved.
*/
public class DirectoryTraversal {
public static final Log LOG =
LogFactory.getLog("org.apache.hadoop.raid.DirectoryTraversal");
private FileSystem fs;
private List<FileStatus> paths;
private int pathIdx = 0; // Next path to process.
private Stack<Node> stack = new Stack<Node>();
private ExecutorService executor;
private int numThreads;
/**
* A FileFilter object can be used to choose files during directory traversal.
*/
public interface FileFilter {
/**
* @return a boolean value indicating if the file passes the filter.
*/
boolean check(FileStatus f) throws IOException;
}
/**
* Represents a directory node in directory traversal.
*/
static class Node {
private FileStatus path; // Path that this node represents.
private FileStatus[] elements; // Elements in the node.
private int idx = 0;
public Node(FileStatus path, FileStatus[] elements) {
this.path = path;
this.elements = elements;
}
public boolean hasNext() {
return idx < elements.length;
}
public FileStatus next() {
return elements[idx++];
}
public FileStatus path() {
return this.path;
}
}
/**
* Constructor.
* @param fs The filesystem to use.
* @param startPaths A list of paths that need to be traversed
*/
public DirectoryTraversal(FileSystem fs, List<FileStatus> startPaths) {
this(fs, startPaths, 1);
}
public DirectoryTraversal(
FileSystem fs, List<FileStatus> startPaths, int numThreads) {
this.fs = fs;
paths = startPaths;
pathIdx = 0;
this.numThreads = numThreads;
executor = Executors.newFixedThreadPool(numThreads);
}
public List<FileStatus> getFilteredFiles(FileFilter filter, int limit) {
List<FileStatus> filtered = new ArrayList<FileStatus>();
if (limit == 0)
return filtered;
// We need this semaphore to block when the number of running workitems
// is equal to the number of threads. FixedThreadPool limits the number
// of threads, but not the queue size. This way we will limit the memory
// usage.
Semaphore slots = new Semaphore(numThreads);
while (true) {
FilterFileWorkItem work = null;
try {
slots.acquire();
synchronized(filtered) {
if (filtered.size() >= limit) {
slots.release();
break;
}
}
Node next = getNextDirectoryNode();
if (next == null) {
slots.release();
break;
}
work = new FilterFileWorkItem(filter, next, filtered, slots);
} catch (InterruptedException ie) {
slots.release();
break;
} catch (IOException e) {
slots.release();
break;
}
executor.execute(work);
}
try {
// Wait for all submitted items to finish.
slots.acquire(numThreads);
// If this traversal is finished, shutdown the executor.
if (doneTraversal()) {
executor.shutdown();
executor.awaitTermination(1, TimeUnit.HOURS);
}
} catch (InterruptedException ie) {
}
return filtered;
}
class FilterFileWorkItem implements Runnable {
FileFilter filter;
Node dir;
List<FileStatus> filtered;
Semaphore slots;
FilterFileWorkItem(FileFilter filter, Node dir, List<FileStatus> filtered,
Semaphore slots) {
this.slots = slots;
this.filter = filter;
this.dir = dir;
this.filtered = filtered;
}
@SuppressWarnings("deprecation")
public void run() {
try {
LOG.info("Initiating file filtering for " + dir.path.getPath());
for (FileStatus f: dir.elements) {
if (!f.isFile()) {
continue;
}
if (filter.check(f)) {
synchronized(filtered) {
filtered.add(f);
}
}
}
} catch (Exception e) {
LOG.error("Error in directory traversal: "
+ StringUtils.stringifyException(e));
} finally {
slots.release();
}
}
}
/**
* Return the next file.
* @throws IOException
*/
public FileStatus getNextFile() throws IOException {
// Check if traversal is done.
while (!doneTraversal()) {
// If traversal is not done, check if the stack is not empty.
while (!stack.isEmpty()) {
// If the stack is not empty, look at the top node.
Node node = stack.peek();
// Check if the top node has an element.
if (node.hasNext()) {
FileStatus element = node.next();
// Is the next element a directory.
if (!element.isDir()) {
// It is a file, return it.
return element;
}
// Next element is a directory, push it on to the stack and
// continue
try {
pushNewNode(element);
} catch (FileNotFoundException e) {
// Ignore and move to the next element.
}
continue;
} else {
// Top node has no next element, pop it and continue.
stack.pop();
continue;
}
}
// If the stack is empty, do we have more paths?
while (!paths.isEmpty()) {
FileStatus next = paths.remove(0);
pathIdx++;
if (!next.isDir()) {
return next;
}
try {
pushNewNode(next);
} catch (FileNotFoundException e) {
continue;
}
break;
}
}
return null;
}
/**
* Gets the next directory in the tree. The algorithm returns deeper directories
* first.
* @return A FileStatus representing the directory.
* @throws IOException
*/
public FileStatus getNextDirectory() throws IOException {
Node dirNode = getNextDirectoryNode();
if (dirNode != null) {
return dirNode.path;
}
return null;
}
private Node getNextDirectoryNode() throws IOException {
// Check if traversal is done.
while (!doneTraversal()) {
// If traversal is not done, check if the stack is not empty.
while (!stack.isEmpty()) {
// If the stack is not empty, look at the top node.
Node node = stack.peek();
// Check if the top node has an element.
if (node.hasNext()) {
FileStatus element = node.next();
// Is the next element a directory.
if (element.isDir()) {
// Next element is a directory, push it on to the stack and
// continue
try {
pushNewNode(element);
} catch (FileNotFoundException e) {
// Ignore and move to the next element.
}
continue;
}
} else {
stack.pop();
return node;
}
}
// If the stack is empty, do we have more paths?
while (!paths.isEmpty()) {
FileStatus next = paths.remove(0);
pathIdx++;
if (next.isDir()) {
try {
pushNewNode(next);
} catch (FileNotFoundException e) {
continue;
}
break;
}
}
}
return null;
}
private void pushNewNode(FileStatus stat) throws IOException {
if (!stat.isDir()) {
return;
}
Path p = stat.getPath();
FileStatus[] elements = fs.listStatus(p);
Node newNode = new Node(stat, (elements == null? new FileStatus[0]: elements));
stack.push(newNode);
}
public boolean doneTraversal() {
return paths.isEmpty() && stack.isEmpty();
}
}
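The class above is driven either one entry at a time (getNextFile/getNextDirectory) or in bulk through getFilteredFiles, which fans the FileFilter checks out over the worker threads. A minimal usage sketch, assuming a configured FileSystem; the start path and the 1 MB size threshold are hypothetical values chosen only for illustration:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.raid.DirectoryTraversal;

public class TraversalExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Start the traversal at a single directory (hypothetical path).
    List<FileStatus> start = new ArrayList<FileStatus>();
    start.add(fs.getFileStatus(new Path("/user/raidtest")));
    DirectoryTraversal dt = new DirectoryTraversal(fs, start, 4); // 4 worker threads

    // Collect up to 100 files larger than 1 MB; check() runs in the worker threads.
    DirectoryTraversal.FileFilter bigFiles = new DirectoryTraversal.FileFilter() {
      public boolean check(FileStatus f) throws IOException {
        return f.getLen() > 1024 * 1024;
      }
    };
    for (FileStatus f : dt.getFilteredFiles(bigFiles, 100)) {
      System.out.println(f.getPath());
    }
  }
}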

View File

@ -1,660 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.io.PrintStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.util.List;
import java.util.LinkedList;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Date;
import java.text.SimpleDateFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
/**
* distributed block fixer that uses map reduce jobs to fix corrupt files
*
* configuration options
* raid.blockfix.filespertask - number of corrupt files to fix in a single
* map reduce task (i.e., at one mapper node)
*
* raid.blockfix.maxpendingfiles - maximum number of files to fix
* simultaneously
*/
public class DistBlockFixer extends BlockFixer {
// volatile should be sufficient since only the block fixer thread
// updates numJobsRunning (other threads may read)
private volatile int numJobsRunning = 0;
private static final String WORK_DIR_PREFIX = "blockfixer";
private static final String IN_FILE_SUFFIX = ".in";
private static final String PART_PREFIX = "part-";
private static final String BLOCKFIX_FILES_PER_TASK =
"raid.blockfix.filespertask";
private static final String BLOCKFIX_MAX_PENDING_FILES =
"raid.blockfix.maxpendingfiles";
// default number of files to fix in a task
private static final long DEFAULT_BLOCKFIX_FILES_PER_TASK = 10L;
// default number of files to fix simultaneously
private static final long DEFAULT_BLOCKFIX_MAX_PENDING_FILES = 1000L;
protected static final Log LOG = LogFactory.getLog(DistBlockFixer.class);
// number of files to fix in a task
private long filesPerTask;
// number of files to fix simultaneously
final private long maxPendingFiles;
// number of files being fixed right now
private long pendingFiles;
private long lastCheckTime;
private final SimpleDateFormat dateFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private Map<String, CorruptFileInfo> fileIndex =
new HashMap<String, CorruptFileInfo>();
private Map<Job, List<CorruptFileInfo>> jobIndex =
new HashMap<Job, List<CorruptFileInfo>>();
static enum Counter {
FILES_SUCCEEDED, FILES_FAILED, FILES_NOACTION
}
public DistBlockFixer(Configuration conf) {
super(conf);
filesPerTask = DistBlockFixer.filesPerTask(getConf());
maxPendingFiles = DistBlockFixer.maxPendingFiles(getConf());
pendingFiles = 0L;
// start off due for the first iteration
lastCheckTime = Time.now() - blockFixInterval;
}
/**
* determines how many files to fix in a single task
*/
protected static long filesPerTask(Configuration conf) {
return conf.getLong(BLOCKFIX_FILES_PER_TASK,
DEFAULT_BLOCKFIX_FILES_PER_TASK);
}
/**
* determines how many files to fix simultaneously
*/
protected static long maxPendingFiles(Configuration conf) {
return conf.getLong(BLOCKFIX_MAX_PENDING_FILES,
DEFAULT_BLOCKFIX_MAX_PENDING_FILES);
}
/**
* runs the block fixer periodically
*/
public void run() {
while (running) {
// check if it is time to run the block fixer
long now = Time.now();
if (now >= lastCheckTime + blockFixInterval) {
lastCheckTime = now;
try {
checkAndFixBlocks(now);
} catch (InterruptedException ignore) {
LOG.info("interrupted");
} catch (Exception e) {
// log exceptions and keep running
LOG.error(StringUtils.stringifyException(e));
} catch (Error e) {
LOG.error(StringUtils.stringifyException(e));
throw e;
}
}
// try to sleep for the remainder of the interval
long sleepPeriod = (lastCheckTime - Time.now()) +
blockFixInterval;
if ((sleepPeriod > 0L) && running) {
try {
Thread.sleep(sleepPeriod);
} catch (InterruptedException ignore) {
LOG.info("interrupted");
}
}
}
}
/**
* checks for corrupt blocks and fixes them (if any)
*/
private void checkAndFixBlocks(long startTime)
throws IOException, InterruptedException, ClassNotFoundException {
checkJobs();
if (pendingFiles >= maxPendingFiles) {
return;
}
List<Path> corruptFiles = getCorruptFiles();
filterUnfixableSourceFiles(corruptFiles.iterator());
String startTimeStr = dateFormat.format(new Date(startTime));
LOG.info("found " + corruptFiles.size() + " corrupt files");
if (corruptFiles.size() > 0) {
String jobName = "blockfixer." + startTime;
startJob(jobName, corruptFiles);
}
}
/**
* Handle a failed job.
*/
private void failJob(Job job) throws IOException {
// assume no files have been fixed
LOG.error("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
") finished (failed)");
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
fileInfo.fail();
}
numJobsRunning--;
}
/**
* Handle a successful job.
*/
private void succeedJob(Job job, long filesSucceeded, long filesFailed)
throws IOException {
LOG.info("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
") finished (succeeded)");
if (filesFailed == 0) {
// no files have failed
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
fileInfo.succeed();
}
} else {
// we have to look at the output to check which files have failed
Set<String> failedFiles = getFailedFiles(job);
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
if (failedFiles.contains(fileInfo.getFile().toString())) {
fileInfo.fail();
} else {
// call succeed for files that have succeeded or for which no action
// was taken
fileInfo.succeed();
}
}
}
// report succeeded files to metrics
incrFilesFixed(filesSucceeded);
numJobsRunning--;
}
/**
* checks whether jobs have completed and updates the job and file indexes
*/
private void checkJobs() throws IOException {
Iterator<Job> jobIter = jobIndex.keySet().iterator();
while(jobIter.hasNext()) {
Job job = jobIter.next();
try {
if (job.isComplete()) {
long filesSucceeded =
job.getCounters().findCounter(Counter.FILES_SUCCEEDED).getValue();
long filesFailed =
job.getCounters().findCounter(Counter.FILES_FAILED).getValue();
long filesNoAction =
job.getCounters().findCounter(Counter.FILES_NOACTION).getValue();
int files = jobIndex.get(job).size();
if (job.isSuccessful() &&
(filesSucceeded + filesFailed + filesNoAction ==
((long) files))) {
// job has processed all files
succeedJob(job, filesSucceeded, filesFailed);
} else {
failJob(job);
}
jobIter.remove();
} else {
LOG.info("job " + job.getJobName() + " still running");
}
} catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
failJob(job);
try {
job.killJob();
} catch (Exception ee) {
LOG.error(StringUtils.stringifyException(ee));
}
jobIter.remove();
}
}
purgeFileIndex();
}
/**
* determines which files have failed for a given job
*/
private Set<String> getFailedFiles(Job job) throws IOException {
Set<String> failedFiles = new HashSet<String>();
Path outDir = SequenceFileOutputFormat.getOutputPath(job);
FileSystem fs = outDir.getFileSystem(getConf());
if (!fs.getFileStatus(outDir).isDir()) {
throw new IOException(outDir.toString() + " is not a directory");
}
FileStatus[] files = fs.listStatus(outDir);
for (FileStatus f: files) {
Path fPath = f.getPath();
if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
LOG.info("opening " + fPath.toString());
SequenceFile.Reader reader =
new SequenceFile.Reader(fs, fPath, getConf());
Text key = new Text();
Text value = new Text();
while (reader.next(key, value)) {
failedFiles.add(key.toString());
}
reader.close();
}
}
return failedFiles;
}
/**
* purge expired jobs from the file index
*/
private void purgeFileIndex() {
Iterator<String> fileIter = fileIndex.keySet().iterator();
while(fileIter.hasNext()) {
String file = fileIter.next();
if (fileIndex.get(file).isExpired()) {
fileIter.remove();
}
}
}
/**
* creates and submits a job, updates file index and job index
*/
private Job startJob(String jobName, List<Path> corruptFiles)
throws IOException, InterruptedException, ClassNotFoundException {
Path inDir = new Path(WORK_DIR_PREFIX + "/in/" + jobName);
Path outDir = new Path(WORK_DIR_PREFIX + "/out/" + jobName);
List<Path> filesInJob = createInputFile(jobName, inDir, corruptFiles);
Configuration jobConf = new Configuration(getConf());
Job job = new Job(jobConf, jobName);
job.setJarByClass(getClass());
job.setMapperClass(DistBlockFixerMapper.class);
job.setNumReduceTasks(0);
job.setInputFormatClass(DistBlockFixerInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
DistBlockFixerInputFormat.setInputPaths(job, inDir);
SequenceFileOutputFormat.setOutputPath(job, outDir);
job.submit();
LOG.info("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
") started");
// submit the job before inserting it into the index
// this way, if submit fails, we won't have added anything to the index
insertJob(job, filesInJob);
return job;
}
/**
* inserts new job into file index and job index
*/
private void insertJob(Job job, List<Path> corruptFiles) {
List<CorruptFileInfo> fileInfos = new LinkedList<CorruptFileInfo>();
for (Path file: corruptFiles) {
CorruptFileInfo fileInfo = new CorruptFileInfo(file, job);
fileInfos.add(fileInfo);
fileIndex.put(file.toString(), fileInfo);
}
jobIndex.put(job, fileInfos);
numJobsRunning++;
}
/**
* creates the input file (containing the names of the files to be fixed)
*/
private List<Path> createInputFile(String jobName, Path inDir,
List<Path> corruptFiles)
throws IOException {
Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
FileSystem fs = file.getFileSystem(getConf());
SequenceFile.Writer fileOut = SequenceFile.createWriter(fs, getConf(), file,
LongWritable.class,
Text.class);
long index = 0L;
List<Path> filesAdded = new LinkedList<Path>();
for (Path corruptFile: corruptFiles) {
if (pendingFiles >= maxPendingFiles) {
break;
}
String corruptFileName = corruptFile.toString();
fileOut.append(new LongWritable(index++), new Text(corruptFileName));
filesAdded.add(corruptFile);
pendingFiles++;
if (index % filesPerTask == 0) {
fileOut.sync(); // create sync point to make sure we can split here
}
}
fileOut.close();
return filesAdded;
}
/**
* gets a list of corrupt files from the name node
* and filters out files that are currently being fixed or
* that were recently fixed
*/
private List<Path> getCorruptFiles() throws IOException {
DistributedFileSystem dfs = (DistributedFileSystem)
(new Path("/")).getFileSystem(getConf());
String[] files = RaidDFSUtil.getCorruptFiles(dfs);
List<Path> corruptFiles = new LinkedList<Path>();
for (String f: files) {
Path p = new Path(f);
// filter out files that are being fixed or that were recently fixed
if (!fileIndex.containsKey(p.toString())) {
corruptFiles.add(p);
}
}
RaidUtils.filterTrash(getConf(), corruptFiles);
return corruptFiles;
}
/**
* returns the number of map reduce jobs running
*/
public int jobsRunning() {
return numJobsRunning;
}
/**
* hold information about a corrupt file that is being fixed
*/
class CorruptFileInfo {
private Path file;
private Job job;
private boolean done;
private long time;
public CorruptFileInfo(Path file, Job job) {
this.file = file;
this.job = job;
this.done = false;
this.time = 0;
}
public boolean isDone() {
return done;
}
public boolean isExpired() {
return done && ((Time.now() - time) > historyInterval);
}
public Path getFile() {
return file;
}
/**
* updates file index to record a failed attempt at fixing a file,
* immediately removes the entry from the file index
* (instead of letting it expire)
* so that we can retry right away
*/
public void fail() {
// remove this file from the index
CorruptFileInfo removed = fileIndex.remove(file.toString());
if (removed == null) {
LOG.error("trying to remove file not in file index: " +
file.toString());
} else {
LOG.error("fixing " + file.toString() + " failed");
}
pendingFiles--;
}
/**
* marks a file as fixed successfully
* and sets time stamp for expiry after specified interval
*/
public void succeed() {
// leave the file in the index,
// will be purged later
job = null;
done = true;
time = Time.now();
LOG.info("fixing " + file.toString() + " succeeded");
pendingFiles--;
}
}
static class DistBlockFixerInputFormat
extends SequenceFileInputFormat<LongWritable, Text> {
protected static final Log LOG =
LogFactory.getLog(DistBlockFixerMapper.class);
/**
* splits the input file into tasks, each handled by a single mapper.
* the input sequence file has to be read to do this, since splits are
* created every filesPerTask items rather than by byte ranges
*/
@Override
public List <InputSplit> getSplits(JobContext job)
throws IOException {
long filesPerTask = DistBlockFixer.filesPerTask(job.getConfiguration());
Path[] inPaths = getInputPaths(job);
List<InputSplit> splits = new LinkedList<InputSplit>();
long fileCounter = 0;
for (Path inPath: inPaths) {
FileSystem fs = inPath.getFileSystem(job.getConfiguration());
if (!fs.getFileStatus(inPath).isDir()) {
throw new IOException(inPath.toString() + " is not a directory");
}
FileStatus[] inFiles = fs.listStatus(inPath);
for (FileStatus inFileStatus: inFiles) {
Path inFile = inFileStatus.getPath();
if (!inFileStatus.isDir() &&
(inFile.getName().equals(job.getJobName() + IN_FILE_SUFFIX))) {
fileCounter++;
SequenceFile.Reader inFileReader =
new SequenceFile.Reader(fs, inFile, job.getConfiguration());
long startPos = inFileReader.getPosition();
long counter = 0;
// create an input split every filesPerTask items in the sequence
LongWritable key = new LongWritable();
Text value = new Text();
try {
while (inFileReader.next(key, value)) {
if (counter % filesPerTask == filesPerTask - 1L) {
splits.add(new FileSplit(inFile, startPos,
inFileReader.getPosition() -
startPos,
null));
startPos = inFileReader.getPosition();
}
counter++;
}
// create input split for remaining items if necessary
// this includes the case where no splits were created by the loop
if (startPos != inFileReader.getPosition()) {
splits.add(new FileSplit(inFile, startPos,
inFileReader.getPosition() - startPos,
null));
}
} finally {
inFileReader.close();
}
}
}
}
LOG.info("created " + splits.size() + " input splits from " +
fileCounter + " files");
return splits;
}
/**
* indicates that input file can be split
*/
@Override
public boolean isSplitable (JobContext job, Path file) {
return true;
}
}
/**
* mapper for fixing files with corrupt blocks
*/
static class DistBlockFixerMapper
extends Mapper<LongWritable, Text, Text, Text> {
protected static final Log LOG =
LogFactory.getLog(DistBlockFixerMapper.class);
/**
* fix the blocks of a single file
*/
@Override
public void map(LongWritable key, Text fileText, Context context)
throws IOException, InterruptedException {
BlockFixerHelper helper =
new BlockFixerHelper(context.getConfiguration());
String fileStr = fileText.toString();
LOG.info("fixing " + fileStr);
Path file = new Path(fileStr);
boolean success = false;
try {
boolean fixed = helper.fixFile(file, context);
if (fixed) {
context.getCounter(Counter.FILES_SUCCEEDED).increment(1L);
} else {
context.getCounter(Counter.FILES_NOACTION).increment(1L);
}
} catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
// report file as failed
context.getCounter(Counter.FILES_FAILED).increment(1L);
String outkey = fileStr;
String outval = "failed";
context.write(new Text(outkey), new Text(outval));
}
context.progress();
}
}
}
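The two knobs documented in the class javadoc above are ordinary Configuration longs; nothing else has to be wired up for them to take effect. A hedged configuration sketch (the values are illustrative, not recommendations):

import org.apache.hadoop.conf.Configuration;

public class BlockFixerConfigExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Fix 20 corrupt files per map task instead of the default 10.
    conf.setLong("raid.blockfix.filespertask", 20L);
    // Keep at most 500 files in flight at a time instead of the default 1000.
    conf.setLong("raid.blockfix.maxpendingfiles", 500L);
    // A DistBlockFixer built from this Configuration picks these values up in
    // its constructor via filesPerTask(conf) and maxPendingFiles(conf).
  }
}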

View File

@ -1,374 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Date;
import java.text.SimpleDateFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.raid.RaidNode.Statistics;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.StringUtils;
public class DistRaid extends Configured {
protected static final Log LOG = LogFactory.getLog(DistRaid.class);
static final String NAME = "distRaid";
static final String JOB_DIR_LABEL = NAME + ".job.dir";
static final int OP_LIST_BLOCK_SIZE = 32 * 1024 * 1024; // block size of control file
static final short OP_LIST_REPLICATION = 10; // replication factor of control file
public static final String OPS_PER_TASK = "raid.distraid.opspertask";
private static final int DEFAULT_OPS_PER_TASK = 100;
private static final int SYNC_FILE_MAX = 10;
private static final SimpleDateFormat dateForm = new SimpleDateFormat("yyyy-MM-dd HH:mm");
static enum Counter {
FILES_SUCCEEDED, FILES_FAILED, PROCESSED_BLOCKS, PROCESSED_SIZE, META_BLOCKS, META_SIZE
}
public DistRaid(Configuration conf) {
super(conf);
}
private static final Random RANDOM = new Random();
protected static String getRandomId() {
return Integer.toString(RANDOM.nextInt(Integer.MAX_VALUE), 36);
}
/**
* Helper class that pairs a raid policy with the source paths it applies to.
*/
public static class RaidPolicyPathPair {
public PolicyInfo policy;
public List<FileStatus> srcPaths;
RaidPolicyPathPair(PolicyInfo policy, List<FileStatus> srcPaths) {
this.policy = policy;
this.srcPaths = srcPaths;
}
}
List<RaidPolicyPathPair> raidPolicyPathPairList = new ArrayList<RaidPolicyPathPair>();
private Job runningJob;
private String lastReport = null;
/** Responsible for generating splits of the src file list. */
static class DistRaidInputFormat extends
SequenceFileInputFormat<Text, PolicyInfo> {
/**
* Produce one split for every raid.distraid.opspertask entries in the
* single input sequence file, so that each map task raids a bounded
* number of files.
*
* @param job
* The job context, which carries the Configuration
*/
public List<InputSplit> getSplits(JobContext job) throws IOException {
Configuration conf = job.getConfiguration();
// We create only one input file. So just get the first file in the first
// input directory.
Path inDir = getInputPaths(job)[0];
FileSystem fs = inDir.getFileSystem(conf);
FileStatus[] inputFiles = fs.listStatus(inDir);
Path inputFile = inputFiles[0].getPath();
List<InputSplit> splits = new ArrayList<InputSplit>();
SequenceFile.Reader in =
new SequenceFile.Reader(conf, Reader.file(inputFile));
long prev = 0L;
final int opsPerTask = conf.getInt(OPS_PER_TASK, DEFAULT_OPS_PER_TASK);
try {
Text key = new Text();
PolicyInfo value = new PolicyInfo();
int count = 0; // count src
while (in.next(key, value)) {
long curr = in.getPosition();
long delta = curr - prev;
if (++count > opsPerTask) {
count = 0;
splits.add(new FileSplit(inputFile, prev, delta, (String[]) null));
prev = curr;
}
}
} finally {
in.close();
}
long remaining = fs.getFileStatus(inputFile).getLen() - prev;
if (remaining != 0) {
splits.add(new FileSplit(inputFile, prev, remaining, (String[]) null));
}
return splits;
}
}
/** The mapper for raiding files. */
static class DistRaidMapper extends Mapper<Text, PolicyInfo, Text, Text> {
private boolean ignoreFailures = false;
private int failcount = 0;
private int succeedcount = 0;
private Statistics st = new Statistics();
private String getCountString() {
return "Succeeded: " + succeedcount + " Failed: " + failcount;
}
/** Run a FileOperation
* @throws IOException
* @throws InterruptedException */
public void map(Text key, PolicyInfo policy, Context context)
throws IOException, InterruptedException {
try {
Configuration jobConf = context.getConfiguration();
LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
Path p = new Path(key.toString());
FileStatus fs = p.getFileSystem(jobConf).getFileStatus(p);
st.clear();
RaidNode.doRaid(jobConf, policy, fs, st, context);
++succeedcount;
context.getCounter(Counter.PROCESSED_BLOCKS).increment(st.numProcessedBlocks);
context.getCounter(Counter.PROCESSED_SIZE).increment(st.processedSize);
context.getCounter(Counter.META_BLOCKS).increment(st.numMetaBlocks);
context.getCounter(Counter.META_SIZE).increment(st.metaSize);
context.getCounter(Counter.FILES_SUCCEEDED).increment(1);
} catch (IOException e) {
++failcount;
context.getCounter(Counter.FILES_FAILED).increment(1);
String s = "FAIL: " + policy + ", " + key + " "
+ StringUtils.stringifyException(e);
context.write(new Text(key), new Text(s));
LOG.error(s);
} finally {
context.setStatus(getCountString());
}
}
/** {@inheritDoc} */
public void close() throws IOException {
if (failcount == 0 || ignoreFailures) {
return;
}
throw new IOException(getCountString());
}
}
/**
* Set options specified in raid.scheduleroption.
* The string should be formatted as key:value[,key:value]*
*/
static void setSchedulerOption(Configuration conf) {
String schedulerOption = conf.get("raid.scheduleroption");
if (schedulerOption != null) {
// Parse the scheduler option to get key:value pairs.
String[] keyValues = schedulerOption.trim().split(",");
for (String keyValue: keyValues) {
String[] fields = keyValue.trim().split(":");
String key = fields[0].trim();
String value = fields[1].trim();
conf.set(key, value);
}
}
}
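// Illustration of the format accepted above (the property names are
// assumptions picked for the example; any key:value pairs are copied
// verbatim into the job configuration):
//
//   conf.set("raid.scheduleroption",
//            "mapred.fairscheduler.pool:raid,mapreduce.job.priority:LOW");
//
// setSchedulerOption() would then set mapred.fairscheduler.pool=raid and
// mapreduce.job.priority=LOW on the raiding job's configuration.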
/**
* Creates a new Job object.
* @param conf
* @return a Job object
* @throws IOException
*/
static Job createJob(Configuration jobConf) throws IOException {
String jobName = NAME + " " + dateForm.format(new Date(RaidNode.now()));
setSchedulerOption(jobConf);
Job job = Job.getInstance(jobConf, jobName);
job.setSpeculativeExecution(false);
job.setJarByClass(DistRaid.class);
job.setInputFormatClass(DistRaidInputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setMapperClass(DistRaidMapper.class);
job.setNumReduceTasks(0);
return job;
}
/** Add paths to be raided */
public void addRaidPaths(PolicyInfo info, List<FileStatus> paths) {
raidPolicyPathPairList.add(new RaidPolicyPathPair(info, paths));
}
/** Invokes a map-reduce job do parallel raiding.
* @return true if the job was started, false otherwise
* @throws InterruptedException
*/
public boolean startDistRaid() throws IOException {
assert(raidPolicyPathPairList.size() > 0);
Job job = createJob(getConf());
createInputFile(job);
try {
job.submit();
this.runningJob = job;
LOG.info("Job Started: " + runningJob.getJobID());
return true;
} catch (ClassNotFoundException e) {
throw new IOException(e);
} catch (InterruptedException e) {
return false;
}
}
/** Checks if the map-reduce job has completed.
*
* @return true if the job completed, false otherwise.
* @throws IOException
*/
public boolean checkComplete() throws IOException {
JobID jobID = runningJob.getJobID();
LOG.info("Checking job " + jobID);
try {
if (runningJob.isComplete()) {
// delete job directory
Configuration jobConf = runningJob.getConfiguration();
final String jobdir = jobConf.get(JOB_DIR_LABEL);
if (jobdir != null) {
final Path jobpath = new Path(jobdir);
jobpath.getFileSystem(jobConf).delete(jobpath, true);
}
if (runningJob.isSuccessful()) {
LOG.info("Job Complete(Succeeded): " + jobID);
} else {
LOG.error("Job Complete(Failed): " + jobID);
}
raidPolicyPathPairList.clear();
return true;
} else {
String report = (" job " + jobID +
" map " + StringUtils.formatPercent(runningJob.mapProgress(), 0)+
" reduce " + StringUtils.formatPercent(runningJob.reduceProgress(), 0));
if (!report.equals(lastReport)) {
LOG.info(report);
lastReport = report;
}
return false;
}
} catch (InterruptedException e) {
return false;
}
}
public boolean successful() throws IOException {
try {
return runningJob.isSuccessful();
} catch (InterruptedException e) {
return false;
}
}
/**
* Sets up the input file, which contains the list of files to be raided.
*
* @throws IOException
*/
private void createInputFile(Job job) throws IOException {
Configuration jobConf = job.getConfiguration();
Path jobDir = new Path(JOB_DIR_LABEL + getRandomId());
Path inDir = new Path(jobDir, "in");
Path outDir = new Path(jobDir, "out");
FileInputFormat.setInputPaths(job, inDir);
FileOutputFormat.setOutputPath(job, outDir);
Path opList = new Path(inDir, NAME);
Configuration tmp = new Configuration(jobConf);
// The control file should have small size blocks. This helps
// in spreading out the load from mappers that will be spawned.
tmp.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);
FileSystem fs = opList.getFileSystem(tmp);
int opCount = 0, synCount = 0;
SequenceFile.Writer opWriter = null;
try {
opWriter = SequenceFile.createWriter(
jobConf, Writer.file(opList), Writer.keyClass(Text.class),
Writer.valueClass(PolicyInfo.class),
Writer.compression(SequenceFile.CompressionType.NONE));
for (RaidPolicyPathPair p : raidPolicyPathPairList) {
// If a large set of files is raided for the first time, files in the
// same directory (which tend to have similar sizes) would end up in the
// same map. This shuffle mixes things up, allowing a better mix of files.
java.util.Collections.shuffle(p.srcPaths);
for (FileStatus st : p.srcPaths) {
opWriter.append(new Text(st.getPath().toString()), p.policy);
opCount++;
if (++synCount > SYNC_FILE_MAX) {
opWriter.sync();
synCount = 0;
}
}
}
} finally {
if (opWriter != null) {
opWriter.close();
}
// increase replication for control file
fs.setReplication(opList, OP_LIST_REPLICATION);
}
raidPolicyPathPairList.clear();
LOG.info("Number of files=" + opCount);
}
}
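A minimal driver sketch for the class above. The PolicyInfo and the list of source files are assumed to come from the raid policy configuration, and the ten-second polling interval is arbitrary:

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.raid.DistRaid;
import org.apache.hadoop.raid.protocol.PolicyInfo;

public class DistRaidDriverExample {
  /** Submits one raiding job for the given policy and waits for it to finish. */
  static boolean raid(Configuration conf, PolicyInfo policy, List<FileStatus> paths)
      throws IOException, InterruptedException {
    DistRaid dr = new DistRaid(conf);
    dr.addRaidPaths(policy, paths);
    if (!dr.startDistRaid()) {
      return false;                  // job submission failed
    }
    while (!dr.checkComplete()) {    // checkComplete() also logs map/reduce progress
      Thread.sleep(10000);           // poll every 10 seconds (arbitrary)
    }
    return dr.successful();
  }
}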

View File

@ -1,106 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.raid.protocol.PolicyInfo;
/**
* Implementation of {@link RaidNode} that uses map reduce jobs to raid files.
*/
public class DistRaidNode extends RaidNode {
public static final Log LOG = LogFactory.getLog(DistRaidNode.class);
/** Daemon thread to monitor raid job progress */
JobMonitor jobMonitor = null;
Daemon jobMonitorThread = null;
public DistRaidNode(Configuration conf) throws IOException {
super(conf);
this.jobMonitor = new JobMonitor(conf);
this.jobMonitorThread = new Daemon(this.jobMonitor);
this.jobMonitorThread.start();
}
/**
* {@inheritDoc}
*/
@Override
public void join() {
super.join();
try {
if (jobMonitorThread != null) jobMonitorThread.join();
} catch (InterruptedException ie) {
// do nothing
}
}
/**
* {@inheritDoc}
*/
@Override
public void stop() {
if (stopRequested) {
return;
}
super.stop();
if (jobMonitor != null) jobMonitor.running = false;
if (jobMonitorThread != null) jobMonitorThread.interrupt();
}
/**
* {@inheritDoc}
*/
@Override
void raidFiles(PolicyInfo info, List<FileStatus> paths) throws IOException {
// We already checked that no job for this policy is running
// So we can start a new job.
DistRaid dr = new DistRaid(conf);
//add paths for distributed raiding
dr.addRaidPaths(info, paths);
boolean started = dr.startDistRaid();
if (started) {
jobMonitor.monitorJob(info.getName(), dr);
}
}
/**
* {@inheritDoc}
*/
@Override
int getRunningJobsForPolicy(String policyName) {
return jobMonitor.runningJobsCount(policyName);
}
}
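The JobMonitor wiring above is the standard daemon-thread pattern from org.apache.hadoop.util.Daemon. A stripped-down sketch of the same start/stop/join lifecycle, with a placeholder Runnable standing in for JobMonitor (which is not shown in this diff):

import org.apache.hadoop.util.Daemon;

public class MonitorLifecycleExample {
  public static void main(String[] args) throws InterruptedException {
    // Placeholder for JobMonitor: any long-running Runnable works here.
    Runnable monitor = new Runnable() {
      public void run() {
        while (!Thread.currentThread().isInterrupted()) {
          // ... poll submitted jobs, as JobMonitor does ...
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e) {
            return;                         // stop when interrupted
          }
        }
      }
    };
    Daemon thread = new Daemon(monitor);    // same construction as in DistRaidNode
    thread.start();
    Thread.sleep(3000);
    thread.interrupt();                     // mirrors DistRaidNode.stop()
    thread.join();                          // mirrors DistRaidNode.join()
  }
}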

View File

@ -1,350 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Progressable;
/**
* Represents a generic encoder that can generate a parity file for a source
* file.
* This is an abstract class, concrete subclasses need to implement
* encodeFileImpl.
*/
public abstract class Encoder {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.Encoder");
protected Configuration conf;
protected int stripeSize;
protected int paritySize;
protected Random rand;
protected int bufSize;
protected byte[][] readBufs;
protected byte[][] writeBufs;
/**
* A class that acts as a sink for data, similar to /dev/null.
*/
static class NullOutputStream extends OutputStream {
public void write(byte[] b) throws IOException {}
public void write(int b) throws IOException {}
public void write(byte[] b, int off, int len) throws IOException {}
}
Encoder(
Configuration conf, int stripeSize, int paritySize) {
this.conf = conf;
this.stripeSize = stripeSize;
this.paritySize = paritySize;
this.rand = new Random();
this.bufSize = conf.getInt("raid.encoder.bufsize", 1024 * 1024);
this.readBufs = new byte[stripeSize][];
this.writeBufs = new byte[paritySize][];
allocateBuffers();
}
private void allocateBuffers() {
for (int i = 0; i < stripeSize; i++) {
readBufs[i] = new byte[bufSize];
}
for (int i = 0; i < paritySize; i++) {
writeBufs[i] = new byte[bufSize];
}
}
private void configureBuffers(long blockSize) {
if ((long)bufSize > blockSize) {
bufSize = (int)blockSize;
allocateBuffers();
} else if (blockSize % bufSize != 0) {
bufSize = (int)(blockSize / 256L); // heuristic.
if (bufSize == 0) {
bufSize = 1024;
}
bufSize = Math.min(bufSize, 1024 * 1024);
allocateBuffers();
}
}
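// Worked example of the heuristic above (numbers are illustrative): with the
// default bufSize of 1 MB and a block size of 3,000,000 bytes, bufSize is not
// larger than the block size but the block size is not a multiple of it, so
// bufSize becomes 3,000,000 / 256 = 11,718 bytes (floored at 1024, capped at
// 1 MB) and the read/write buffers are reallocated at that size.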
/**
* The interface to use to generate a parity file.
* This method can be called multiple times with the same Encoder object,
* thus allowing reuse of the buffers allocated by the Encoder object.
*
* @param fs The filesystem containing the source file.
* @param srcFile The source file.
* @param parityFile The parity file to be generated.
*/
public void encodeFile(
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
short parityRepl, Progressable reporter) throws IOException {
FileStatus srcStat = fs.getFileStatus(srcFile);
long srcSize = srcStat.getLen();
long blockSize = srcStat.getBlockSize();
configureBuffers(blockSize);
// Create a tmp file to which we will write first.
Path tmpDir = getParityTempPath();
if (!parityFs.mkdirs(tmpDir)) {
throw new IOException("Could not create tmp dir " + tmpDir);
}
Path parityTmp = new Path(tmpDir,
parityFile.getName() + rand.nextLong());
FSDataOutputStream out = parityFs.create(
parityTmp,
true,
conf.getInt("io.file.buffer.size", 64 * 1024),
parityRepl,
blockSize);
try {
encodeFileToStream(fs, srcFile, srcSize, blockSize, out, reporter);
out.close();
out = null;
LOG.info("Wrote temp parity file " + parityTmp);
// delete destination if exists
if (parityFs.exists(parityFile)){
parityFs.delete(parityFile, false);
}
parityFs.mkdirs(parityFile.getParent());
if (!parityFs.rename(parityTmp, parityFile)) {
String msg = "Unable to rename file " + parityTmp + " to " + parityFile;
throw new IOException (msg);
}
LOG.info("Wrote parity file " + parityFile);
} finally {
if (out != null) {
out.close();
}
parityFs.delete(parityTmp, false);
}
}
/**
* Recovers a corrupt block in a parity file to a local file.
*
* The encoder generates paritySize parity blocks for a source file stripe.
* Since we want only one of the parity blocks, this function creates
* null outputs for the blocks to be discarded.
*
* @param fs The filesystem in which both srcFile and parityFile reside.
* @param srcFile The source file.
* @param srcSize The size of the source file.
* @param blockSize The block size for the source/parity files.
* @param corruptOffset The location of corruption in the parity file.
* @param localBlockFile The destination for the recovered block.
*/
public void recoverParityBlockToFile(
FileSystem fs,
Path srcFile, long srcSize, long blockSize,
Path parityFile, long corruptOffset,
File localBlockFile) throws IOException {
OutputStream out = new FileOutputStream(localBlockFile);
try {
recoverParityBlockToStream(fs, srcFile, srcSize, blockSize, parityFile,
corruptOffset, out);
} finally {
out.close();
}
}
/**
* Recovers a corrupt block in a parity file to a local file.
*
* The encoder generates paritySize parity blocks for a source file stripe.
* Since we want only one of the parity blocks, this function creates
* null outputs for the blocks to be discarded.
*
* @param fs The filesystem in which both srcFile and parityFile reside.
* @param srcFile The source file.
* @param srcSize The size of the source file.
* @param blockSize The block size for the source/parity files.
* @param corruptOffset The location of corruption in the parity file.
* @param out The destination for the recovered block.
*/
public void recoverParityBlockToStream(
FileSystem fs,
Path srcFile, long srcSize, long blockSize,
Path parityFile, long corruptOffset,
OutputStream out) throws IOException {
LOG.info("Recovering parity block" + parityFile + ":" + corruptOffset);
// Get the start offset of the corrupt block.
corruptOffset = (corruptOffset / blockSize) * blockSize;
// Output streams to each block in the parity file stripe.
OutputStream[] outs = new OutputStream[paritySize];
long indexOfCorruptBlockInParityStripe =
(corruptOffset / blockSize) % paritySize;
LOG.info("Index of corrupt block in parity stripe: " +
indexOfCorruptBlockInParityStripe);
// Create a real output stream for the block we want to recover,
// and create null streams for the rest.
for (int i = 0; i < paritySize; i++) {
if (indexOfCorruptBlockInParityStripe == i) {
outs[i] = out;
} else {
outs[i] = new NullOutputStream();
}
}
// Get the stripe index and start offset of stripe.
long stripeIdx = corruptOffset / (paritySize * blockSize);
long stripeStart = stripeIdx * blockSize * stripeSize;
// Get input streams to each block in the source file stripe.
InputStream[] blocks = stripeInputs(fs, srcFile, stripeStart,
srcSize, blockSize);
LOG.info("Starting recovery by using source stripe " +
srcFile + ":" + stripeStart);
// Read the data from the blocks and write to the parity file.
encodeStripe(blocks, stripeStart, blockSize, outs,
new RaidUtils.DummyProgressable());
}
/**
* Encodes a source file and writes the parity data to a single output stream.
*
* The encoder generates paritySize parity blocks for each source file stripe.
* Since only one output stream is provided, all but the first parity block of
* a stripe are staged in local temp files before being appended to the output.
*
* @param fs The filesystem in which srcFile resides.
* @param srcFile The source file.
* @param srcSize The size of the source file.
* @param blockSize The block size for the source/parity files.
* @param out The destination for the parity data.
*/
private void encodeFileToStream(FileSystem fs, Path srcFile, long srcSize,
long blockSize, OutputStream out, Progressable reporter) throws IOException {
OutputStream[] tmpOuts = new OutputStream[paritySize];
// One parity block can be written directly to out, rest to local files.
tmpOuts[0] = out;
File[] tmpFiles = new File[paritySize - 1];
for (int i = 0; i < paritySize - 1; i++) {
tmpFiles[i] = File.createTempFile("parity", "_" + i);
LOG.info("Created tmp file " + tmpFiles[i]);
tmpFiles[i].deleteOnExit();
}
try {
// Loop over stripes in the file.
for (long stripeStart = 0; stripeStart < srcSize;
stripeStart += blockSize * stripeSize) {
reporter.progress();
LOG.info("Starting encoding of stripe " + srcFile + ":" + stripeStart);
// Create input streams for blocks in the stripe.
InputStream[] blocks = stripeInputs(fs, srcFile, stripeStart,
srcSize, blockSize);
// Create output streams to the temp files.
for (int i = 0; i < paritySize - 1; i++) {
tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
}
// Call the implementation of encoding.
encodeStripe(blocks, stripeStart, blockSize, tmpOuts, reporter);
// Close output streams to the temp files and write the temp files
// to the output provided.
for (int i = 0; i < paritySize - 1; i++) {
tmpOuts[i + 1].close();
tmpOuts[i + 1] = null;
InputStream in = new FileInputStream(tmpFiles[i]);
RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
reporter.progress();
}
}
} finally {
for (int i = 0; i < paritySize - 1; i++) {
if (tmpOuts[i + 1] != null) {
tmpOuts[i + 1].close();
}
tmpFiles[i].delete();
LOG.info("Deleted tmp file " + tmpFiles[i]);
}
}
}
/**
* Return input streams for each block in a source file's stripe.
* @param fs The filesystem where the file resides.
* @param srcFile The source file.
* @param stripeStartOffset The start offset of the stripe.
* @param srcSize The size of the source file.
* @param blockSize The block size for the source file.
*/
protected InputStream[] stripeInputs(
FileSystem fs,
Path srcFile,
long stripeStartOffset,
long srcSize,
long blockSize
) throws IOException {
InputStream[] blocks = new InputStream[stripeSize];
for (int i = 0; i < stripeSize; i++) {
long seekOffset = stripeStartOffset + i * blockSize;
if (seekOffset < srcSize) {
FSDataInputStream in = fs.open(
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
in.seek(seekOffset);
LOG.info("Opening stream at " + srcFile + ":" + seekOffset);
blocks[i] = in;
} else {
LOG.info("Using zeros at offset " + seekOffset);
// We have no src data at this offset.
blocks[i] = new RaidUtils.ZeroInputStream(
seekOffset + blockSize);
}
}
return blocks;
}
/**
* The implementation of generating parity data for a stripe.
*
* @param blocks The streams to blocks in the stripe.
* @param stripeStartOffset The start offset of the stripe
* @param blockSize The maximum size of a block.
* @param outs output streams to the parity blocks.
* @param reporter progress indicator.
*/
protected abstract void encodeStripe(
InputStream[] blocks,
long stripeStartOffset,
long blockSize,
OutputStream[] outs,
Progressable reporter) throws IOException;
/**
* Return the temp path for the parity file
*/
protected abstract Path getParityTempPath();
}
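A concrete Encoder only has to supply encodeStripe and getParityTempPath. Below is a minimal sketch of a single-parity XOR encoder, placed in the same package so it can use the package-private constructor; it is not the shipped XOR encoder, and the temp path is illustrative:

package org.apache.hadoop.raid;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

public class SimpleXorEncoder extends Encoder {
  SimpleXorEncoder(Configuration conf, int stripeSize) {
    super(conf, stripeSize, 1);             // one parity block per stripe
  }

  @Override
  protected void encodeStripe(InputStream[] blocks, long stripeStartOffset,
      long blockSize, OutputStream[] outs, Progressable reporter)
      throws IOException {
    byte[] parity = new byte[bufSize];
    for (long done = 0; done < blockSize; done += bufSize) {
      int toRead = (int) Math.min(bufSize, blockSize - done);
      Arrays.fill(parity, 0, toRead, (byte) 0);
      for (int i = 0; i < blocks.length; i++) {
        // Read toRead bytes from this block and XOR them into the parity buffer.
        int read = 0;
        while (read < toRead) {
          int n = blocks[i].read(readBufs[i], read, toRead - read);
          if (n < 0) {
            break;                          // short block: remaining bytes count as zeros
          }
          read += n;
        }
        for (int j = 0; j < read; j++) {
          parity[j] ^= readBufs[i][j];
        }
      }
      outs[0].write(parity, 0, toRead);
      reporter.progress();
    }
  }

  @Override
  protected Path getParityTempPath() {
    return new Path("/tmp/raid");           // illustrative temp location
  }
}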

View File

@ -1,60 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
public interface ErasureCode {
/**
* Encodes the given message.
* @param message The data of the message. The data is present in the least
* significant bits of each int. The number of data bits is
* symbolSize(). The number of elements of message is
* stripeSize().
* @param parity (out) The information is present in the least
* significant bits of each int. The number of parity bits is
* symbolSize(). The number of elements in the code is
* paritySize().
*/
public void encode(int[] message, int[] parity);
/**
* Generates missing portions of data.
* @param data The message and parity. The parity should be placed in the
* first part of the array. In each integer, the relevant portion
* is present in the least significant bits of each int.
* The number of elements in data is stripeSize() + paritySize().
* @param erasedLocations The indexes in data which are not available.
* @param erasedValues (out)The decoded values corresponding to erasedLocations.
*/
public void decode(int[] data, int[] erasedLocations, int[] erasedValues);
/**
* The number of elements in the message.
*/
public int stripeSize();
/**
* The number of elements in the code.
*/
public int paritySize();
/**
* Number of bits for each symbol.
*/
public int symbolSize();
}
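To make the contract above concrete, here is a sketch of the smallest possible code: a single XOR parity symbol over the stripe, able to reconstruct exactly one erasure. It is only an illustration; the real implementations (XOR and Reed-Solomon codes) are more general.

package org.apache.hadoop.raid;

/** Toy ErasureCode: one parity symbol, recovers at most one erased location. */
public class SingleParityCode implements ErasureCode {
  private final int stripeSize;

  public SingleParityCode(int stripeSize) {
    this.stripeSize = stripeSize;
  }

  public void encode(int[] message, int[] parity) {
    int p = 0;
    for (int m : message) {
      p ^= m;                      // parity is the XOR of all message symbols
    }
    parity[0] = p;
  }

  public void decode(int[] data, int[] erasedLocations, int[] erasedValues) {
    // data = [parity, message...]; the XOR of every surviving symbol equals
    // the single missing one, because the XOR of all symbols is zero.
    int missing = 0;
    for (int i = 0; i < data.length; i++) {
      if (i != erasedLocations[0]) {
        missing ^= data[i];
      }
    }
    erasedValues[0] = missing;
  }

  public int stripeSize() { return stripeSize; }

  public int paritySize() { return 1; }

  public int symbolSize() { return 8; }    // byte-sized symbols
}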

View File

@ -1,350 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.util.HashMap;
import java.util.Map;
/**
* Implementation of Galois field arithmetics with 2^p elements.
* The input must be unsigned integers.
*/
public class GaloisField {
private final int[] logTable;
private final int[] powTable;
private final int[][] mulTable;
private final int[][] divTable;
private final int fieldSize;
private final int primitivePeriod;
private final int primitivePolynomial;
// Field size 256 is good for byte based system
private static final int DEFAULT_FIELD_SIZE = 256;
// primitive polynomial 1 + X^2 + X^3 + X^4 + X^8
private static final int DEFAULT_PRIMITIVE_POLYNOMIAL = 285;
static private final Map<Integer, GaloisField> instances =
new HashMap<Integer, GaloisField>();
/**
* Get the object that performs Galois field arithmetic
* @param fieldSize size of the field
* @param primitivePolynomial a primitive polynomial corresponding to the field size
*/
public static GaloisField getInstance(int fieldSize,
int primitivePolynomial) {
int key = ((fieldSize << 16) & 0xFFFF0000) + (primitivePolynomial & 0x0000FFFF);
GaloisField gf;
synchronized (instances) {
gf = instances.get(key);
if (gf == null) {
gf = new GaloisField(fieldSize, primitivePolynomial);
instances.put(key, gf);
}
}
return gf;
}
/**
* Get the object that performs Galois field arithmetic with the default settings
*/
public static GaloisField getInstance() {
return getInstance(DEFAULT_FIELD_SIZE, DEFAULT_PRIMITIVE_POLYNOMIAL);
}
private GaloisField(int fieldSize, int primitivePolynomial) {
assert fieldSize > 0;
assert primitivePolynomial > 0;
this.fieldSize = fieldSize;
this.primitivePeriod = fieldSize - 1;
this.primitivePolynomial = primitivePolynomial;
logTable = new int[fieldSize];
powTable = new int[fieldSize];
mulTable = new int[fieldSize][fieldSize];
divTable = new int[fieldSize][fieldSize];
int value = 1;
for (int pow = 0; pow < fieldSize - 1; pow++) {
powTable[pow] = value;
logTable[value] = pow;
value = value * 2;
if (value >= fieldSize) {
value = value ^ primitivePolynomial;
}
}
// building multiplication table
for (int i = 0; i < fieldSize; i++) {
for (int j = 0; j < fieldSize; j++) {
if (i == 0 || j == 0) {
mulTable[i][j] = 0;
continue;
}
int z = logTable[i] + logTable[j];
z = z >= primitivePeriod ? z - primitivePeriod : z;
z = powTable[z];
mulTable[i][j] = z;
}
}
// building division table
for (int i = 0; i < fieldSize; i++) {
for (int j = 1; j < fieldSize; j++) {
if (i == 0) {
divTable[i][j] = 0;
continue;
}
int z = logTable[i] - logTable[j];
z = z < 0 ? z + primitivePeriod : z;
z = powTable[z];
divTable[i][j] = z;
}
}
}
/**
* Return number of elements in the field
* @return number of elements in the field
*/
public int getFieldSize() {
return fieldSize;
}
/**
* Return the primitive polynomial in GF(2)
* @return primitive polynomial as an integer
*/
public int getPrimitivePolynomial() {
return primitivePolynomial;
}
/**
* Compute the sum of two field elements
* @param x input field element
* @param y input field element
* @return result of addition
*/
public int add(int x, int y) {
assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
return x ^ y;
}
/**
* Compute the product of two field elements
* @param x input field element
* @param y input field element
* @return result of multiplication
*/
public int multiply(int x, int y) {
assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
return mulTable[x][y];
}
/**
* Compute the quotient of two field elements
* @param x input field element (dividend)
* @param y input field element (divisor, must be nonzero)
* @return x/y
*/
public int divide(int x, int y) {
assert(x >= 0 && x < getFieldSize() && y > 0 && y < getFieldSize());
return divTable[x][y];
}
/**
* Compute power n of a field
* @param x input field
* @param n power
* @return x^n
*/
public int power(int x, int n) {
assert(x >= 0 && x < getFieldSize());
if (n == 0) {
return 1;
}
if (x == 0) {
return 0;
}
x = logTable[x] * n;
if (x < primitivePeriod) {
return powTable[x];
}
x = x % primitivePeriod;
return powTable[x];
}
/**
* Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
* that Vz=y. The output z will be placed in y.
* @param x the vector which describes the Vandermonde matrix
* @param y right-hand side of the Vandermonde system equation;
* on return this vector holds the solution z
*/
public void solveVandermondeSystem(int[] x, int[] y) {
solveVandermondeSystem(x, y, x.length);
}
/**
* Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
* that Vz=y. The output z will be placed in y.
* @param x the vector which describes the Vandermonde matrix
* @param y right-hand side of the Vandermonde system equation;
* on return this vector holds the solution z
* @param len consider x and y only from 0...len-1
*/
public void solveVandermondeSystem(int[] x, int[] y, int len) {
assert(y.length <= len);
for (int i = 0; i < len - 1; i++) {
for (int j = len - 1; j > i; j--) {
y[j] = y[j] ^ mulTable[x[i]][y[j - 1]];
}
}
for (int i = len - 1; i >= 0; i--) {
for (int j = i + 1; j < len; j++) {
y[j] = divTable[y[j]][x[j] ^ x[j - i - 1]];
}
for (int j = i; j < len - 1; j++) {
y[j] = y[j] ^ y[j + 1];
}
}
}
/**
* Compute the multiplication of two polynomials. The index in the
* array corresponds to the power of the entry. For example p[0] is the
* constant term of the polynomial p.
* @param p input polynomial
* @param q input polynomial
* @return polynomial represents p*q
*/
public int[] multiply(int[] p, int[] q) {
int len = p.length + q.length - 1;
int[] result = new int[len];
for (int i = 0; i < len; i++) {
result[i] = 0;
}
for (int i = 0; i < p.length; i++) {
for (int j = 0; j < q.length; j++) {
result[i + j] = add(result[i + j], multiply(p[i], q[j]));
}
}
return result;
}
/**
* Compute the remainder of a dividend and divisor pair. The index in the
* array corresponds to the power of the entry. For example p[0] is the
* constant term of the polynomial p.
* @param dividend dividend polynomial; the remainder is left in this array on return
* @param divisor divisor polynomial
*/
public void remainder(int[] dividend, int[] divisor) {
for (int i = dividend.length - divisor.length; i >= 0; i--) {
int ratio =
divTable[dividend[i + divisor.length - 1]][divisor[divisor.length - 1]];
for (int j = 0; j < divisor.length; j++) {
int k = j + i;
dividend[k] = dividend[k] ^ mulTable[ratio][divisor[j]];
}
}
}
/**
* Compute the sum of two polynomials. The index in the
* array corresponds to the power of the entry. For example p[0] is the
* constant term of the polynomial p.
* @param p input polynomial
* @param q input polynomial
* @return polynomial represents p+q
*/
public int[] add(int[] p, int[] q) {
int len = Math.max(p.length, q.length);
int[] result = new int[len];
for (int i = 0; i < len; i++) {
if (i < p.length && i < q.length) {
result[i] = add(p[i], q[i]);
} else if (i < p.length) {
result[i] = p[i];
} else {
result[i] = q[i];
}
}
return result;
}
/**
* Substitute x into polynomial p(x).
* @param p input polynomial
* @param x input field
* @return p(x)
*/
public int substitute(int[] p, int x) {
int result = 0;
int y = 1;
for (int i = 0; i < p.length; i++) {
result = result ^ mulTable[p[i]][y];
y = mulTable[x][y];
}
return result;
}
/**
* Perform Gaussian elimination on the given matrix. The matrix has to be a
* fat matrix (number of rows less than number of columns).
*/
public void gaussianElimination(int[][] matrix) {
assert(matrix != null && matrix.length > 0 && matrix[0].length > 0
&& matrix.length < matrix[0].length);
int height = matrix.length;
int width = matrix[0].length;
for (int i = 0; i < height; i++) {
boolean pivotFound = false;
// scan the column for a nonzero pivot and swap it to the diagonal
for (int j = i; j < height; j++) {
if (matrix[i][j] != 0) {
int[] tmp = matrix[i];
matrix[i] = matrix[j];
matrix[j] = tmp;
pivotFound = true;
break;
}
}
if (!pivotFound) {
continue;
}
int pivot = matrix[i][i];
for (int j = i; j < width; j++) {
matrix[i][j] = divide(matrix[i][j], pivot);
}
for (int j = i + 1; j < height; j++) {
int lead = matrix[j][i];
for (int k = i; k < width; k++) {
matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k]));
}
}
}
for (int i = height - 1; i >=0; i--) {
for (int j = 0; j < i; j++) {
int lead = matrix[j][i];
for (int k = i; k < width; k++) {
matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k]));
}
}
}
}
}
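A short sanity check of the operations above in the default GF(256); the identities follow directly from the log/antilog table construction (addition is XOR, multiplication and division are table lookups):

import java.util.Arrays;

import org.apache.hadoop.raid.GaloisField;

public class GaloisFieldExample {
  public static void main(String[] args) {
    GaloisField gf = GaloisField.getInstance();   // GF(256), primitive polynomial 285

    int a = 100, b = 27;
    System.out.println(gf.add(a, b) == (a ^ b));                              // true: add is XOR
    System.out.println(gf.divide(gf.multiply(a, b), b) == a);                 // true: (a*b)/b == a
    System.out.println(gf.power(a, 3) == gf.multiply(a, gf.multiply(a, a)));  // true

    // Polynomial arithmetic: (1 + x) * (1 + x) = 1 + x^2 over GF(2^p),
    // because the two middle terms cancel under XOR.
    int[] p = {1, 1};
    System.out.println(Arrays.toString(gf.multiply(p, p)));                   // [1, 0, 1]
  }
}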

View File

@ -1,144 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.List;
import java.util.LinkedList;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.io.Text;
/**
 * Represents the contents of a HAR index file. The HAR is assumed to
 * contain only RAID parity files and no directories.
*/
public class HarIndex {
public static final String indexFileName = "_index";
private List<IndexEntry> entries = new LinkedList<IndexEntry>();
/**
* Represents information in a single line of the HAR index file.
*/
public static class IndexEntry {
String fileName; // Name of the file in the part file.
long startOffset; // Start offset within the part file.
long length; // Length of this file within the part file.
long mtime; // Modification time of the file.
String partFileName; // Name of the part file.
IndexEntry(String fileName, long startOffset, long length,
long mtime, String partFileName) {
this.fileName = fileName;
this.startOffset = startOffset;
this.length = length;
this.mtime = mtime;
this.partFileName = partFileName;
}
public String toString() {
return "fileName=" + fileName +
", startOffset=" + startOffset +
", length=" + length +
", mtime=" + mtime +
", partFileName=" + partFileName;
}
}
/**
* Constructor that reads the contents of the index file.
* @param in An input stream to the index file.
* @param max The size of the index file.
* @throws IOException
*/
public HarIndex(InputStream in, long max) throws IOException {
LineReader lineReader = new LineReader(in);
Text text = new Text();
long nread = 0;
while (nread < max) {
int n = lineReader.readLine(text);
nread += n;
String line = text.toString();
try {
parseLine(line);
} catch (UnsupportedEncodingException e) {
throw new IOException("UnsupportedEncodingException after reading " +
nread + " bytes");
}
}
}
/**
* Parses each line and extracts relevant information.
 * @param line a single line of the HAR index file
* @throws UnsupportedEncodingException
*/
void parseLine(String line) throws UnsupportedEncodingException {
String[] splits = line.split(" ");
boolean isDir = "dir".equals(splits[1]);
if (!isDir && splits.length >= 6) {
String name = URLDecoder.decode(splits[0], "UTF-8");
String partName = URLDecoder.decode(splits[2], "UTF-8");
long startIndex = Long.parseLong(splits[3]);
long length = Long.parseLong(splits[4]);
String[] newsplits = URLDecoder.decode(splits[5],"UTF-8").split(" ");
if (newsplits != null && newsplits.length >= 4) {
long mtime = Long.parseLong(newsplits[0]);
IndexEntry entry = new IndexEntry(
name, startIndex, length, mtime, partName);
entries.add(entry);
}
}
}
/**
* Finds the index entry corresponding to a HAR partFile at an offset.
* @param partName The name of the part file (part-*).
* @param partFileOffset The offset into the part file.
* @return The entry corresponding to partName:partFileOffset.
*/
public IndexEntry findEntry(String partName, long partFileOffset) {
for (IndexEntry e: entries) {
boolean nameMatch = partName.equals(e.partFileName);
boolean inRange = (partFileOffset >= e.startOffset) &&
(partFileOffset < e.startOffset + e.length);
if (nameMatch && inRange) {
return e;
}
}
return null;
}
/**
* Finds the index entry corresponding to a file in the archive
*/
public IndexEntry findEntryByFileName(String fileName) {
for (IndexEntry e: entries) {
if (fileName.equals(e.fileName)) {
return e;
}
}
return null;
}
}
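For orientation, parseLine() above expects each index line to look like "<urlencoded name> <dir|file> <urlencoded part file> <startOffset> <length> <urlencoded metadata>", where the first metadata token is the modification time. Below is a minimal sketch (illustrative only; the HarIndexDemo class and the sample values are made up) that feeds one such line through the parser and looks up an offset.
package org.apache.hadoop.raid;
import java.io.ByteArrayInputStream;
public class HarIndexDemo {
  public static void main(String[] args) throws Exception {
    // name type partFile startOffset length <urlencoded "mtime owner group perm">
    String line =
        "/raid/dir/file1 file part-0 0 134217728 1339000000000+hadoop+hadoop+420\n";
    byte[] bytes = line.getBytes("UTF-8");
    HarIndex index = new HarIndex(new ByteArrayInputStream(bytes), bytes.length);
    // Which archived file covers offset 1024 of part-0?
    HarIndex.IndexEntry entry = index.findEntry("part-0", 1024L);
    System.out.println(entry);   // fileName=/raid/dir/file1, startOffset=0, ...
  }
}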


@ -1,211 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.LinkedList;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
/**
* Periodically monitors the status of jobs registered with it.
*
* Jobs that are submitted for the same policy name are kept in the same list,
* and the list itself is kept in a map that has the policy name as the key and
* the list as value.
*/
class JobMonitor implements Runnable {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.JobMonitor");
volatile boolean running = true;
private Map<String, List<DistRaid>> jobs;
public static final String JOBMONITOR_INTERVAL_KEY = "raid.jobmonitor.interval";
private long jobMonitorInterval;
private volatile long jobsMonitored = 0;
private volatile long jobsSucceeded = 0;
public JobMonitor(Configuration conf) {
jobMonitorInterval = conf.getLong(JOBMONITOR_INTERVAL_KEY, 60000);
jobs = new HashMap<String, List<DistRaid>>();
}
public void run() {
while (running) {
try {
LOG.info("JobMonitor thread continuing to run...");
doMonitor();
} catch (Throwable e) {
LOG.error("JobMonitor encountered exception " +
StringUtils.stringifyException(e));
// All expected exceptions are caught by doMonitor(), so exit now.
// Since the running-job count will never decrease after this thread
// dies, RaidNode is prevented from submitting more jobs.
return;
}
}
}
/**
* Periodically checks status of running map-reduce jobs.
*/
public void doMonitor() {
while (running) {
String[] keys = null;
// Make a copy of the names of the current jobs.
synchronized(jobs) {
keys = jobs.keySet().toArray(new String[0]);
}
// Check all the jobs. We do not want to block access to `jobs`
// because that will prevent new jobs from being added.
// This is safe because JobMonitor.run is the only code that can
// remove a job from `jobs`. Thus all elements in `keys` will have
// valid values.
Map<String, List<DistRaid>> finishedJobs =
new HashMap<String, List<DistRaid>>();
for (String key: keys) {
// For each policy being monitored, get the list of jobs running.
DistRaid[] jobListCopy = null;
synchronized(jobs) {
List<DistRaid> jobList = jobs.get(key);
synchronized(jobList) {
jobListCopy = jobList.toArray(new DistRaid[jobList.size()]);
}
}
// The code that actually contacts the JobTracker is not synchronized,
// it uses copies of the list of jobs.
for (DistRaid job: jobListCopy) {
// Check each running job.
try {
boolean complete = job.checkComplete();
if (complete) {
addJob(finishedJobs, key, job);
if (job.successful()) {
jobsSucceeded++;
}
}
} catch (IOException ioe) {
// If there was an error, consider the job finished.
addJob(finishedJobs, key, job);
LOG.error("JobMonitor exception", ioe);
}
}
}
if (finishedJobs.size() > 0) {
for (String key: finishedJobs.keySet()) {
List<DistRaid> finishedJobList = finishedJobs.get(key);
// Iterate through finished jobs and remove from jobs.
// removeJob takes care of locking.
for (DistRaid job: finishedJobList) {
removeJob(jobs, key, job);
}
}
}
try {
Thread.sleep(jobMonitorInterval);
} catch (InterruptedException ie) {
}
}
}
public int runningJobsCount(String key) {
int count = 0;
synchronized(jobs) {
if (jobs.containsKey(key)) {
List<DistRaid> jobList = jobs.get(key);
synchronized(jobList) {
count = jobList.size();
}
}
}
return count;
}
public void monitorJob(String key, DistRaid job) {
addJob(jobs, key, job);
jobsMonitored++;
}
public long jobsMonitored() {
return this.jobsMonitored;
}
public long jobsSucceeded() {
return this.jobsSucceeded;
}
// For test code
int runningJobsCount() {
int total = 0;
synchronized(jobs) {
for (String key: jobs.keySet()) {
total += jobs.get(key).size();
}
}
return total;
}
private static void addJob(Map<String, List<DistRaid>> jobsMap,
String jobName, DistRaid job) {
synchronized(jobsMap) {
List<DistRaid> list = null;
if (jobsMap.containsKey(jobName)) {
list = jobsMap.get(jobName);
} else {
list = new LinkedList<DistRaid>();
jobsMap.put(jobName, list);
}
synchronized(list) {
list.add(job);
}
}
}
private static void removeJob(Map<String, List<DistRaid>> jobsMap,
String jobName, DistRaid job) {
synchronized(jobsMap) {
if (jobsMap.containsKey(jobName)) {
List<DistRaid> list = jobsMap.get(jobName);
synchronized(list) {
for (Iterator<DistRaid> it = list.iterator(); it.hasNext(); ) {
DistRaid val = it.next();
if (val == job) {
it.remove();
}
}
if (list.size() == 0) {
jobsMap.remove(jobName);
}
}
}
}
}
}
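A sketch of how a caller might wire the monitor up (illustrative only; the JobMonitorDemo class is made up and the DistRaid job is assumed to have been created and submitted elsewhere):
package org.apache.hadoop.raid;
import org.apache.hadoop.conf.Configuration;
public class JobMonitorDemo {
  public static void monitorExample(DistRaid job) {
    Configuration conf = new Configuration();
    conf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 10000L); // poll every 10s
    JobMonitor monitor = new JobMonitor(conf);
    Thread t = new Thread(monitor);
    t.setDaemon(true);
    t.start();
    // Register an already submitted job under its policy name.
    monitor.monitorJob("my-policy", job);
    System.out.println("running jobs: " + monitor.runningJobsCount("my-policy"));
  }
}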


@ -1,171 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.RaidUtils;
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
/**
* This class fixes source file blocks using the parity file,
* and parity file blocks using the source file.
* It periodically fetches the list of corrupt files from the namenode,
* and figures out the location of the bad block by reading through
* the corrupt file.
*/
public class LocalBlockFixer extends BlockFixer {
public static final Log LOG = LogFactory.getLog(LocalBlockFixer.class);
private java.util.HashMap<String, java.util.Date> history;
private BlockFixerHelper helper;
public LocalBlockFixer(Configuration conf) throws IOException {
super(conf);
history = new java.util.HashMap<String, java.util.Date>();
helper = new BlockFixerHelper(getConf());
}
public void run() {
while (running) {
try {
LOG.info("LocalBlockFixer continuing to run...");
doFix();
} catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
} catch (Error err) {
LOG.error("Exiting after encountering " +
StringUtils.stringifyException(err));
throw err;
}
}
}
void doFix() throws InterruptedException, IOException {
while (running) {
// Sleep before proceeding to fix files.
Thread.sleep(blockFixInterval);
// Purge history older than the history interval.
purgeHistory();
List<Path> corruptFiles = getCorruptFiles();
filterUnfixableSourceFiles(corruptFiles.iterator());
if (corruptFiles.isEmpty()) {
// If there are no corrupt files, retry after some time.
continue;
}
LOG.info("Found " + corruptFiles.size() + " corrupt files.");
helper.sortCorruptFiles(corruptFiles);
for (Path srcPath: corruptFiles) {
if (!running) break;
try {
boolean fixed = helper.fixFile(srcPath);
LOG.info("Adding " + srcPath + " to history");
history.put(srcPath.toString(), new java.util.Date());
if (fixed) {
incrFilesFixed();
}
} catch (IOException ie) {
LOG.error("Hit error while processing " + srcPath +
": " + StringUtils.stringifyException(ie));
// Do nothing, move on to the next file.
}
}
}
}
/**
* We maintain history of fixed files because a fixed file may appear in
* the list of corrupt files if we loop around too quickly.
* This function removes the old items in the history so that we can
* recognize files that have actually become corrupt since being fixed.
*/
void purgeHistory() {
java.util.Date cutOff = new java.util.Date(Time.now() -
historyInterval);
List<String> toRemove = new java.util.ArrayList<String>();
for (String key: history.keySet()) {
java.util.Date item = history.get(key);
if (item.before(cutOff)) {
toRemove.add(key);
}
}
for (String key: toRemove) {
LOG.info("Removing " + key + " from history");
history.remove(key);
}
}
/**
* @return A list of corrupt files as obtained from the namenode
*/
List<Path> getCorruptFiles() throws IOException {
DistributedFileSystem dfs = helper.getDFS(new Path("/"));
String[] files = RaidDFSUtil.getCorruptFiles(dfs);
List<Path> corruptFiles = new LinkedList<Path>();
for (String f: files) {
Path p = new Path(f);
if (!history.containsKey(p.toString())) {
corruptFiles.add(p);
}
}
RaidUtils.filterTrash(getConf(), corruptFiles);
return corruptFiles;
}
}


@ -1,60 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.raid.protocol.PolicyInfo;
/**
* Implementation of {@link RaidNode} that performs raiding locally.
*/
public class LocalRaidNode extends RaidNode {
public static final Log LOG = LogFactory.getLog(LocalRaidNode.class);
public LocalRaidNode(Configuration conf) throws IOException {
super(conf);
}
/**
 * {@inheritDoc}
*/
@Override
void raidFiles(PolicyInfo info, List<FileStatus> paths) throws IOException {
doRaid(conf, info, paths);
}
/**
 * {@inheritDoc}
*/
@Override
int getRunningJobsForPolicy(String policyName) {
return 0;
}
}


@ -1,151 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.util.Progressable;
/**
 * Wraps multiple input streams and provides an input stream that is
 * the XOR of the streams.
*/
class ParityInputStream extends InputStream {
private static final int DEFAULT_BUFSIZE = 5*1024*1024;
private InputStream[] streams;
private byte[] xor;
private byte[] buf;
private int bufSize;
private long remaining;
private int available = 0;
private int readPos = 0;
public ParityInputStream(
InputStream[] streams, long parityBlockSize, byte[] buf, byte[] xor) {
assert buf.length == xor.length;
bufSize = buf.length;
this.streams = streams;
remaining = parityBlockSize;
this.buf = buf;
this.xor = xor;
}
@Override
public int read() throws IOException {
makeAvailable();
if (available == 0) {
return -1;
}
int ret = xor[readPos];
readPos++;
available--;
return ret;
}
@Override
public int read(byte b[], int off, int len) throws IOException {
makeAvailable();
if (available == 0) {
return -1;
}
int ret = Math.min(len, available);
for (int i = 0; i < ret; ++i) {
b[off+i] = xor[readPos+i];
}
readPos += ret;
available -= ret;
return ret;
}
public void close() throws IOException {
for (InputStream i: streams) {
i.close();
}
}
/**
* Send the contents of the stream to the sink.
* @param sink
* @param reporter
* @throws IOException
*/
public void drain(OutputStream sink, Progressable reporter)
throws IOException {
while (true) {
makeAvailable();
if (available == 0) {
break;
}
sink.write(xor, readPos, available);
available = 0;
if (reporter != null) {
reporter.progress();
}
}
}
/**
* Make some bytes available for reading in the internal buffer.
* @throws IOException
*/
private void makeAvailable() throws IOException {
if (available > 0 || remaining <= 0) {
return;
}
// Read some bytes from the first stream.
int xorlen = (int)Math.min(remaining, bufSize);
readExact(streams[0], xor, xorlen);
// Read bytes from all the other streams and xor them.
for (int i = 1; i < streams.length; i++) {
readExact(streams[i], buf, xorlen);
for (int j = 0; j < xorlen; j++) {
xor[j] ^= buf[j];
}
}
remaining -= xorlen;
available = xorlen;
readPos = 0;
}
private static void readExact(InputStream in, byte[] bufs, int toRead)
throws IOException {
int tread = 0;
while (tread < toRead) {
int read = in.read(bufs, tread, toRead - tread);
if (read == -1) {
// If the stream ends, fill in zeros.
Arrays.fill(bufs, tread, toRead, (byte)0);
tread = toRead;
} else {
tread += read;
}
}
assert tread == toRead;
}
}
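A self-contained sketch of the XOR behaviour (illustrative only; ParityInputStreamDemo is a made-up name) that combines two in-memory streams and drains the parity into a byte array:
package org.apache.hadoop.raid;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
public class ParityInputStreamDemo {
  public static void main(String[] args) throws Exception {
    byte[] a = {1, 2, 3, 4};
    byte[] b = {5, 6, 7, 8};
    InputStream[] streams = {
        new ByteArrayInputStream(a), new ByteArrayInputStream(b)};
    ParityInputStream parity = new ParityInputStream(
        streams, a.length, new byte[64], new byte[64]);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    parity.drain(out, null);   // a null Progressable is tolerated by drain()
    parity.close();
    byte[] xor = out.toByteArray();   // {1^5, 2^6, 3^7, 4^8}
    System.out.println(xor[0]);       // 4
  }
}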


@ -1,30 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
/**
* Thrown when the config file for {@link RaidNode} is malformed.
*/
public class RaidConfigurationException extends Exception {
private static final long serialVersionUID = 4046516718965587999L;
public RaidConfigurationException(String message) {
super(message);
}
}


@ -1,259 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.raid.protocol.PolicyInfo;
public class RaidFilter {
static class Statistics {
long numRaided = 0;
long numTooNew = 0;
long sizeTooNew = 0;
long numTooSmall = 0;
long sizeTooSmall = 0;
public void aggregate(Statistics other) {
this.numRaided += other.numRaided;
this.numTooNew += other.numTooNew;
this.sizeTooNew += other.sizeTooNew;
this.numTooSmall += other.numTooSmall;
this.sizeTooSmall += other.sizeTooSmall;
}
public String toString() {
return "numRaided = " + numRaided +
", numTooNew = " + numTooNew +
", sizeTooNew = " + sizeTooNew +
", numTooSmall = " + numTooSmall +
", sizeTooSmall = " + sizeTooSmall;
}
}
static class TimeBasedFilter extends Configured
implements DirectoryTraversal.FileFilter {
int targetRepl;
Path raidDestPrefix;
long modTimePeriod;
long startTime;
Statistics stats = new Statistics();
String currentSrcPath = null;
long[] modTimePeriods = new long[0];
String[] otherSrcPaths = new String[0];
TimeBasedFilter(Configuration conf, Path destPrefix, int targetRepl,
long startTime, long modTimePeriod) {
super(conf);
this.raidDestPrefix = destPrefix;
this.targetRepl = targetRepl;
this.startTime = startTime;
this.modTimePeriod = modTimePeriod;
}
TimeBasedFilter(Configuration conf,
Path destPrefix, PolicyInfo info,
List<PolicyInfo> allPolicies, long startTime, Statistics stats) {
super(conf);
this.raidDestPrefix = destPrefix;
this.targetRepl = Integer.parseInt(info.getProperty("targetReplication"));
this.modTimePeriod = Long.parseLong(info.getProperty("modTimePeriod"));
this.startTime = startTime;
this.stats = stats;
this.currentSrcPath = info.getSrcPath().toUri().getPath();
initializeOtherPaths(allPolicies);
}
private void initializeOtherPaths(List<PolicyInfo> allPolicies) {
ArrayList<PolicyInfo> tmp = new ArrayList<PolicyInfo>(allPolicies);
// Remove all policies where srcPath <= currentSrcPath or
// matchingPrefixLength is < length(currentSrcPath)
// The policies remaining are the only ones that could better
// select a file chosen by the current policy.
for (Iterator<PolicyInfo> it = tmp.iterator(); it.hasNext(); ) {
String src = it.next().getSrcPath().toUri().getPath();
if (src.compareTo(currentSrcPath) <= 0) {
it.remove();
continue;
}
int matchLen = matchingPrefixLength(src, currentSrcPath);
if (matchLen < currentSrcPath.length()) {
it.remove();
}
}
// Sort in reverse lexicographic order.
Collections.sort(tmp, new Comparator() {
public int compare(Object o1, Object o2) {
return 0 -
((PolicyInfo)o1).getSrcPath().toUri().getPath().compareTo(
((PolicyInfo)o2).getSrcPath().toUri().getPath());
}
});
otherSrcPaths = new String[tmp.size()];
modTimePeriods = new long[otherSrcPaths.length];
for (int i = 0; i < otherSrcPaths.length; i++) {
otherSrcPaths[i] = tmp.get(i).getSrcPath().toUri().getPath();
modTimePeriods[i] = Long.parseLong(
tmp.get(i).getProperty("modTimePeriod"));
}
}
public boolean check(FileStatus f) throws IOException {
if (!canChooseForCurrentPolicy(f)) {
return false;
}
// If the source file has two or fewer blocks, skip it.
long blockSize = f.getBlockSize();
if (2 * blockSize >= f.getLen()) {
stats.numTooSmall++;
stats.sizeTooSmall += f.getLen();
return false;
}
boolean select = false;
try {
Object ppair = RaidNode.getParityFile(
raidDestPrefix, f.getPath(), getConf());
// Is there a valid parity file?
if (ppair != null) {
// Is the source at the target replication?
if (f.getReplication() != targetRepl) {
// Select the file so that its replication can be set.
select = true;
} else {
stats.numRaided++;
// Nothing to do, don't select the file.
select = false;
}
} else {
// No parity file.
if (f.getModificationTime() + modTimePeriod < startTime) {
// If the file is not too new, choose it for raiding.
select = true;
} else {
select = false;
stats.numTooNew++;
stats.sizeTooNew += f.getLen();
}
}
} catch (java.io.FileNotFoundException e) {
select = true; // destination file does not exist
} catch (java.io.IOException e) {
// If there is a problem with the har path, this will let us continue.
DirectoryTraversal.LOG.error(
"Error while selecting " + StringUtils.stringifyException(e));
}
return select;
}
/**
* Checks if a file can be chosen for the current policy.
*/
boolean canChooseForCurrentPolicy(FileStatus stat) {
boolean choose = true;
if (otherSrcPaths.length > 0) {
String fileStr = stat.getPath().toUri().getPath();
// For a given string, find the best matching srcPath.
int matchWithCurrent = matchingPrefixLength(fileStr, currentSrcPath);
for (int i = 0; i < otherSrcPaths.length; i++) {
// If the file is too new, move to the next.
if (stat.getModificationTime() > startTime - modTimePeriods[i]) {
continue;
}
int matchLen = matchingPrefixLength(fileStr, otherSrcPaths[i]);
if (matchLen > 0 &&
fileStr.charAt(matchLen - 1) == Path.SEPARATOR_CHAR) {
matchLen--;
}
if (matchLen > matchWithCurrent) {
choose = false;
break;
}
}
}
return choose;
}
int matchingPrefixLength(final String s1, final String s2) {
int len = 0;
for (int j = 0; j < s1.length() && j < s2.length(); j++) {
if (s1.charAt(j) == s2.charAt(j)) {
len++;
} else {
break;
}
}
return len;
}
}
static class PreferenceFilter extends Configured
implements DirectoryTraversal.FileFilter {
Path firstChoicePrefix;
DirectoryTraversal.FileFilter secondChoiceFilter;
PreferenceFilter(Configuration conf,
Path firstChoicePrefix, Path secondChoicePrefix,
int targetRepl, long startTime, long modTimePeriod) {
super(conf);
this.firstChoicePrefix = firstChoicePrefix;
this.secondChoiceFilter = new TimeBasedFilter(conf,
secondChoicePrefix, targetRepl, startTime, modTimePeriod);
}
PreferenceFilter(Configuration conf,
Path firstChoicePrefix, Path secondChoicePrefix,
PolicyInfo info, List<PolicyInfo> allPolicies, long startTime,
Statistics stats) {
super(conf);
this.firstChoicePrefix = firstChoicePrefix;
this.secondChoiceFilter = new TimeBasedFilter(
conf, secondChoicePrefix, info, allPolicies, startTime, stats);
}
public boolean check(FileStatus f) throws IOException {
Object firstChoicePPair =
RaidNode.getParityFile(firstChoicePrefix, f.getPath(), getConf());
if (firstChoicePPair == null) {
// The decision is up to the second choice filter.
return secondChoiceFilter.check(f);
} else {
// There is already a parity file under the first choice path.
// We don't want to choose this file.
return false;
}
}
}
}


@ -1,682 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.util.Collection;
import java.util.Map;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.LinkedHashMap;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.net.InetSocketAddress;
import javax.security.auth.login.LoginException;
import org.apache.hadoop.ipc.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryProxy;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.HarFileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.DistributedRaidFileSystem;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.PolicyList;
import org.apache.hadoop.raid.protocol.RaidProtocol;
/**
* A {@link RaidShell} that allows browsing configured raid policies.
*/
public class RaidShell extends Configured implements Tool {
static {
Configuration.addDefaultResource("hdfs-default.xml");
Configuration.addDefaultResource("hdfs-site.xml");
}
public static final Log LOG = LogFactory.getLog( "org.apache.hadoop.RaidShell");
public RaidProtocol raidnode;
RaidProtocol rpcRaidnode;
private UserGroupInformation ugi;
volatile boolean clientRunning = true;
private Configuration conf;
/**
* Start RaidShell.
* <p>
 * The RaidShell connects to the specified RaidNode and performs basic
 * configuration and recovery operations.
* @throws IOException
*/
public RaidShell(Configuration conf) throws IOException {
super(conf);
this.conf = conf;
}
void initializeRpc(Configuration conf, InetSocketAddress address) throws IOException {
this.ugi = UserGroupInformation.getCurrentUser();
this.rpcRaidnode = createRPCRaidnode(address, conf, ugi);
this.raidnode = createRaidnode(rpcRaidnode);
}
void initializeLocal(Configuration conf) throws IOException {
this.ugi = UserGroupInformation.getCurrentUser();
}
public static RaidProtocol createRaidnode(Configuration conf) throws IOException {
return createRaidnode(RaidNode.getAddress(conf), conf);
}
public static RaidProtocol createRaidnode(InetSocketAddress raidNodeAddr,
Configuration conf) throws IOException {
return createRaidnode(createRPCRaidnode(raidNodeAddr, conf,
UserGroupInformation.getCurrentUser()));
}
private static RaidProtocol createRPCRaidnode(InetSocketAddress raidNodeAddr,
Configuration conf, UserGroupInformation ugi)
throws IOException {
LOG.debug("RaidShell connecting to " + raidNodeAddr);
return (RaidProtocol)RPC.getProxy(RaidProtocol.class,
RaidProtocol.versionID, raidNodeAddr, ugi, conf,
NetUtils.getSocketFactory(conf, RaidProtocol.class));
}
private static RaidProtocol createRaidnode(RaidProtocol rpcRaidnode)
throws IOException {
RetryPolicy createPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
5, 5000, TimeUnit.MILLISECONDS);
Map<Class<? extends Exception>,RetryPolicy> remoteExceptionToPolicyMap =
new HashMap<Class<? extends Exception>, RetryPolicy>();
Map<Class<? extends Exception>,RetryPolicy> exceptionToPolicyMap =
new HashMap<Class<? extends Exception>, RetryPolicy>();
exceptionToPolicyMap.put(RemoteException.class,
RetryPolicies.retryByRemoteException(
RetryPolicies.TRY_ONCE_THEN_FAIL, remoteExceptionToPolicyMap));
RetryPolicy methodPolicy = RetryPolicies.retryByException(
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
Map<String,RetryPolicy> methodNameToPolicyMap = new HashMap<String,RetryPolicy>();
methodNameToPolicyMap.put("create", methodPolicy);
return (RaidProtocol) RetryProxy.create(RaidProtocol.class,
rpcRaidnode, methodNameToPolicyMap);
}
private void checkOpen() throws IOException {
if (!clientRunning) {
IOException result = new IOException("RaidNode closed");
throw result;
}
}
/**
* Close the connection to the raidNode.
*/
public synchronized void close() throws IOException {
if(clientRunning) {
clientRunning = false;
RPC.stopProxy(rpcRaidnode);
}
}
/**
* Displays format of commands.
*/
private static void printUsage(String cmd) {
String prefix = "Usage: java " + RaidShell.class.getSimpleName();
if ("-showConfig".equals(cmd)) {
System.err.println("Usage: java RaidShell" +
" [-showConfig]");
} else if ("-recover".equals(cmd)) {
System.err.println("Usage: java RaidShell" +
" [-recover srcPath1 corruptOffset]");
} else if ("-recoverBlocks".equals(cmd)) {
System.err.println("Usage: java RaidShell" +
" [-recoverBlocks path1 path2...]");
} else {
System.err.println("Usage: java RaidShell");
System.err.println(" [-showConfig ]");
System.err.println(" [-help [cmd]]");
System.err.println(" [-recover srcPath1 corruptOffset]");
System.err.println(" [-recoverBlocks path1 path2...]");
System.err.println(" [-fsck [path]]");
System.err.println();
ToolRunner.printGenericCommandUsage(System.err);
}
}
/**
* run
*/
public int run(String argv[]) throws Exception {
if (argv.length < 1) {
printUsage("");
return -1;
}
int exitCode = -1;
int i = 0;
String cmd = argv[i++];
//
// verify that we have enough command line parameters
//
if ("-showConfig".equals(cmd)) {
if (argv.length < 1) {
printUsage(cmd);
return exitCode;
}
} else if ("-recover".equals(cmd)) {
if (argv.length < 3) {
printUsage(cmd);
return exitCode;
}
} else if ("-fsck".equals(cmd)) {
if ((argv.length < 1) || (argv.length > 2)) {
printUsage(cmd);
return exitCode;
}
}
try {
if ("-showConfig".equals(cmd)) {
initializeRpc(conf, RaidNode.getAddress(conf));
exitCode = showConfig(cmd, argv, i);
} else if ("-recover".equals(cmd)) {
initializeRpc(conf, RaidNode.getAddress(conf));
exitCode = recoverAndPrint(cmd, argv, i);
} else if ("-recoverBlocks".equals(cmd)) {
initializeLocal(conf);
recoverBlocks(argv, i);
exitCode = 0;
} else if ("-fsck".equals(cmd)) {
if (argv.length == 1) {
// if there are no args, check the whole file system
exitCode = fsck("/");
} else {
// argv.length == 2
// otherwise, check the path passed
exitCode = fsck(argv[1]);
}
} else {
exitCode = -1;
System.err.println(cmd.substring(1) + ": Unknown command");
printUsage("");
}
} catch (IllegalArgumentException arge) {
exitCode = -1;
System.err.println(cmd.substring(1) + ": " + arge.getLocalizedMessage());
printUsage(cmd);
} catch (RemoteException e) {
//
// This is an error returned by the raidnode server. Print
// out the first line of the error message and ignore the stack trace.
exitCode = -1;
try {
String[] content;
content = e.getLocalizedMessage().split("\n");
System.err.println(cmd.substring(1) + ": " +
content[0]);
} catch (Exception ex) {
System.err.println(cmd.substring(1) + ": " +
ex.getLocalizedMessage());
}
} catch (IOException e) {
//
// IO exception encountered locally.
//
exitCode = -1;
System.err.println(cmd.substring(1) + ": " +
e.getLocalizedMessage());
} catch (Exception re) {
exitCode = -1;
System.err.println(cmd.substring(1) + ": " + re.getLocalizedMessage());
} finally {
}
return exitCode;
}
/**
* Apply operation specified by 'cmd' on all parameters
* starting from argv[startindex].
*/
private int showConfig(String cmd, String argv[], int startindex) throws IOException {
int exitCode = 0;
int i = startindex;
PolicyList[] all = raidnode.getAllPolicies();
for (PolicyList list: all) {
for (PolicyInfo p : list.getAll()) {
System.out.println(p);
}
}
return exitCode;
}
/**
* Recovers the specified path from the parity file
*/
public Path[] recover(String cmd, String argv[], int startindex)
throws IOException {
Path[] paths = new Path[(argv.length - startindex) / 2];
int j = 0;
for (int i = startindex; i < argv.length; i = i + 2) {
String path = argv[i];
long corruptOffset = Long.parseLong(argv[i+1]);
LOG.info("RaidShell recoverFile for " + path + " corruptOffset " + corruptOffset);
Path recovered = new Path("/tmp/recovered." + Time.now());
FileSystem fs = recovered.getFileSystem(conf);
DistributedFileSystem dfs = (DistributedFileSystem)fs;
Configuration raidConf = new Configuration(conf);
raidConf.set("fs.hdfs.impl",
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
raidConf.set("fs.raid.underlyingfs.impl",
"org.apache.hadoop.hdfs.DistributedFileSystem");
raidConf.setBoolean("fs.hdfs.impl.disable.cache", true);
java.net.URI dfsUri = dfs.getUri();
FileSystem raidFs = FileSystem.get(dfsUri, raidConf);
FileUtil.copy(raidFs, new Path(path), fs, recovered, false, conf);
paths[j] = recovered;
LOG.info("Raidshell created recovery file " + paths[j]);
j++;
}
return paths;
}
public int recoverAndPrint(String cmd, String argv[], int startindex)
throws IOException {
int exitCode = 0;
for (Path p : recover(cmd,argv,startindex)) {
System.out.println(p);
}
return exitCode;
}
public void recoverBlocks(String[] args, int startIndex)
throws IOException {
LOG.debug("Recovering blocks for " + (args.length - startIndex) + " files");
BlockFixer.BlockFixerHelper fixer = new BlockFixer.BlockFixerHelper(conf);
for (int i = startIndex; i < args.length; i++) {
String path = args[i];
fixer.fixFile(new Path(path));
}
}
/**
* checks whether a file has more than the allowable number of
* corrupt blocks and must therefore be considered corrupt
*/
private boolean isFileCorrupt(final DistributedFileSystem dfs,
final Path filePath)
throws IOException {
// corruptBlocksPerStripe:
// map stripe # -> # of corrupt blocks in that stripe (data + parity)
HashMap<Integer, Integer> corruptBlocksPerStripe =
new LinkedHashMap<Integer, Integer>();
// read conf
final int stripeBlocks = RaidNode.getStripeLength(conf);
// figure out which blocks are missing/corrupted
final FileStatus fileStatus = dfs.getFileStatus(filePath);
final long blockSize = fileStatus.getBlockSize();
final long fileLength = fileStatus.getLen();
final long fileLengthInBlocks = (fileLength / blockSize) +
(((fileLength % blockSize) == 0) ? 0L : 1L);
final long fileStripes = (fileLengthInBlocks / stripeBlocks) +
(((fileLengthInBlocks % stripeBlocks) == 0) ? 0L : 1L);
final BlockLocation[] fileBlocks =
dfs.getFileBlockLocations(fileStatus, 0, fileLength);
// figure out which stripes these corrupted blocks belong to
for (BlockLocation fileBlock: fileBlocks) {
int blockNo = (int) (fileBlock.getOffset() / blockSize);
final int stripe = (int) (blockNo / stripeBlocks);
if (fileBlock.isCorrupt() ||
(fileBlock.getNames().length == 0 && fileBlock.getLength() > 0)) {
if (corruptBlocksPerStripe.get(stripe) == null) {
corruptBlocksPerStripe.put(stripe, 1);
} else {
corruptBlocksPerStripe.put(stripe, corruptBlocksPerStripe.
get(stripe) + 1);
}
LOG.debug("file " + filePath.toString() + " corrupt in block " +
blockNo + "/" + fileLengthInBlocks + ", stripe " + stripe +
"/" + fileStripes);
} else {
LOG.debug("file " + filePath.toString() + " OK in block " + blockNo +
"/" + fileLengthInBlocks + ", stripe " + stripe + "/" +
fileStripes);
}
}
RaidInfo raidInfo = getFileRaidInfo(dfs, filePath);
// now check parity blocks
if (raidInfo.raidType != RaidType.NONE) {
checkParityBlocks(filePath, corruptBlocksPerStripe, blockSize,
fileStripes, raidInfo);
}
final int maxCorruptBlocksPerStripe = raidInfo.parityBlocksPerStripe;
for (int corruptBlocksInStripe: corruptBlocksPerStripe.values()) {
if (corruptBlocksInStripe > maxCorruptBlocksPerStripe) {
return true;
}
}
return false;
}
/**
* holds the type of raid used for a particular file
*/
private enum RaidType {
XOR,
RS,
NONE
}
/**
* holds raid type and parity file pair
*/
private class RaidInfo {
public RaidInfo(final RaidType raidType,
final RaidNode.ParityFilePair parityPair,
final int parityBlocksPerStripe) {
this.raidType = raidType;
this.parityPair = parityPair;
this.parityBlocksPerStripe = parityBlocksPerStripe;
}
public final RaidType raidType;
public final RaidNode.ParityFilePair parityPair;
public final int parityBlocksPerStripe;
}
/**
 * returns the raid info for a given file
*/
private RaidInfo getFileRaidInfo(final DistributedFileSystem dfs,
final Path filePath)
throws IOException {
// now look for the parity file
Path destPath = null;
RaidNode.ParityFilePair ppair = null;
try {
// look for xor parity file first
destPath = RaidNode.xorDestinationPath(conf);
ppair = RaidNode.getParityFile(destPath, filePath, conf);
} catch (FileNotFoundException ignore) {
}
if (ppair != null) {
return new RaidInfo(RaidType.XOR, ppair, 1);
} else {
// failing that, look for rs parity file
try {
destPath = RaidNode.rsDestinationPath(conf);
ppair = RaidNode.getParityFile(destPath, filePath, conf);
} catch (FileNotFoundException ignore) {
}
if (ppair != null) {
return new RaidInfo(RaidType.RS, ppair, RaidNode.rsParityLength(conf));
} else {
return new RaidInfo(RaidType.NONE, null, 0);
}
}
}
/**
 * Gets the parity blocks corresponding to a file. Returns the parity
 * blocks themselves in the case of DFS, and the part-file blocks that
 * contain the parity blocks in the case of a HAR file system.
*/
private BlockLocation[] getParityBlocks(final Path filePath,
final long blockSize,
final long fileStripes,
final RaidInfo raidInfo)
throws IOException {
final String parityPathStr = raidInfo.parityPair.getPath().toUri().
getPath();
FileSystem parityFS = raidInfo.parityPair.getFileSystem();
// get parity file metadata
FileStatus parityFileStatus = parityFS.
getFileStatus(new Path(parityPathStr));
long parityFileLength = parityFileStatus.getLen();
if (parityFileLength != fileStripes * raidInfo.parityBlocksPerStripe *
blockSize) {
throw new IOException("expected parity file of length" +
(fileStripes * raidInfo.parityBlocksPerStripe *
blockSize) +
" but got parity file of length " +
parityFileLength);
}
BlockLocation[] parityBlocks =
parityFS.getFileBlockLocations(parityFileStatus, 0L, parityFileLength);
if (parityFS instanceof DistributedFileSystem ||
parityFS instanceof DistributedRaidFileSystem) {
long parityBlockSize = parityFileStatus.getBlockSize();
if (parityBlockSize != blockSize) {
throw new IOException("file block size is " + blockSize +
" but parity file block size is " +
parityBlockSize);
}
} else if (parityFS instanceof HarFileSystem) {
LOG.debug("HAR FS found");
} else {
LOG.warn("parity file system is not of a supported type");
}
return parityBlocks;
}
/**
* checks the parity blocks for a given file and modifies
* corruptBlocksPerStripe accordingly
*/
private void checkParityBlocks(final Path filePath,
final HashMap<Integer, Integer>
corruptBlocksPerStripe,
final long blockSize,
final long fileStripes,
final RaidInfo raidInfo)
throws IOException {
// get the blocks of the parity file
// because of har, multiple blocks may be returned as one container block
BlockLocation[] containerBlocks = getParityBlocks(filePath, blockSize,
fileStripes, raidInfo);
long parityStripeLength = blockSize *
((long) raidInfo.parityBlocksPerStripe);
long parityFileLength = parityStripeLength * fileStripes;
long parityBlocksFound = 0L;
for (BlockLocation cb: containerBlocks) {
if (cb.getLength() % blockSize != 0) {
throw new IOException("container block size is not " +
"multiple of parity block size");
}
int blocksInContainer = (int) (cb.getLength() / blockSize);
LOG.debug("found container with offset " + cb.getOffset() +
", length " + cb.getLength());
for (long offset = cb.getOffset();
offset < cb.getOffset() + cb.getLength();
offset += blockSize) {
long block = offset / blockSize;
int stripe = (int) (offset / parityStripeLength);
if (stripe < 0) {
// before the beginning of the parity file
continue;
}
if (stripe >= fileStripes) {
// past the end of the parity file
break;
}
parityBlocksFound++;
if (cb.isCorrupt() ||
(cb.getNames().length == 0 && cb.getLength() > 0)) {
LOG.debug("parity file for " + filePath.toString() +
" corrupt in block " + block +
", stripe " + stripe + "/" + fileStripes);
if (corruptBlocksPerStripe.get(stripe) == null) {
corruptBlocksPerStripe.put(stripe, 1);
} else {
corruptBlocksPerStripe.put(stripe,
corruptBlocksPerStripe.get(stripe) +
1);
}
} else {
LOG.debug("parity file for " + filePath.toString() +
" OK in block " + block +
", stripe " + stripe + "/" + fileStripes);
}
}
}
long parityBlocksExpected = raidInfo.parityBlocksPerStripe * fileStripes;
if (parityBlocksFound != parityBlocksExpected ) {
throw new IOException("expected " + parityBlocksExpected +
" parity blocks but got " + parityBlocksFound);
}
}
/**
* checks the raided file system, prints a list of corrupt files to
* System.out and returns the number of corrupt files
*/
public int fsck(final String path) throws IOException {
FileSystem fs = (new Path(path)).getFileSystem(conf);
// if we got a raid fs, get the underlying fs
if (fs instanceof DistributedRaidFileSystem) {
fs = ((DistributedRaidFileSystem) fs).getFileSystem();
}
// check that we have a distributed fs
if (!(fs instanceof DistributedFileSystem)) {
throw new IOException("expected DistributedFileSystem but got " +
fs.getClass().getName());
}
final DistributedFileSystem dfs = (DistributedFileSystem) fs;
// get conf settings
String xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
String rsPrefix = RaidNode.rsDestinationPath(conf).toUri().getPath();
if (!xorPrefix.endsWith("/")) {
xorPrefix = xorPrefix + "/";
}
if (!rsPrefix.endsWith("/")) {
rsPrefix = rsPrefix + "/";
}
LOG.debug("prefixes: " + xorPrefix + ", " + rsPrefix);
// get a list of corrupted files (not considering parity blocks just yet)
// from the name node
// these are the only files we need to consider:
// if a file has no corrupted data blocks, it is OK even if some
// of its parity blocks are corrupted, so no further checking is
// necessary
final String[] files = RaidDFSUtil.getCorruptFiles(dfs);
final List<Path> corruptFileCandidates = new LinkedList<Path>();
for (final String f: files) {
final Path p = new Path(f);
// if this file is a parity file
// or if it does not start with the specified path,
// ignore it
if (!p.toString().startsWith(xorPrefix) &&
!p.toString().startsWith(rsPrefix) &&
p.toString().startsWith(path)) {
corruptFileCandidates.add(p);
}
}
// filter files marked for deletion
RaidUtils.filterTrash(conf, corruptFileCandidates);
int numberOfCorruptFiles = 0;
for (final Path corruptFileCandidate: corruptFileCandidates) {
if (isFileCorrupt(dfs, corruptFileCandidate)) {
System.out.println(corruptFileCandidate.toString());
numberOfCorruptFiles++;
}
}
return numberOfCorruptFiles;
}
/**
* main() has some simple utility methods
*/
public static void main(String argv[]) throws Exception {
RaidShell shell = null;
try {
shell = new RaidShell(new Configuration());
int res = ToolRunner.run(shell, argv);
System.exit(res);
} catch (RPC.VersionMismatch v) {
System.err.println("Version Mismatch between client and server" +
"... command aborted.");
System.exit(-1);
} catch (IOException e) {
System.err.
println("Bad connection to RaidNode or NameNode. command aborted.");
System.err.println(e.getMessage());
System.exit(-1);
} finally {
shell.close();
}
}
}
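A sketch of driving the shell programmatically rather than from the command line (illustrative only; RaidShellDemo and the /user/foo path are made up, and a running HDFS with raid configured is assumed, since -fsck fetches corrupt-file information from the NameNode):
package org.apache.hadoop.raid;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
public class RaidShellDemo {
  public static void main(String[] args) throws Exception {
    RaidShell shell = new RaidShell(new Configuration());
    // Prints the files under /user/foo with more corrupt blocks than the
    // parity can repair, and returns their count.
    int corrupt = ToolRunner.run(shell, new String[] {"-fsck", "/user/foo"});
    System.out.println(corrupt + " corrupt file(s)");
  }
}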


@ -1,171 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
public class RaidUtils {
/**
* A {@link Progressable} that does nothing.
*
* We could have used Reporter.NULL here but that would introduce
* a dependency on mapreduce.
*/
public static class DummyProgressable implements Progressable {
/**
* Do nothing.
*/
@Override
public void progress() {
}
}
/**
* Removes files matching the trash file pattern.
*/
public static void filterTrash(Configuration conf, List<Path> files) {
// Remove files under Trash.
String trashPattern = conf.get("raid.blockfixer.trash.pattern",
"^/user/.*/\\.Trash.*");
for (Iterator<Path> it = files.iterator(); it.hasNext(); ) {
String pathStr = it.next().toString();
if (Pattern.matches(trashPattern, pathStr)) {
it.remove();
}
}
}
public static void readTillEnd(InputStream in, byte[] buf, boolean eofOK)
throws IOException {
int toRead = buf.length;
int numRead = 0;
while (numRead < toRead) {
int nread = in.read(buf, numRead, toRead - numRead);
if (nread < 0) {
if (eofOK) {
// EOF hit, fill with zeros
Arrays.fill(buf, numRead, toRead, (byte)0);
numRead = toRead;
} else {
// EOF hit, throw.
throw new IOException("Premature EOF");
}
} else {
numRead += nread;
}
}
}
public static void copyBytes(
InputStream in, OutputStream out, byte[] buf, long count)
throws IOException {
for (long bytesRead = 0; bytesRead < count; ) {
int toRead = Math.min(buf.length, (int)(count - bytesRead));
IOUtils.readFully(in, buf, 0, toRead);
bytesRead += toRead;
out.write(buf, 0, toRead);
}
}
public static class ZeroInputStream extends InputStream
implements Seekable, PositionedReadable {
private long endOffset;
private long pos;
public ZeroInputStream(long endOffset) {
this.endOffset = endOffset;
this.pos = 0;
}
@Override
public int read() throws IOException {
if (pos < endOffset) {
pos++;
return 0;
}
return -1;
}
@Override
public int available() throws IOException {
return (int)(endOffset - pos);
}
@Override
public long getPos() throws IOException {
return pos;
}
@Override
public void seek(long seekOffset) throws IOException {
if (seekOffset < endOffset) {
pos = seekOffset;
} else {
throw new IOException("Illegal Offset" + pos);
}
}
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
return false;
}
@Override
public int read(long position, byte[] buffer, int offset, int length)
throws IOException {
int count = 0;
for (; position < endOffset && count < length; position++) {
buffer[offset + count] = 0;
count++;
}
return count;
}
@Override
public void readFully(long position, byte[] buffer, int offset, int length)
throws IOException {
int count = 0;
for (; position < endOffset && count < length; position++) {
buffer[offset + count] = 0;
count++;
}
if (count < length) {
throw new IOException("Premature EOF");
}
}
@Override
public void readFully(long position, byte[] buffer) throws IOException {
readFully(position, buffer, 0, buffer.length);
}
}
}
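A small sketch (illustrative only; RaidUtilsDemo is a made-up name) exercising ZeroInputStream, readTillEnd and filterTrash with the default trash pattern:
package org.apache.hadoop.raid;
import java.util.LinkedList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
public class RaidUtilsDemo {
  public static void main(String[] args) throws Exception {
    // A 16-byte stream of zeros, e.g. standing in for a missing block.
    RaidUtils.ZeroInputStream zeros = new RaidUtils.ZeroInputStream(16);
    byte[] buf = new byte[32];
    RaidUtils.readTillEnd(zeros, buf, true);   // EOF is OK: the tail stays zero
    // Drop paths that match the default trash pattern.
    List<Path> paths = new LinkedList<Path>();
    paths.add(new Path("/user/alice/.Trash/Current/data/part-0"));
    paths.add(new Path("/data/part-0"));
    RaidUtils.filterTrash(new Configuration(), paths);
    System.out.println(paths);    // only /data/part-0 remains
  }
}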


@ -1,183 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.util.Set;
public class ReedSolomonCode implements ErasureCode {
private final int stripeSize;
private final int paritySize;
private final int[] generatingPolynomial;
private final int PRIMITIVE_ROOT = 2;
private final int[] primitivePower;
private final GaloisField GF = GaloisField.getInstance();
private int[] errSignature;
private final int[] paritySymbolLocations;
private final int[] dataBuff;
public ReedSolomonCode(int stripeSize, int paritySize) {
assert(stripeSize + paritySize < GF.getFieldSize());
this.stripeSize = stripeSize;
this.paritySize = paritySize;
this.errSignature = new int[paritySize];
this.paritySymbolLocations = new int[paritySize];
this.dataBuff = new int[paritySize + stripeSize];
for (int i = 0; i < paritySize; i++) {
paritySymbolLocations[i] = i;
}
this.primitivePower = new int[stripeSize + paritySize];
// compute powers of the primitive root
for (int i = 0; i < stripeSize + paritySize; i++) {
primitivePower[i] = GF.power(PRIMITIVE_ROOT, i);
}
// compute generating polynomial
int[] gen = {1};
int[] poly = new int[2];
for (int i = 0; i < paritySize; i++) {
poly[0] = primitivePower[i];
poly[1] = 1;
gen = GF.multiply(gen, poly);
}
// generating polynomial has all generating roots
generatingPolynomial = gen;
}
@Override
public void encode(int[] message, int[] parity) {
assert(message.length == stripeSize && parity.length == paritySize);
for (int i = 0; i < paritySize; i++) {
dataBuff[i] = 0;
}
for (int i = 0; i < stripeSize; i++) {
dataBuff[i + paritySize] = message[i];
}
GF.remainder(dataBuff, generatingPolynomial);
for (int i = 0; i < paritySize; i++) {
parity[i] = dataBuff[i];
}
}
@Override
public void decode(int[] data, int[] erasedLocation, int[] erasedValue) {
if (erasedLocation.length == 0) {
return;
}
assert(erasedLocation.length == erasedValue.length);
for (int i = 0; i < erasedLocation.length; i++) {
data[erasedLocation[i]] = 0;
}
for (int i = 0; i < erasedLocation.length; i++) {
errSignature[i] = primitivePower[erasedLocation[i]];
erasedValue[i] = GF.substitute(data, primitivePower[i]);
}
GF.solveVandermondeSystem(errSignature, erasedValue, erasedLocation.length);
}
@Override
public int stripeSize() {
return this.stripeSize;
}
@Override
public int paritySize() {
return this.paritySize;
}
@Override
public int symbolSize() {
return (int) Math.round(Math.log(GF.getFieldSize()) / Math.log(2));
}
/**
* Given parity symbols followed by message symbols, return the locations of
* symbols that are corrupted. Can resolve up to (parity length / 2) error
* locations.
* @param data The message and parity. The parity should be placed in the
* first part of the array. In each integer, the relevant portion
* is present in the least significant bits of each int.
* The number of elements in data is stripeSize() + paritySize().
* <b>Note that data may be changed after calling this method.</b>
* @param errorLocations The set to put the error location results
 * @return true if the error locations could be resolved and the corrected data passes the parity check
*/
public boolean computeErrorLocations(int[] data,
Set<Integer> errorLocations) {
assert(data.length == paritySize + stripeSize && errorLocations != null);
errorLocations.clear();
int maxError = paritySize / 2;
int[][] syndromeMatrix = new int[maxError][];
for (int i = 0; i < syndromeMatrix.length; ++i) {
syndromeMatrix[i] = new int[maxError + 1];
}
int[] syndrome = new int[paritySize];
if (computeSyndrome(data, syndrome)) {
// Parity check OK. No error location added.
return true;
}
for (int i = 0; i < maxError; ++i) {
for (int j = 0; j < maxError + 1; ++j) {
syndromeMatrix[i][j] = syndrome[i + j];
}
}
GF.gaussianElimination(syndromeMatrix);
int[] polynomial = new int[maxError + 1];
polynomial[0] = 1;
for (int i = 0; i < maxError; ++i) {
polynomial[i + 1] = syndromeMatrix[maxError - 1 - i][maxError];
}
for (int i = 0; i < paritySize + stripeSize; ++i) {
int possibleRoot = GF.divide(1, primitivePower[i]);
if (GF.substitute(polynomial, possibleRoot) == 0) {
errorLocations.add(i);
}
}
// Now recover with error locations and check the syndrome again
int[] locations = new int[errorLocations.size()];
int k = 0;
for (int loc : errorLocations) {
locations[k++] = loc;
}
int [] erasedValue = new int[locations.length];
decode(data, locations, erasedValue);
for (int i = 0; i < locations.length; ++i) {
data[locations[i]] = erasedValue[i];
}
return computeSyndrome(data, syndrome);
}
/**
* Compute the syndrome of the input [parity, message]
* @param data [parity, message]
* @param syndrome The syndromes (checksums) of the data
 * @return true if the syndromes are all zeros
*/
private boolean computeSyndrome(int[] data, int [] syndrome) {
boolean corruptionFound = false;
for (int i = 0; i < paritySize; i++) {
syndrome[i] = GF.substitute(data, primitivePower[i]);
if (syndrome[i] != 0) {
corruptionFound = true;
}
}
return !corruptionFound;
}
}
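A minimal usage sketch of the class above, added for illustration only (not part of the original sources). It assumes it runs in the org.apache.hadoop.raid package and exercises just the encode/decode API shown above, using the [parity | message] buffer layout that decode() expects; the stripe and parity lengths are example values.
class ReedSolomonCodeSketch {
  public static void main(String[] args) {
    int stripeSize = 10, paritySize = 4;
    ReedSolomonCode code = new ReedSolomonCode(stripeSize, paritySize);
    // One symbol (a byte value, 0..255) per data block in the stripe.
    int[] message = new int[stripeSize];
    for (int i = 0; i < stripeSize; i++) {
      message[i] = i + 1;
    }
    int[] parity = new int[paritySize];
    code.encode(message, parity);
    // decode() expects parity symbols first, then message symbols.
    int[] data = new int[paritySize + stripeSize];
    System.arraycopy(parity, 0, data, 0, paritySize);
    System.arraycopy(message, 0, data, paritySize, stripeSize);
    // Pretend two data symbols were lost; decode() recomputes their values.
    int[] erasedLocation = {paritySize + 2, paritySize + 5};
    int[] erasedValue = new int[erasedLocation.length];
    code.decode(data, erasedLocation, erasedValue);
    System.out.println("recovered: " + erasedValue[0] + ", " + erasedValue[1]);
  }
}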

View File

@ -1,226 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.OutputStream;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockMissingException;
public class ReedSolomonDecoder extends Decoder {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.ReedSolomonDecoder");
private ErasureCode reedSolomonCode;
public ReedSolomonDecoder(
Configuration conf, int stripeSize, int paritySize) {
super(conf, stripeSize, paritySize);
this.reedSolomonCode = new ReedSolomonCode(stripeSize, paritySize);
}
@Override
protected void fixErasedBlock(
FileSystem fs, Path srcFile,
FileSystem parityFs, Path parityFile,
long blockSize, long errorOffset, long bytesToSkip, long limit,
OutputStream out) throws IOException {
FSDataInputStream[] inputs = new FSDataInputStream[stripeSize + paritySize];
int[] erasedLocations = buildInputs(fs, srcFile, parityFs, parityFile,
errorOffset, inputs);
int blockIdxInStripe = ((int)(errorOffset/blockSize)) % stripeSize;
int erasedLocationToFix = paritySize + blockIdxInStripe;
writeFixedBlock(inputs, erasedLocations, erasedLocationToFix,
bytesToSkip, limit, out);
}
protected int[] buildInputs(FileSystem fs, Path srcFile,
FileSystem parityFs, Path parityFile,
long errorOffset, FSDataInputStream[] inputs)
throws IOException {
LOG.info("Building inputs to recover block starting at " + errorOffset);
FileStatus srcStat = fs.getFileStatus(srcFile);
long blockSize = srcStat.getBlockSize();
long blockIdx = (int)(errorOffset / blockSize);
long stripeIdx = blockIdx / stripeSize;
LOG.info("FileSize = " + srcStat.getLen() + ", blockSize = " + blockSize +
", blockIdx = " + blockIdx + ", stripeIdx = " + stripeIdx);
ArrayList<Integer> erasedLocations = new ArrayList<Integer>();
// First open streams to the parity blocks.
for (int i = 0; i < paritySize; i++) {
long offset = blockSize * (stripeIdx * paritySize + i);
FSDataInputStream in = parityFs.open(
parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
in.seek(offset);
LOG.info("Adding " + parityFile + ":" + offset + " as input " + i);
inputs[i] = in;
}
// Now open streams to the data blocks.
for (int i = paritySize; i < paritySize + stripeSize; i++) {
long offset = blockSize * (stripeIdx * stripeSize + i - paritySize);
if (offset == errorOffset) {
LOG.info(srcFile + ":" + offset +
" is known to have error, adding zeros as input " + i);
inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(
offset + blockSize));
erasedLocations.add(i);
} else if (offset > srcStat.getLen()) {
LOG.info(srcFile + ":" + offset +
" is past file size, adding zeros as input " + i);
inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(
offset + blockSize));
} else {
FSDataInputStream in = fs.open(
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
in.seek(offset);
LOG.info("Adding " + srcFile + ":" + offset + " as input " + i);
inputs[i] = in;
}
}
if (erasedLocations.size() > paritySize) {
String msg = "Too many erased locations: " + erasedLocations.size();
LOG.error(msg);
throw new IOException(msg);
}
int[] locs = new int[erasedLocations.size()];
for (int i = 0; i < locs.length; i++) {
locs[i] = erasedLocations.get(i);
}
return locs;
}
/**
* Decode the inputs provided and write to the output.
* @param inputs array of inputs.
* @param erasedLocations indexes in the inputs which are known to be erased.
* @param erasedLocationToFix index in the inputs which needs to be fixed.
* @param skipBytes number of bytes to skip before writing to output.
* @param limit maximum number of bytes to be written/skipped.
* @param out the output.
* @throws IOException
*/
void writeFixedBlock(
FSDataInputStream[] inputs,
int[] erasedLocations,
int erasedLocationToFix,
long skipBytes,
long limit,
OutputStream out) throws IOException {
LOG.info("Need to write " + (limit - skipBytes) +
" bytes for erased location index " + erasedLocationToFix);
int[] tmp = new int[inputs.length];
int[] decoded = new int[erasedLocations.length];
long toDiscard = skipBytes;
// Loop while the number of skipped + written bytes is less than the max.
for (long written = 0; skipBytes + written < limit; ) {
erasedLocations = readFromInputs(inputs, erasedLocations, limit);
if (decoded.length != erasedLocations.length) {
decoded = new int[erasedLocations.length];
}
int toWrite = (int)Math.min((long)bufSize, limit - (skipBytes + written));
if (toDiscard >= toWrite) {
toDiscard -= toWrite;
continue;
}
// Decode the bufSize bytes that were read, one byte position at a time.
for (int i = 0; i < bufSize; i++) {
performDecode(readBufs, writeBufs, i, tmp, erasedLocations, decoded);
}
for (int i = 0; i < erasedLocations.length; i++) {
if (erasedLocations[i] == erasedLocationToFix) {
toWrite -= toDiscard;
out.write(writeBufs[i], (int)toDiscard, toWrite);
toDiscard = 0;
written += toWrite;
LOG.debug("Wrote " + toWrite + " bytes for erased location index " +
erasedLocationToFix);
break;
}
}
}
}
int[] readFromInputs(
FSDataInputStream[] inputs,
int[] erasedLocations,
long limit) throws IOException {
// For every input, read bufSize bytes of data.
for (int i = 0; i < inputs.length; i++) {
long curPos = inputs[i].getPos();
try {
RaidUtils.readTillEnd(inputs[i], readBufs[i], true);
continue;
} catch (BlockMissingException e) {
LOG.error("Encountered BlockMissingException in stream " + i);
} catch (ChecksumException e) {
LOG.error("Encountered ChecksumException in stream " + i);
}
// Found a new erased location.
if (erasedLocations.length == paritySize) {
String msg = "Too many read errors";
LOG.error(msg);
throw new IOException(msg);
}
// Add this stream to the set of erased locations.
int[] newErasedLocations = new int[erasedLocations.length + 1];
for (int j = 0; j < erasedLocations.length; j++) {
newErasedLocations[j] = erasedLocations[j];
}
newErasedLocations[newErasedLocations.length - 1] = i;
erasedLocations = newErasedLocations;
LOG.info("Using zeros for stream " + i);
inputs[i] = new FSDataInputStream(
new RaidUtils.ZeroInputStream(curPos + limit));
inputs[i].seek(curPos);
RaidUtils.readTillEnd(inputs[i], readBufs[i], true);
}
return erasedLocations;
}
void performDecode(byte[][] readBufs, byte[][] writeBufs,
int idx, int[] inputs,
int[] erasedLocations, int[] decoded) {
for (int i = 0; i < decoded.length; i++) {
decoded[i] = 0;
}
for (int i = 0; i < inputs.length; i++) {
inputs[i] = readBufs[i][idx] & 0x000000FF;
}
reedSolomonCode.decode(inputs, erasedLocations, decoded);
for (int i = 0; i < decoded.length; i++) {
writeBufs[i][idx] = (byte)decoded[i];
}
}
}
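The block-offset arithmetic in buildInputs() and fixErasedBlock() above is easier to follow with concrete numbers. The sketch below is an added illustration only; the block size, stripe length, parity length, and error offset are assumed example values, not values taken from this code.
class DecoderOffsetSketch {
  public static void main(String[] args) {
    long blockSize = 8192L;
    int stripeSize = 10, paritySize = 4;
    long errorOffset = 90000L;                         // somewhere inside block 10
    long blockIdx = errorOffset / blockSize;           // 10
    long stripeIdx = blockIdx / stripeSize;            // 1
    // Parity input i (0..paritySize-1) is read from the parity file at this offset.
    long parityOffset0 = blockSize * (stripeIdx * paritySize + 0);             // 32768
    // Data input i (paritySize..paritySize+stripeSize-1) is read from the source file here.
    long dataOffset4 = blockSize * (stripeIdx * stripeSize + 4 - paritySize);  // 81920
    // The erased location to rebuild, as computed in fixErasedBlock().
    int erasedLocationToFix = paritySize + (int) (blockIdx % stripeSize);      // 4
    System.out.println(parityOffset0 + " " + dataOffset4 + " " + erasedLocationToFix);
  }
}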

View File

@ -1,96 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;
public class ReedSolomonEncoder extends Encoder {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.ReedSolomonEncoder");
private ErasureCode reedSolomonCode;
public ReedSolomonEncoder(
Configuration conf, int stripeSize, int paritySize) {
super(conf, stripeSize, paritySize);
this.reedSolomonCode = new ReedSolomonCode(stripeSize, paritySize);
}
protected void encodeStripe(
InputStream[] blocks,
long stripeStartOffset,
long blockSize,
OutputStream[] outs,
Progressable reporter) throws IOException {
int[] data = new int[stripeSize];
int[] code = new int[paritySize];
for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
// Read bufSize bytes from each block.
for (int i = 0; i < blocks.length; i++) {
RaidUtils.readTillEnd(blocks[i], readBufs[i], true);
}
// Encode the data read.
for (int j = 0; j < bufSize; j++) {
performEncode(readBufs, writeBufs, j, data, code);
}
// Now that we have some data to write, send it to the temp files.
for (int i = 0; i < paritySize; i++) {
outs[i].write(writeBufs[i], 0, bufSize);
}
if (reporter != null) {
reporter.progress();
}
}
}
void performEncode(byte[][] readBufs, byte[][] writeBufs, int idx,
int[] data, int[] code) {
for (int i = 0; i < paritySize; i++) {
code[i] = 0;
}
for (int i = 0; i < stripeSize; i++) {
data[i] = readBufs[i][idx] & 0x000000FF;
}
reedSolomonCode.encode(data, code);
for (int i = 0; i < paritySize; i++) {
writeBufs[i][idx] = (byte)code[i];
}
}
@Override
public Path getParityTempPath() {
return new Path(RaidNode.rsTempPrefix(conf));
}
}

View File

@ -1,92 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.OutputStream;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
public class XORDecoder extends Decoder {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.XORDecoder");
public XORDecoder(
Configuration conf, int stripeSize) {
super(conf, stripeSize, 1);
}
@Override
protected void fixErasedBlock(
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
long blockSize, long errorOffset, long bytesToSkip, long limit,
OutputStream out) throws IOException {
LOG.info("Fixing block at " + srcFile + ":" + errorOffset +
", skipping " + bytesToSkip + ", limit " + limit);
FileStatus srcStat = fs.getFileStatus(srcFile);
ArrayList<FSDataInputStream> xorinputs = new ArrayList<FSDataInputStream>();
FSDataInputStream parityFileIn = parityFs.open(parityFile);
parityFileIn.seek(parityOffset(errorOffset, blockSize));
xorinputs.add(parityFileIn);
long errorBlockOffset = (errorOffset / blockSize) * blockSize;
long[] srcOffsets = stripeOffsets(errorOffset, blockSize);
for (int i = 0; i < srcOffsets.length; i++) {
if (srcOffsets[i] == errorBlockOffset) {
LOG.info("Skipping block at " + srcFile + ":" + errorBlockOffset);
continue;
}
if (srcOffsets[i] < srcStat.getLen()) {
FSDataInputStream in = fs.open(srcFile);
in.seek(srcOffsets[i]);
xorinputs.add(in);
}
}
FSDataInputStream[] inputs = xorinputs.toArray(
new FSDataInputStream[]{null});
ParityInputStream recovered =
new ParityInputStream(inputs, limit, readBufs[0], writeBufs[0]);
recovered.skip(bytesToSkip);
recovered.drain(out, null);
}
protected long[] stripeOffsets(long errorOffset, long blockSize) {
long[] offsets = new long[stripeSize];
long stripeIdx = errorOffset / (blockSize * stripeSize);
long startOffsetOfStripe = stripeIdx * stripeSize * blockSize;
for (int i = 0; i < stripeSize; i++) {
offsets[i] = startOffsetOfStripe + i * blockSize;
}
return offsets;
}
protected long parityOffset(long errorOffset, long blockSize) {
long stripeIdx = errorOffset / (blockSize * stripeSize);
return stripeIdx * blockSize;
}
}
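As a quick illustration of stripeOffsets() and parityOffset() above, the following sketch (added here, not original code) replays the arithmetic for an assumed 8 KB block size and a stripe length of 3.
class XorOffsetSketch {
  public static void main(String[] args) {
    long blockSize = 8192L;
    int stripeSize = 3;
    long errorOffset = 20000L;                                      // inside block 2 of stripe 0
    long stripeIdx = errorOffset / (blockSize * stripeSize);        // 0
    long startOffsetOfStripe = stripeIdx * stripeSize * blockSize;  // 0
    long parityOffset = stripeIdx * blockSize;                      // 0: one XOR parity block per stripe
    long errorBlockOffset = (errorOffset / blockSize) * blockSize;  // 16384: skipped when XOR-ing
    System.out.println(startOffsetOfStripe + " " + parityOffset + " " + errorBlockOffset);
  }
}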

View File

@ -1,63 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.OutputStream;
import java.io.InputStream;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;
public class XOREncoder extends Encoder {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.XOREncoder");
public XOREncoder(
Configuration conf, int stripeSize) {
super(conf, stripeSize, 1);
}
@Override
protected void encodeStripe(
InputStream[] blocks,
long stripeStartOffset,
long blockSize,
OutputStream[] outs,
Progressable reporter) throws IOException {
LOG.info("Peforming XOR ");
ParityInputStream parityIn =
new ParityInputStream(blocks, blockSize, readBufs[0], writeBufs[0]);
try {
parityIn.drain(outs[0], reporter);
} finally {
parityIn.close();
}
}
@Override
public Path getParityTempPath() {
return new Path(RaidNode.unraidTmpDirectory(conf));
}
}

View File

@ -1,256 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid.protocol;
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.util.Properties;
import java.util.Enumeration;
import java.lang.Math;
import java.text.SimpleDateFormat;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
/**
* Maintains information about one policy
*/
public class PolicyInfo implements Writable {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.protocol.PolicyInfo");
protected static final SimpleDateFormat dateFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private Path srcPath; // the specified src path
private String policyName; // name of policy
private ErasureCodeType codeType;// the erasure code used
private String description; // A verbose description of this policy
private Configuration conf; // Hadoop configuration
private Properties properties; // Policy-dependent properties
private ReentrantReadWriteLock plock; // protects policy operations.
public static enum ErasureCodeType {
XOR, RS;
public static ErasureCodeType fromString(String s) {
if (XOR.toString().equalsIgnoreCase(s)) {
return XOR;
}
if (RS.toString().equalsIgnoreCase(s)) {
return RS;
}
return null;
}
}
/**
* Create the empty object
*/
public PolicyInfo() {
this.conf = null;
this.policyName = "";
this.description = "";
this.srcPath = null;
this.properties = new Properties();
this.plock = new ReentrantReadWriteLock();
}
/**
* Create the metadata that describes a policy
*/
public PolicyInfo(String policyName, Configuration conf) {
this.conf = conf;
this.policyName = policyName;
this.description = "";
this.srcPath = null;
this.properties = new Properties();
this.plock = new ReentrantReadWriteLock();
}
/**
* Copy fields from another PolicyInfo
*/
public void copyFrom(PolicyInfo other) {
if (other.conf != null) {
this.conf = other.conf;
}
if (other.policyName != null && other.policyName.length() > 0) {
this.policyName = other.policyName;
}
if (other.description != null && other.description.length() > 0) {
this.description = other.description;
}
if (other.codeType != null) {
this.codeType = other.codeType;
}
if (other.srcPath != null) {
this.srcPath = other.srcPath;
}
for (Object key : other.properties.keySet()) {
String skey = (String) key;
this.properties.setProperty(skey, other.properties.getProperty(skey));
}
}
/**
* Sets the input path on which this policy has to be applied
*/
public void setSrcPath(String in) throws IOException {
srcPath = new Path(in);
srcPath = srcPath.makeQualified(srcPath.getFileSystem(conf));
}
/**
* Set the erasure code type used in this policy
*/
public void setErasureCode(String code) {
this.codeType = ErasureCodeType.fromString(code);
}
/**
* Set the description of this policy.
*/
public void setDescription(String des) {
this.description = des;
}
/**
* Sets an internal property.
* @param name property name.
* @param value property value.
*/
public void setProperty(String name, String value) {
properties.setProperty(name, value);
}
/**
* Returns the value of an internal property.
* @param name property name.
*/
public String getProperty(String name) {
return properties.getProperty(name);
}
/**
* Get the name of this policy.
*/
public String getName() {
return this.policyName;
}
/**
 * Get the erasure code type used by this policy.
*/
public ErasureCodeType getErasureCode() {
return this.codeType;
}
/**
* Get the srcPath
*/
public Path getSrcPath() {
return srcPath;
}
/**
 * Get the expanded (glob-resolved) forms of the srcPath
*/
public Path[] getSrcPathExpanded() throws IOException {
FileSystem fs = srcPath.getFileSystem(conf);
// globbing on srcPath
FileStatus[] gpaths = fs.globStatus(srcPath);
if (gpaths == null) {
return null;
}
Path[] values = new Path[gpaths.length];
for (int i = 0; i < gpaths.length; i++) {
Path p = gpaths[i].getPath();
values[i] = p.makeQualified(fs);
}
return values;
}
/**
* Convert this policy into a printable form
*/
public String toString() {
StringBuffer buff = new StringBuffer();
buff.append("Policy Name:\t" + policyName + " --------------------\n");
buff.append("Source Path:\t" + srcPath + "\n");
buff.append("Erasure Code:\t" + codeType + "\n");
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements();) {
String name = (String) e.nextElement();
buff.append( name + ":\t" + properties.getProperty(name) + "\n");
}
if (description.length() > 0) {
int len = Math.min(description.length(), 80);
String sub = description.substring(0, len).trim();
sub = sub.replaceAll("\n", " ");
buff.append("Description:\t" + sub + "...\n");
}
return buff.toString();
}
//////////////////////////////////////////////////
// Writable
//////////////////////////////////////////////////
static { // register a ctor
WritableFactories.setFactory
(PolicyInfo.class,
new WritableFactory() {
public Writable newInstance() { return new PolicyInfo(); }
});
}
public void write(DataOutput out) throws IOException {
Text.writeString(out, srcPath.toString());
Text.writeString(out, policyName);
Text.writeString(out, codeType.toString());
Text.writeString(out, description);
out.writeInt(properties.size());
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements();) {
String name = (String) e.nextElement();
Text.writeString(out, name);
Text.writeString(out, properties.getProperty(name));
}
}
public void readFields(DataInput in) throws IOException {
this.srcPath = new Path(Text.readString(in));
this.policyName = Text.readString(in);
this.codeType = ErasureCodeType.fromString(Text.readString(in));
this.description = Text.readString(in);
for (int n = in.readInt(); n>0; n--) {
String name = Text.readString(in);
String value = Text.readString(in);
properties.setProperty(name,value);
}
}
}
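A short usage sketch of the class above, added for illustration. The policy name, source path, and property name are made-up example values; the snippet assumes it runs in this package with the imports already present in this file.
class PolicyInfoSketch {
  public static void main(String[] args) throws java.io.IOException {
    Configuration conf = new Configuration();
    PolicyInfo info = new PolicyInfo("raidtest-policy", conf);
    info.setSrcPath("/user/dhruba/raidtest");   // qualified against the default FileSystem in conf
    info.setErasureCode("rs");                  // parsed case-insensitively into ErasureCodeType.RS
    info.setDescription("Example RS policy for the raidtest directory");
    info.setProperty("targetReplication", "1"); // a policy-dependent property; name chosen for illustration
    System.out.println(info);                   // printable form produced by toString()
  }
}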

View File

@ -1,106 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid.protocol;
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.util.Collection;
import java.util.List;
import java.util.LinkedList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;
/**
 * Maintains information about all policies that belong to a category.
* These policies have to be applied one-at-a-time and cannot be run
* simultaneously.
*/
public class PolicyList implements Writable {
public static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.protocol.PolicyList");
private List<PolicyInfo> category; // list of policies
private Path srcPath;
/**
* Create a new category of policies.
*/
public PolicyList() {
this.category = new LinkedList<PolicyInfo>();
this.srcPath = null;
}
/**
* Add a new policy to this category.
*/
public void add(PolicyInfo info) {
category.add(info);
}
public void setSrcPath(Configuration conf, String src) throws IOException {
srcPath = new Path(src);
srcPath = srcPath.makeQualified(srcPath.getFileSystem(conf));
}
public Path getSrcPath() {
return srcPath;
}
/**
* Returns the policies in this category
*/
public Collection<PolicyInfo> getAll() {
return category;
}
//////////////////////////////////////////////////
// Writable
//////////////////////////////////////////////////
static { // register a ctor
WritableFactories.setFactory
(PolicyList.class,
new WritableFactory() {
public Writable newInstance() { return new PolicyList(); }
});
}
public void write(DataOutput out) throws IOException {
out.writeInt(category.size());
for (PolicyInfo p : category) {
p.write(out);
}
}
public void readFields(DataInput in) throws IOException {
int count = in.readInt();
for (int i = 0; i < count; i++) {
PolicyInfo p = new PolicyInfo();
p.readFields(in);
add(p);
}
}
}
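The Writable round trip implemented above can be exercised in memory. This is an added sketch, not original code: DataOutputBuffer and DataInputBuffer come from org.apache.hadoop.io, and the PolicyInfo is given a source path and erasure code because PolicyInfo.write() serializes both.
class PolicyListSketch {
  public static void main(String[] args) throws java.io.IOException {
    Configuration conf = new Configuration();
    PolicyInfo info = new PolicyInfo("raidtest-policy", conf);
    info.setSrcPath("/user/dhruba/raidtest");
    info.setErasureCode("xor");
    PolicyList list = new PolicyList();
    list.add(info);
    // Serialize to an in-memory buffer and read it back.
    org.apache.hadoop.io.DataOutputBuffer out = new org.apache.hadoop.io.DataOutputBuffer();
    list.write(out);
    org.apache.hadoop.io.DataInputBuffer in = new org.apache.hadoop.io.DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    PolicyList copy = new PolicyList();
    copy.readFields(in);
    System.out.println("policies read back: " + copy.getAll().size());
  }
}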

View File

@ -1,58 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid.protocol;
import java.util.Collection;
import java.io.IOException;
import org.apache.hadoop.ipc.VersionedProtocol;
import org.apache.hadoop.fs.Path;
/**********************************************************************
 * RaidProtocol is used by user code, such as the
 * {@link org.apache.hadoop.raid.RaidShell} class, to communicate
 * with the RaidNode. User code can manipulate the configured policies.
*
**********************************************************************/
public interface RaidProtocol extends VersionedProtocol {
/**
* Compared to the previous version the following changes have been introduced:
* Only the latest change is reflected.
* 1: new protocol introduced
*/
public static final long versionID = 1L;
/**
 * Get a listing of all configured policies.
 * @return all categories of configured policies
 * @throws IOException
*/
public PolicyList[] getAllPolicies() throws IOException;
/**
* Unraid the specified input path. This is called when the specified file
* is corrupted. This call will move the specified file to file.old
* and then recover it from the RAID subsystem.
*
* @param inputPath The absolute pathname of the file to be recovered.
* @param corruptOffset The offset that has the corruption
*/
public String recoverFile(String inputPath, long corruptOffset) throws IOException;
}
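A hypothetical client-side sketch of the interface above, added for illustration. It assumes a RaidProtocol proxy has already been obtained (for example through RaidShell's RPC setup, which is not part of this excerpt); the file path and offset are made-up example values.
class RaidProtocolClientSketch {
  static void listPoliciesAndRecover(RaidProtocol raidnode) throws java.io.IOException {
    // Walk every category of configured policies.
    for (PolicyList list : raidnode.getAllPolicies()) {
      for (PolicyInfo policy : list.getAll()) {
        System.out.println(policy.getName() + " -> " + policy.getSrcPath());
      }
    }
    // Ask the RaidNode to rebuild a corrupted file from its parity data.
    String recoveredPath = raidnode.recoverFile("/user/dhruba/raidtest/file0", 0L);
    System.out.println("recovered copy at " + recoveredPath);
  }
}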

View File

@ -1,45 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Start the hadoop RaidNode process on the machine specified in the file conf/raidnode
usage="Usage: start-raidnode-remote.sh"
params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# get arguments
if [ $# -ge 1 ]; then
echo $usage
exit
fi
if [ -f "${HADOOP_CONF_DIR}/raidnode" ]; then
export HADOOP_SLAVES="${HADOOP_CONF_DIR}/raidnode"
echo "Starting raidnode at "`cat ${HADOOP_SLAVES}`
"$bin"/slaves.sh --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/start-raidnode.sh"
else
echo "No raidnode file in ${HADOOP_CONF_DIR}/raidnode"
fi

View File

@ -1,42 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Start hadoop RaidNode process
# Run this on RaidNode machine
usage="Usage: start-raidnode.sh"
params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# get arguments
if [ $# -ge 1 ]; then
echo $usage
fi
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
export HADOOP_OPTS="$HADOOP_OPTS $HADOOP_RAIDNODE_OPTS"
"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start org.apache.hadoop.raid.RaidNode

View File

@ -1,42 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Stop the hadoop RaidNode process on the machine specified in the file conf/raidnode
usage="Usage: stop-raidnode-remote.sh"
params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# get arguments
if [ $# -ge 1 ]; then
echo $usage
fi
if [ -f "${HADOOP_CONF_DIR}/raidnode" ]; then
export HADOOP_SLAVES="${HADOOP_CONF_DIR}/raidnode"
echo "Stopping raidnode at "`cat ${HADOOP_SLAVES}`
"$bin"/slaves.sh --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/stop-raidnode.sh"
else
echo "No raidnode file in ${HADOOP_CONF_DIR}/raidnode"
fi

View File

@ -1,39 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Stop hadoop RaidNode process
# Run this on RaidNode machine.
usage="Usage: stop-raidnode.sh"
params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# get arguments
if [ $# -ge 1 ]; then
echo $usage
fi
export HADOOP_OPTS="$HADOOP_OPTS $HADOOP_RAIDNODE_OPTS"
"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop org.apache.hadoop.raid.RaidNode

View File

@ -1,501 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.net.URI;
import java.util.Random;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.RaidUtils;
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;
public class TestRaidDfs {
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static String LOG_DIR = "target/raidlog";
final static long RELOAD_INTERVAL = 1000;
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidDfs");
final static int NUM_DATANODES = 3;
Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
FileSystem fileSys = null;
String jobTrackerName = null;
ErasureCodeType code;
int stripeLength;
private void mySetup(
String erasureCode, int rsParityLength) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("fs.raid.recoverylogdir", LOG_DIR);
conf.setInt(RaidNode.RS_PARITY_LENGTH_KEY, rsParityLength);
// scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.server.address", "localhost:0");
conf.setInt("hdfs.raid.stripeLength", stripeLength);
conf.set("xor".equals(erasureCode) ? RaidNode.RAID_LOCATION_KEY :
RaidNode.RAIDRS_LOCATION_KEY, "/destraid");
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
}
private void myTearDown() throws Exception {
if (dfs != null) { dfs.shutdown(); }
}
private LocatedBlocks getBlockLocations(Path file, long length)
throws IOException {
DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
return RaidDFSUtil.getBlockLocations(
dfs, file.toUri().getPath(), 0, length);
}
private LocatedBlocks getBlockLocations(Path file)
throws IOException {
FileStatus stat = fileSys.getFileStatus(file);
return getBlockLocations(file, stat.getLen());
}
private DistributedRaidFileSystem getRaidFS() throws IOException {
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
Configuration clientConf = new Configuration(conf);
clientConf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
clientConf.set("fs.raid.underlyingfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
URI dfsUri = dfs.getUri();
return (DistributedRaidFileSystem)FileSystem.get(dfsUri, clientConf);
}
public static void waitForFileRaided(
Log logger, FileSystem fileSys, Path file, Path destPath)
throws IOException, InterruptedException {
FileStatus parityStat = null;
String fileName = file.getName().toString();
// wait till file is raided
while (parityStat == null) {
logger.info("Waiting for files to be raided.");
try {
FileStatus[] listPaths = fileSys.listStatus(destPath);
if (listPaths != null) {
for (FileStatus f : listPaths) {
logger.info("File raided so far : " + f.getPath());
String found = f.getPath().getName().toString();
if (fileName.equals(found)) {
parityStat = f;
break;
}
}
}
} catch (FileNotFoundException e) {
//ignore
}
Thread.sleep(1000); // keep waiting
}
while (true) {
LocatedBlocks locations = null;
DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
locations = RaidDFSUtil.getBlockLocations(
dfs, file.toUri().getPath(), 0, parityStat.getLen());
if (!locations.isUnderConstruction()) {
break;
}
Thread.sleep(1000);
}
while (true) {
FileStatus stat = fileSys.getFileStatus(file);
if (stat.getReplication() == 1) break;
Thread.sleep(1000);
}
}
private void corruptBlockAndValidate(Path srcFile, Path destPath,
int[] listBlockNumToCorrupt, long blockSize, int numBlocks)
throws IOException, InterruptedException {
int repl = 1;
long crc = createTestFilePartialLastBlock(fileSys, srcFile, repl,
numBlocks, blockSize);
long length = fileSys.getFileStatus(srcFile).getLen();
RaidNode.doRaid(conf, fileSys.getFileStatus(srcFile),
destPath, code, new RaidNode.Statistics(), new RaidUtils.DummyProgressable(),
false, repl, repl, stripeLength);
// Corrupt or delete the specified blocks of the file
for (int blockNumToCorrupt : listBlockNumToCorrupt) {
LOG.info("Corrupt block " + blockNumToCorrupt + " of file " + srcFile);
LocatedBlocks locations = getBlockLocations(srcFile);
corruptBlock(dfs, srcFile, locations.get(blockNumToCorrupt).getBlock(),
NUM_DATANODES, true);
}
// Validate
DistributedRaidFileSystem raidfs = getRaidFS();
assertTrue(validateFile(raidfs, srcFile, length, crc));
validateLogFile(getRaidFS(), new Path(LOG_DIR));
}
/**
* Create a file, corrupt several blocks in it and ensure that the file can be
* read through DistributedRaidFileSystem by ReedSolomon coding.
*/
@Test
public void testRaidDfsRs() throws Exception {
LOG.info("Test testRaidDfs started.");
code = ErasureCodeType.RS;
long blockSize = 8192L;
int numBlocks = 8;
stripeLength = 3;
mySetup("rs", 3);
int[][] corrupt = {{1, 2, 3}, {1, 4, 7}, {3, 6, 7}};
try {
for (int i = 0; i < corrupt.length; i++) {
Path file = new Path("/user/dhruba/raidtest/file" + i);
corruptBlockAndValidate(
file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
}
} catch (Exception e) {
LOG.info("testRaidDfs Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test testRaidDfs completed.");
}
/**
* Test DistributedRaidFileSystem.readFully()
*/
@Test
public void testReadFully() throws Exception {
code = ErasureCodeType.XOR;
stripeLength = 3;
mySetup("xor", 1);
try {
Path file = new Path("/user/raid/raidtest/file1");
long crc = createTestFile(fileSys, file, 1, 8, 8192L);
FileStatus stat = fileSys.getFileStatus(file);
LOG.info("Created " + file + ", crc=" + crc + ", len=" + stat.getLen());
byte[] filebytes = new byte[(int)stat.getLen()];
// Test that readFully returns the correct CRC when there are no errors.
DistributedRaidFileSystem raidfs = getRaidFS();
FSDataInputStream stm = raidfs.open(file);
stm.readFully(0, filebytes);
assertEquals(crc, bufferCRC(filebytes));
stm.close();
// Generate parity.
RaidNode.doRaid(conf, fileSys.getFileStatus(file),
new Path("/destraid"), code, new RaidNode.Statistics(),
new RaidUtils.DummyProgressable(),
false, 1, 1, stripeLength);
int[] corrupt = {0, 4, 7}; // first, last and middle block
for (int blockIdx : corrupt) {
LOG.info("Corrupt block " + blockIdx + " of file " + file);
LocatedBlocks locations = getBlockLocations(file);
corruptBlock(dfs, file, locations.get(blockIdx).getBlock(),
NUM_DATANODES, true);
}
// Test that readFully returns the correct CRC when there are errors.
stm = raidfs.open(file);
stm.readFully(0, filebytes);
assertEquals(crc, bufferCRC(filebytes));
} finally {
myTearDown();
}
}
/**
* Test that access time and mtime of a source file do not change after
* raiding.
*/
@Test
public void testAccessTime() throws Exception {
LOG.info("Test testAccessTime started.");
code = ErasureCodeType.XOR;
long blockSize = 8192L;
int numBlocks = 8;
int repl = 1;
stripeLength = 3;
mySetup("xor", 1);
Path file = new Path("/user/dhruba/raidtest/file");
createTestFilePartialLastBlock(fileSys, file, repl, numBlocks, blockSize);
FileStatus stat = fileSys.getFileStatus(file);
try {
RaidNode.doRaid(conf, fileSys.getFileStatus(file),
new Path("/destraid"), code, new RaidNode.Statistics(),
new RaidUtils.DummyProgressable(), false, repl, repl, stripeLength);
FileStatus newStat = fileSys.getFileStatus(file);
assertEquals(stat.getModificationTime(), newStat.getModificationTime());
assertEquals(stat.getAccessTime(), newStat.getAccessTime());
} finally {
myTearDown();
}
}
/**
* Create a file, corrupt a block in it and ensure that the file can be
* read through DistributedRaidFileSystem by XOR code.
*/
@Test
public void testRaidDfsXor() throws Exception {
LOG.info("Test testRaidDfs started.");
code = ErasureCodeType.XOR;
long blockSize = 8192L;
int numBlocks = 8;
stripeLength = 3;
mySetup("xor", 1);
int[][] corrupt = {{0}, {4}, {7}}; // first, last and middle block
try {
for (int i = 0; i < corrupt.length; i++) {
Path file = new Path("/user/dhruba/raidtest/" + i);
corruptBlockAndValidate(
file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
}
} catch (Exception e) {
LOG.info("testRaidDfs Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test testRaidDfs completed.");
}
//
// Creates a file and populates it with random data. Returns its crc.
//
public static long createTestFile(FileSystem fileSys, Path name, int repl,
int numBlocks, long blocksize)
throws IOException {
CRC32 crc = new CRC32();
Random rand = new Random();
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, blocksize);
// fill random data into file
final byte[] b = new byte[(int)blocksize];
for (int i = 0; i < numBlocks; i++) {
rand.nextBytes(b);
stm.write(b);
crc.update(b);
}
stm.close();
return crc.getValue();
}
//
// Creates a file with partially full last block. Populate it with random
// data. Returns its crc.
//
public static long createTestFilePartialLastBlock(
FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
throws IOException {
CRC32 crc = new CRC32();
Random rand = new Random();
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, blocksize);
// Write whole blocks.
byte[] b = new byte[(int)blocksize];
for (int i = 1; i < numBlocks; i++) {
rand.nextBytes(b);
stm.write(b);
crc.update(b);
}
// Write partial block.
b = new byte[(int)blocksize/2 - 1];
rand.nextBytes(b);
stm.write(b);
crc.update(b);
stm.close();
return crc.getValue();
}
static long bufferCRC(byte[] buf) {
CRC32 crc = new CRC32();
crc.update(buf, 0, buf.length);
return crc.getValue();
}
//
// validates that file matches the crc.
//
public static boolean validateFile(FileSystem fileSys, Path name, long length,
long crc)
throws IOException {
long numRead = 0;
CRC32 newcrc = new CRC32();
FSDataInputStream stm = fileSys.open(name);
final byte[] b = new byte[4192];
int num = 0;
while (num >= 0) {
num = stm.read(b);
if (num < 0) {
break;
}
numRead += num;
newcrc.update(b, 0, num);
}
stm.close();
if (numRead != length) {
LOG.info("Number of bytes read " + numRead +
" does not match file size " + length);
return false;
}
LOG.info(" Newcrc " + newcrc.getValue() + " old crc " + crc);
if (newcrc.getValue() != crc) {
LOG.info("CRC mismatch of file " + name + ": " + newcrc + " vs. " + crc);
return false;
}
return true;
}
//
// validates the contents of raid recovery log file
//
public static void validateLogFile(FileSystem fileSys, Path logDir)
throws IOException {
FileStatus f = fileSys.listStatus(logDir)[0];
FSDataInputStream stm = fileSys.open(f.getPath());
try {
BufferedReader reader = new BufferedReader(new InputStreamReader(stm));
assertEquals("Recovery attempt log", reader.readLine());
assertTrue(Pattern.matches("Source path : /user/dhruba/raidtest/.*",
reader.readLine()));
assertTrue(Pattern.matches("Alternate path : .*/destraid",
reader.readLine()));
assertEquals("Stripe lentgh : 3", reader.readLine());
assertTrue(Pattern.matches("Corrupt offset : \\d*", reader.readLine()));
assertTrue(Pattern.matches("Output from unRaid : " +
"hdfs://.*/tmp/raid/user/dhruba/raidtest/.*recovered",
reader.readLine()));
} finally {
stm.close();
}
LOG.info("Raid HDFS Recovery log verified");
}
//
// Delete/Corrupt specified block of file
//
public static void corruptBlock(MiniDFSCluster dfs, Path file, ExtendedBlock blockNum,
int numDataNodes, boolean delete) throws IOException {
// Now deliberately remove or corrupt replicas of the block
int numDeleted = 0;
int numCorrupted = 0;
for (int i = 0; i < numDataNodes; i++) {
File block = MiniDFSCluster.getBlockFile(i, blockNum);
if (block == null || !block.exists()) {
continue;
}
if (delete) {
block.delete();
LOG.info("Deleted block " + block);
numDeleted++;
} else {
// Corrupt
long seekPos = block.length()/2;
RandomAccessFile raf = new RandomAccessFile(block, "rw");
raf.seek(seekPos);
int data = raf.readInt();
raf.seek(seekPos);
raf.writeInt(data+1);
LOG.info("Corrupted block " + block);
numCorrupted++;
}
}
assertTrue("Nothing corrupted or deleted",
(numCorrupted + numDeleted) > 0);
}
public static void corruptBlock(Path file, ExtendedBlock blockNum,
int numDataNodes, long offset) throws IOException {
// Now deliberately corrupt replicas of the block.
for (int i = 0; i < numDataNodes; i++) {
File block = MiniDFSCluster.getBlockFile(i, blockNum);
if (block == null || !block.exists()) {
continue;
}
RandomAccessFile raf = new RandomAccessFile(block, "rw");
raf.seek(offset);
int data = raf.readInt();
raf.seek(offset);
raf.writeInt(data+1);
LOG.info("Corrupted block " + block);
}
}
}

View File

@ -1,518 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.blockmanagement;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.CachedFullPathNames;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.CachedLocatedBlocks;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.FileType;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.NameNodeRaidTestUtil;
import org.apache.hadoop.hdfs.server.namenode.NameNodeRaidUtil;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.raid.RaidNode;
import org.junit.Assert;
import org.junit.Test;
public class TestBlockPlacementPolicyRaid {
private Configuration conf = null;
private MiniDFSCluster cluster = null;
private FSNamesystem namesystem = null;
private BlockManager blockManager;
private NetworkTopology networktopology;
private BlockPlacementPolicyRaid policy = null;
private FileSystem fs = null;
String[] rack1 = {"/rack1"};
String[] rack2 = {"/rack2"};
String[] host1 = {"host1.rack1.com"};
String[] host2 = {"host2.rack2.com"};
String xorPrefix = null;
String raidTempPrefix = null;
String raidrsTempPrefix = null;
String raidrsHarTempPrefix = null;
final static Log LOG =
LogFactory.getLog(TestBlockPlacementPolicyRaid.class);
protected void setupCluster() throws IOException {
conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
conf.set("dfs.replication.pending.timeout.sec", "2");
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1L);
conf.set("dfs.block.replicator.classname",
BlockPlacementPolicyRaid.class.getName());
conf.set(RaidNode.STRIPE_LENGTH_KEY, "2");
conf.set(RaidNode.RS_PARITY_LENGTH_KEY, "3");
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1);
// start the cluster with one datanode first
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).
format(true).racks(rack1).hosts(host1).build();
cluster.waitActive();
namesystem = cluster.getNameNode().getNamesystem();
blockManager = namesystem.getBlockManager();
networktopology = blockManager.getDatanodeManager().getNetworkTopology();
Assert.assertTrue("BlockPlacementPolicy type is not correct.",
blockManager.getBlockPlacementPolicy() instanceof BlockPlacementPolicyRaid);
policy = (BlockPlacementPolicyRaid)blockManager.getBlockPlacementPolicy();
fs = cluster.getFileSystem();
xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
raidTempPrefix = RaidNode.xorTempPrefix(conf);
raidrsTempPrefix = RaidNode.rsTempPrefix(conf);
raidrsHarTempPrefix = RaidNode.rsHarTempPrefix(conf);
}
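// Added illustration, not part of the original test: the same keys set in
// setupCluster() above are what a deployment would use to enable the
// RAID-aware placement policy. The stripe and parity lengths below are
// arbitrary example values.
private static Configuration raidPlacementConf() {
Configuration conf = new Configuration();
conf.set("dfs.block.replicator.classname",
BlockPlacementPolicyRaid.class.getName());
conf.set(RaidNode.STRIPE_LENGTH_KEY, "10");   // data blocks per stripe
conf.set(RaidNode.RS_PARITY_LENGTH_KEY, "3"); // RS parity blocks per stripe
return conf;
}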
/**
 * Test that the parity files will be placed at good locations when we
* create them.
*/
@Test
public void testChooseTargetForRaidFile() throws IOException {
setupCluster();
try {
String src = "/dir/file";
String parity = raidrsTempPrefix + src;
DFSTestUtil.createFile(fs, new Path(src), 4, (short)1, 0L);
DFSTestUtil.waitReplication(fs, new Path(src), (short)1);
refreshPolicy();
setBlockPlacementPolicy(namesystem, policy);
// start 3 more datanodes
String[] racks = {"/rack2", "/rack2", "/rack2",
"/rack2", "/rack2", "/rack2"};
String[] hosts =
{"host2.rack2.com", "host3.rack2.com", "host4.rack2.com",
"host5.rack2.com", "host6.rack2.com", "host7.rack2.com"};
cluster.startDataNodes(conf, 6, true, null, racks, hosts, null);
int numBlocks = 6;
DFSTestUtil.createFile(fs, new Path(parity), numBlocks, (short)2, 0L);
DFSTestUtil.waitReplication(fs, new Path(parity), (short)2);
FileStatus srcStat = fs.getFileStatus(new Path(src));
BlockLocation[] srcLoc =
fs.getFileBlockLocations(srcStat, 0, srcStat.getLen());
FileStatus parityStat = fs.getFileStatus(new Path(parity));
BlockLocation[] parityLoc =
fs.getFileBlockLocations(parityStat, 0, parityStat.getLen());
int parityLen = RaidNode.rsParityLength(conf);
for (int i = 0; i < numBlocks / parityLen; i++) {
Set<String> locations = new HashSet<String>();
for (int j = 0; j < srcLoc.length; j++) {
String [] names = srcLoc[j].getNames();
for (int k = 0; k < names.length; k++) {
LOG.info("Source block location: " + names[k]);
locations.add(names[k]);
}
}
for (int j = 0 ; j < parityLen; j++) {
String[] names = parityLoc[j + i * parityLen].getNames();
for (int k = 0; k < names.length; k++) {
LOG.info("Parity block location: " + names[k]);
Assert.assertTrue(locations.add(names[k]));
}
}
}
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
/**
 * Test that the har parity files will be placed at good locations when we
* create them.
*/
@Test
public void testChooseTargetForHarRaidFile() throws IOException {
setupCluster();
try {
String[] racks = {"/rack2", "/rack2", "/rack2",
"/rack2", "/rack2", "/rack2"};
String[] hosts =
{"host2.rack2.com", "host3.rack2.com", "host4.rack2.com",
"host5.rack2.com", "host6.rack2.com", "host7.rack2.com"};
cluster.startDataNodes(conf, 6, true, null, racks, hosts, null);
String harParity = raidrsHarTempPrefix + "/dir/file";
int numBlocks = 11;
DFSTestUtil.createFile(fs, new Path(harParity), numBlocks, (short)1, 0L);
DFSTestUtil.waitReplication(fs, new Path(harParity), (short)1);
FileStatus stat = fs.getFileStatus(new Path(harParity));
BlockLocation[] loc = fs.getFileBlockLocations(stat, 0, stat.getLen());
int rsParityLength = RaidNode.rsParityLength(conf);
for (int i = 0; i < numBlocks - rsParityLength; i++) {
Set<String> locations = new HashSet<String>();
for (int j = 0; j < rsParityLength; j++) {
for (int k = 0; k < loc[i + j].getNames().length; k++) {
// verify that every 4 adjacent blocks are on different nodes
String name = loc[i + j].getNames()[k];
LOG.info("Har Raid block location: " + name);
Assert.assertTrue(locations.add(name));
}
}
}
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
/**
* Test BlockPlacementPolicyRaid.CachedLocatedBlocks
 * Verify that the results obtained from the cache are the same as
* the results obtained directly
*/
@Test
public void testCachedBlocks() throws IOException {
setupCluster();
try {
String file1 = "/dir/file1";
String file2 = "/dir/file2";
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
// test blocks cache
CachedLocatedBlocks cachedBlocks = new CachedLocatedBlocks(namesystem);
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
try {
Thread.sleep(1200L);
} catch (InterruptedException e) {
}
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
/**
* Test BlockPlacementPolicyRaid.CachedFullPathNames
 * Verify that the results obtained from the cache are the same as
* the results obtained directly
*/
@Test
public void testCachedPathNames() throws IOException {
setupCluster();
try {
String file1 = "/dir/file1";
String file2 = "/dir/file2";
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
// test full path cache
CachedFullPathNames cachedFullPathNames =
new CachedFullPathNames(namesystem);
final BlockCollection[] bcs = NameNodeRaidTestUtil.getBlockCollections(
namesystem, file1, file2);
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
try {
Thread.sleep(1200L);
} catch (InterruptedException e) {
}
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
/**
* Test the result of getCompanionBlocks() on the unraided files
*/
@Test
public void testGetCompanionBLocks() throws IOException {
setupCluster();
try {
String file1 = "/dir/file1";
String file2 = "/raid/dir/file2";
String file3 = "/raidrs/dir/file3";
// Use the default placement policy so that blocks are placed in the default way
setBlockPlacementPolicy(namesystem, new BlockPlacementPolicyDefault(
conf, namesystem, networktopology));
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
DFSTestUtil.createFile(fs, new Path(file3), 8, (short)1, 0L);
Collection<LocatedBlock> companionBlocks;
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file1).get(0).getBlock());
Assert.assertTrue(companionBlocks == null || companionBlocks.size() == 0);
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file1).get(2).getBlock());
Assert.assertTrue(companionBlocks == null || companionBlocks.size() == 0);
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file2).get(0).getBlock());
Assert.assertEquals(1, companionBlocks.size());
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file2).get(3).getBlock());
Assert.assertEquals(1, companionBlocks.size());
int rsParityLength = RaidNode.rsParityLength(conf);
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file3).get(0).getBlock());
Assert.assertEquals(rsParityLength, companionBlocks.size());
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file3).get(4).getBlock());
Assert.assertEquals(rsParityLength, companionBlocks.size());
companionBlocks = getCompanionBlocks(
namesystem, policy, getBlocks(namesystem, file3).get(6).getBlock());
Assert.assertEquals(2, companionBlocks.size());
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
static void setBlockPlacementPolicy(
FSNamesystem namesystem, BlockPlacementPolicy policy) {
namesystem.writeLock();
try {
namesystem.getBlockManager().setBlockPlacementPolicy(policy);
} finally {
namesystem.writeUnlock();
}
}
/**
* Test BlockPlacementPolicyRaid actually deletes the correct replica.
* Start 2 datanodes and create 1 source file and its parity file.
* 1) Start host1, create the parity file with replication 1
* 2) Start host2, create the source file with replication 2
* 3) Set the replication of the source file to 1
* Verify that the policy deletes the replica on the node that holds more
* companion blocks.
*/
@Test
public void testDeleteReplica() throws IOException {
setupCluster();
try {
// Use the default placement policy so that blocks are placed in the default way
setBlockPlacementPolicy(namesystem, new BlockPlacementPolicyDefault(
conf, namesystem, networktopology));
DatanodeDescriptor datanode1 = blockManager.getDatanodeManager(
).getDatanodeCyclicIteration("").iterator().next().getValue();
String source = "/dir/file";
String parity = xorPrefix + source;
final Path parityPath = new Path(parity);
DFSTestUtil.createFile(fs, parityPath, 3, (short)1, 0L);
DFSTestUtil.waitReplication(fs, parityPath, (short)1);
// start one more datanode
cluster.startDataNodes(conf, 1, true, null, rack2, host2, null);
DatanodeDescriptor datanode2 = null;
for(Map.Entry<String, DatanodeDescriptor> e : blockManager.getDatanodeManager(
).getDatanodeCyclicIteration("")) {
final DatanodeDescriptor d = e.getValue();
if (!d.getName().equals(datanode1.getName())) {
datanode2 = d;
}
}
Assert.assertTrue(datanode2 != null);
cluster.waitActive();
final Path sourcePath = new Path(source);
DFSTestUtil.createFile(fs, sourcePath, 5, (short)2, 0L);
DFSTestUtil.waitReplication(fs, sourcePath, (short)2);
refreshPolicy();
Assert.assertEquals(parity,
policy.getParityFile(source));
Assert.assertEquals(source,
policy.getSourceFile(parity, xorPrefix));
List<LocatedBlock> sourceBlocks = getBlocks(namesystem, source);
List<LocatedBlock> parityBlocks = getBlocks(namesystem, parity);
Assert.assertEquals(5, sourceBlocks.size());
Assert.assertEquals(3, parityBlocks.size());
// verify the result of getCompanionBlocks()
Collection<LocatedBlock> companionBlocks;
companionBlocks = getCompanionBlocks(
namesystem, policy, sourceBlocks.get(0).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{0, 1}, new int[]{0});
companionBlocks = getCompanionBlocks(
namesystem, policy, sourceBlocks.get(1).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{0, 1}, new int[]{0});
companionBlocks = getCompanionBlocks(
namesystem, policy, sourceBlocks.get(2).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{2, 3}, new int[]{1});
companionBlocks = getCompanionBlocks(
namesystem, policy, sourceBlocks.get(3).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{2, 3}, new int[]{1});
companionBlocks = getCompanionBlocks(
namesystem, policy, sourceBlocks.get(4).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{4}, new int[]{2});
companionBlocks = getCompanionBlocks(
namesystem, policy, parityBlocks.get(0).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{0, 1}, new int[]{0});
companionBlocks = getCompanionBlocks(
namesystem, policy, parityBlocks.get(1).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{2, 3}, new int[]{1});
companionBlocks = getCompanionBlocks(
namesystem, policy, parityBlocks.get(2).getBlock());
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
new int[]{4}, new int[]{2});
// Set the policy back to raid policy. We have to create a new object
// here to clear the block location cache
refreshPolicy();
setBlockPlacementPolicy(namesystem, policy);
// verify policy deletes the correct blocks. companion blocks should be
// evenly distributed.
fs.setReplication(sourcePath, (short)1);
DFSTestUtil.waitReplication(fs, sourcePath, (short)1);
Map<String, Integer> counters = new HashMap<String, Integer>();
refreshPolicy();
for (int i = 0; i < parityBlocks.size(); i++) {
companionBlocks = getCompanionBlocks(
namesystem, policy, parityBlocks.get(i).getBlock());
counters = BlockPlacementPolicyRaid.countCompanionBlocks(
companionBlocks, false);
Assert.assertTrue(counters.get(datanode1.getName()) >= 1 &&
counters.get(datanode1.getName()) <= 2);
Assert.assertTrue(counters.get(datanode1.getName()) +
counters.get(datanode2.getName()) ==
companionBlocks.size());
counters = BlockPlacementPolicyRaid.countCompanionBlocks(
companionBlocks, true);
Assert.assertTrue(counters.get(datanode1.getParent().getName()) >= 1 &&
counters.get(datanode1.getParent().getName()) <= 2);
Assert.assertTrue(counters.get(datanode1.getParent().getName()) +
counters.get(datanode2.getParent().getName()) ==
companionBlocks.size());
}
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
// create a new BlockPlacementPolicyRaid to clear the cache
private void refreshPolicy() {
policy = new BlockPlacementPolicyRaid();
policy.initialize(conf, namesystem, networktopology);
}
private void verifyCompanionBlocks(Collection<LocatedBlock> companionBlocks,
List<LocatedBlock> sourceBlocks, List<LocatedBlock> parityBlocks,
int[] sourceBlockIndexes, int[] parityBlockIndexes) {
Set<ExtendedBlock> blockSet = new HashSet<ExtendedBlock>();
for (LocatedBlock b : companionBlocks) {
blockSet.add(b.getBlock());
}
Assert.assertEquals(sourceBlockIndexes.length + parityBlockIndexes.length,
blockSet.size());
for (int index : sourceBlockIndexes) {
Assert.assertTrue(blockSet.contains(sourceBlocks.get(index).getBlock()));
}
for (int index : parityBlockIndexes) {
Assert.assertTrue(blockSet.contains(parityBlocks.get(index).getBlock()));
}
}
private void verifyCachedFullPathNameResult(
CachedFullPathNames cachedFullPathNames, BlockCollection bc)
throws IOException {
String res1 = bc.getName();
String res2 = cachedFullPathNames.get(bc);
LOG.info("Actual path name: " + res1);
LOG.info("Cached path name: " + res2);
Assert.assertEquals(cachedFullPathNames.get(bc),
bc.getName());
}
private void verifyCachedBlocksResult(CachedLocatedBlocks cachedBlocks,
FSNamesystem namesystem, String file) throws IOException{
long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
List<LocatedBlock> res1 = NameNodeRaidUtil.getBlockLocations(namesystem,
file, 0L, len, false, false).getLocatedBlocks();
List<LocatedBlock> res2 = cachedBlocks.get(file);
for (int i = 0; i < res1.size(); i++) {
LOG.info("Actual block: " + res1.get(i).getBlock());
LOG.info("Cached block: " + res2.get(i).getBlock());
Assert.assertEquals(res1.get(i).getBlock(), res2.get(i).getBlock());
}
}
private Collection<LocatedBlock> getCompanionBlocks(
FSNamesystem namesystem, BlockPlacementPolicyRaid policy,
ExtendedBlock block) throws IOException {
INodeFile inode = (INodeFile)blockManager.blocksMap.getBlockCollection(block
.getLocalBlock());
FileType type = policy.getFileType(inode.getFullPathName());
return policy.getCompanionBlocks(inode.getFullPathName(), type,
block.getLocalBlock());
}
private List<LocatedBlock> getBlocks(FSNamesystem namesystem, String file)
throws IOException {
long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
return NameNodeRaidUtil.getBlockLocations(namesystem,
file, 0, len, false, false).getLocatedBlocks();
}
}

@ -1,38 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
public class NameNodeRaidTestUtil {
public static BlockCollection[] getBlockCollections(final FSNamesystem namesystem,
final String... files) throws UnresolvedLinkException {
final BlockCollection[] inodes = new BlockCollection[files.length];
final FSDirectory dir = namesystem.dir;
dir.readLock();
try {
for(int i = 0; i < files.length; i++) {
inodes[i] = (BlockCollection)dir.rootDir.getNode(files[i], true);
}
return inodes;
} finally {
dir.readUnlock();
}
}
}

@ -1,671 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.util.JarFinder;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.junit.Test;
public class TestBlockFixer {
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestBlockFixer");
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
public static final String DistBlockFixer_JAR =
JarFinder.getJar(DistBlockFixer.class);
final static long RELOAD_INTERVAL = 1000;
final static int NUM_DATANODES = 3;
Configuration conf;
String namenode = null;
MiniDFSCluster dfs = null;
String hftp = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
RaidNode cnode = null;
String jobTrackerName = null;
Random rand = new Random();
/**
* Tests isXorParityFile and isRsParityFile
*/
@Test
public void testIsParityFile() throws IOException {
Configuration testConf = new Configuration();
testConf.set("hdfs.raid.locations", "/raid");
testConf.set("hdfs.raidrs.locations", "/raidrs");
BlockFixer.BlockFixerHelper helper =
new BlockFixer.BlockFixerHelper(testConf);
assertFalse("incorrectly identified rs parity file as xor parity file",
helper.isXorParityFile(new Path("/raidrs/test/test")));
assertTrue("could not identify rs parity file",
helper.isRsParityFile(new Path("/raidrs/test/test")));
assertTrue("could not identify xor parity file",
helper.isXorParityFile(new Path("/raid/test/test")));
assertFalse("incorrectly identified xor parity file as rs parity file",
helper.isRsParityFile(new Path("/raid/test/test")));
}
/**
* Test the filtering of trash files from the list of corrupt files.
*/
@Test
public void testTrashFilter() {
List<Path> files = new LinkedList<Path>();
// Paths that do not match the trash pattern.
Path p1 = new Path("/user/raid/raidtest/f1");
Path p2 = new Path("/user/.Trash/");
// Paths that match the trash pattern.
Path p3 = new Path("/user/raid/.Trash/raidtest/f1");
Path p4 = new Path("/user/raid/.Trash/");
files.add(p1);
files.add(p3);
files.add(p4);
files.add(p2);
Configuration conf = new Configuration();
RaidUtils.filterTrash(conf, files);
assertEquals("expected 2 non-trash files but got " + files.size(),
2, files.size());
for (Path p: files) {
assertTrue("wrong file returned by filterTrash",
p == p1 || p == p2);
}
}
@Test
public void testBlockFixLocal() throws Exception {
implBlockFix(true);
}
/**
* Create a file with three stripes, corrupt one block in each stripe,
* and wait for the file to be fixed.
*/
protected void implBlockFix(boolean local) throws Exception {
LOG.info("Test testBlockFix started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, -1); // never har
Path file1 = new Path("/user/dhruba/raidtest/file1");
Path destPath = new Path("/destraid/user/dhruba/raidtest");
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 7, blockSize);
long file1Len = fileSys.getFileStatus(file1).getLen();
LOG.info("Test testBlockFix created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
if (local) {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.LocalBlockFixer");
} else {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockFixer");
}
localConf.setLong("raid.blockfix.filespertask", 2L);
try {
cnode = RaidNode.createRaidNode(null, localConf);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
cnode.stop(); cnode.join();
FileStatus srcStat = fileSys.getFileStatus(file1);
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("no corrupt files expected", 0, corruptFiles.length);
assertEquals("filesFixed() should return 0 before fixing files",
0, cnode.blockFixer.filesFixed());
// Corrupt one block in each of three different stripes. We can fix them.
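// (stripeLength is 3 and the file has 7 blocks, so blocks 0, 4 and 6 fall
// into stripes 0, 1 and 2; XOR parity can rebuild at most one missing block
// per stripe, so all three are recoverable.)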
int[] corruptBlockIdxs = new int[]{0, 4, 6};
for (int idx: corruptBlockIdxs)
corruptBlock(locs.get(idx).getBlock());
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("file not corrupted", 1, corruptFiles.length);
assertEquals("wrong file corrupted",
corruptFiles[0], file1.toUri().getPath());
assertEquals("wrong number of corrupt blocks", 3,
RaidDFSUtil.corruptBlocksInFile(dfs, file1.toUri().getPath(), 0,
srcStat.getLen()).size());
cnode = RaidNode.createRaidNode(null, localConf);
long start = Time.now();
while (cnode.blockFixer.filesFixed() < 1 &&
Time.now() - start < 120000) {
LOG.info("Test testBlockFix waiting for files to be fixed.");
Thread.sleep(1000);
}
assertEquals("file not fixed", 1, cnode.blockFixer.filesFixed());
dfs = getDFS(conf, dfs);
assertTrue("file not fixed",
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
} catch (Exception e) {
LOG.info("Test testBlockFix Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test testBlockFix completed.");
}
/**
* Tests integrity of generated block.
* Create a file and delete a block entirely. Wait for the block to be
* regenerated. Now stop RaidNode and corrupt the generated block.
* Test that corruption in the generated block can be detected by clients.
*/
protected void generatedBlockTestCommon(String testName, int blockToCorrupt,
boolean local) throws Exception {
LOG.info("Test " + testName + " started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, -1); // never har
Path file1 = new Path("/user/dhruba/raidtest/file1");
Path destPath = new Path("/destraid/user/dhruba/raidtest");
long crc1 = TestRaidDfs.createTestFile(fileSys, file1, 1, 7, blockSize);
long file1Len = fileSys.getFileStatus(file1).getLen();
LOG.info("Test " + testName + " created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
if (local) {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.LocalBlockFixer");
} else {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockFixer");
}
localConf.setLong("raid.blockfix.filespertask", 2L);
try {
cnode = RaidNode.createRaidNode(null, localConf);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
cnode.stop(); cnode.join();
FileStatus srcStat = fileSys.getFileStatus(file1);
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("no corrupt files expected", 0, corruptFiles.length);
assertEquals("filesFixed() should return 0 before fixing files",
0, cnode.blockFixer.filesFixed());
corruptBlock(locs.get(0).getBlock());
reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("file not corrupted",
1, corruptFiles.length);
assertEquals("wrong file corrupted",
corruptFiles[0], file1.toUri().getPath());
cnode = RaidNode.createRaidNode(null, localConf);
long start = Time.now();
while (cnode.blockFixer.filesFixed() < 1 &&
Time.now() - start < 120000) {
LOG.info("Test " + testName + " waiting for files to be fixed.");
Thread.sleep(1000);
}
assertEquals("file not fixed",
1, cnode.blockFixer.filesFixed());
// Stop RaidNode
cnode.stop(); cnode.join(); cnode = null;
// The block has successfully been reconstructed.
dfs = getDFS(conf, dfs);
assertTrue("file not fixed",
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
// Now corrupt the generated block.
locs = RaidDFSUtil.getBlockLocations(
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
corruptBlock(locs.get(0).getBlock());
reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
try {
Thread.sleep(5*1000);
} catch (InterruptedException ignore) {
}
try {
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1);
fail("Expected exception not thrown");
} catch (org.apache.hadoop.fs.ChecksumException ce) {
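// expected: either exception means the client detected the corruption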
} catch (org.apache.hadoop.hdfs.BlockMissingException bme) {
}
} catch (Exception e) {
LOG.info("Test " + testName + " Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test " + testName + " completed.");
}
/**
* Tests integrity of generated block.
* Create a file and delete a block entirely. Wait for the block to be
* regenerated. Now stop RaidNode and corrupt the generated block.
* Test that corruption in the generated block can be detected by clients.
*/
@Test
public void testGeneratedBlockLocal() throws Exception {
generatedBlockTestCommon("testGeneratedBlock", 3, true);
}
/**
* Tests integrity of generated last block.
* Create a file and delete a block entirely. Wait for the block to be
* regenerated. Now stop RaidNode and corrupt the generated block.
* Test that corruption in the generated block can be detected by clients.
*/
@Test
public void testGeneratedLastBlockLocal() throws Exception {
generatedBlockTestCommon("testGeneratedLastBlock", 6, true);
}
@Test
public void testParityBlockFixLocal() throws Exception {
implParityBlockFix("testParityBlockFixLocal", true);
}
/**
* Corrupt a parity file and wait for it to get fixed.
*/
protected void implParityBlockFix(String testName, boolean local)
throws Exception {
LOG.info("Test " + testName + " started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, -1); // never har
Path file1 = new Path("/user/dhruba/raidtest/file1");
Path destPath = new Path("/destraid/user/dhruba/raidtest");
Path parityFile = new Path("/destraid/user/dhruba/raidtest/file1");
TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 7, blockSize);
LOG.info("Test " + testName + " created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
if (local) {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.LocalBlockFixer");
} else {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockFixer");
}
localConf.setLong("raid.blockfix.filespertask", 2L);
try {
cnode = RaidNode.createRaidNode(null, localConf);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
cnode.stop(); cnode.join();
long parityCRC = getCRC(fileSys, parityFile);
FileStatus parityStat = fileSys.getFileStatus(parityFile);
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("no corrupt files expected", 0, corruptFiles.length);
assertEquals("filesFixed() should return 0 before fixing files",
0, cnode.blockFixer.filesFixed());
// Corrupt parity blocks for different stripes.
int[] corruptBlockIdxs = new int[]{0, 1, 2};
for (int idx: corruptBlockIdxs)
corruptBlock(locs.get(idx).getBlock());
reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs, blockSize);
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("file not corrupted",
1, corruptFiles.length);
assertEquals("wrong file corrupted",
corruptFiles[0], parityFile.toUri().getPath());
cnode = RaidNode.createRaidNode(null, localConf);
long start = Time.now();
while (cnode.blockFixer.filesFixed() < 1 &&
Time.now() - start < 120000) {
LOG.info("Test " + testName + " waiting for files to be fixed.");
Thread.sleep(1000);
}
assertEquals("file not fixed",
1, cnode.blockFixer.filesFixed());
long checkCRC = getCRC(fileSys, parityFile);
assertEquals("file not fixed",
parityCRC, checkCRC);
} catch (Exception e) {
LOG.info("Test " + testName + " Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test " + testName + " completed.");
}
@Test
public void testParityHarBlockFixLocal() throws Exception {
implParityHarBlockFix("testParityHarBlockFixLocal", true);
}
protected void implParityHarBlockFix(String testName, boolean local)
throws Exception {
LOG.info("Test " + testName + " started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, 0); // Time before har = 0 days.
Path file1 = new Path("/user/dhruba/raidtest/file1");
// Parity file will have 7 blocks.
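// (stripeLength is 3, so the 20-block source file below is raided into
// ceil(20 / 3) = 7 XOR parity blocks, one per stripe.)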
TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 20, blockSize);
LOG.info("Test " + testName + " created test files");
// create an instance of the RaidNode
// HAR block size = 2 * src block size = 2 * parity block size.
Configuration localConf = new Configuration(conf);
localConf.setLong("har.block.size", blockSize * 2);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
if (local) {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.LocalBlockFixer");
} else {
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockFixer");
}
localConf.setLong("raid.blockfix.filespertask", 2L);
try {
cnode = RaidNode.createRaidNode(null, localConf);
Path harDirectory =
new Path("/destraid/user/dhruba/raidtest/raidtest" +
RaidNode.HAR_SUFFIX);
long start = Time.now();
while (Time.now() - start < 1000 * 120) {
if (fileSys.exists(harDirectory)) {
break;
}
LOG.info("Test " + testName + " waiting for har");
Thread.sleep(1000);
}
Path partFile = new Path(harDirectory, "part-0");
long partCRC = getCRC(fileSys, partFile);
FileStatus partStat = fileSys.getFileStatus(partFile);
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
dfs, partFile.toUri().getPath(), 0, partStat.getLen());
// 7 parity blocks => 4 har blocks.
assertEquals("wrong number of har blocks",
4, locs.getLocatedBlocks().size());
cnode.stop(); cnode.join();
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("no corrupt files expected", 0, corruptFiles.length);
assertEquals("filesFixed() should return 0 before fixing files",
0, cnode.blockFixer.filesFixed());
// Corrupt parity blocks for different stripes.
int[] corruptBlockIdxs = new int[]{0, 3};
for (int idx: corruptBlockIdxs)
corruptBlock(locs.get(idx).getBlock());
reportCorruptBlocks(dfs, partFile, corruptBlockIdxs,
partStat.getBlockSize());
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("file not corrupted", 1, corruptFiles.length);
assertEquals("wrong file corrupted",
corruptFiles[0], partFile.toUri().getPath());
cnode = RaidNode.createRaidNode(null, localConf);
start = Time.now();
while (cnode.blockFixer.filesFixed() < 1 &&
Time.now() - start < 120000) {
LOG.info("Test " + testName + " waiting for files to be fixed.");
Thread.sleep(1000);
}
assertEquals("file not fixed",
1, cnode.blockFixer.filesFixed());
long checkCRC = getCRC(fileSys, partFile);
assertEquals("file not fixed",
partCRC, checkCRC);
} catch (Exception e) {
LOG.info("Test " + testName + " Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test " + testName + " completed.");
}
protected static DistributedFileSystem getDFS(
Configuration conf, FileSystem dfs) throws IOException {
Configuration clientConf = new Configuration(conf);
clientConf.set("fs.hdfs.impl",
"org.apache.hadoop.hdfs.DistributedFileSystem");
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
URI dfsUri = dfs.getUri();
FileSystem.closeAll();
return (DistributedFileSystem) FileSystem.get(dfsUri, clientConf);
}
protected void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
// scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.server.address", "localhost:0");
conf.setInt("hdfs.raid.stripeLength", stripeLength);
conf.set("hdfs.raid.locations", "/destraid");
conf.setBoolean("dfs.permissions", false);
conf.set("mapreduce.framework.name", "yarn");
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
FileSystem.setDefaultUri(conf, namenode);
mr = new MiniMRCluster(4, namenode, 3);
JobConf jobConf = mr.createJobConf();
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
conf.set("mapred.job.tracker", jobTrackerName);
conf.set("mapreduce.framework.name", "yarn");
String rmAddress = jobConf.get("yarn.resourcemanager.address");
if (rmAddress != null) {
conf.set("yarn.resourcemanager.address", rmAddress);
}
String schedulerAddress =
jobConf.get("yarn.resourcemanager.scheduler.address");
if (schedulerAddress != null) {
conf.set("yarn.resourcemanager.scheduler.address", schedulerAddress);
}
String jobHistoryAddress =
jobConf.get("mapreduce.jobhistory.address");
if (jobHistoryAddress != null) {
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
}
conf.set(JobContext.JAR, TestBlockFixer.DistBlockFixer_JAR);
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
String str = "<configuration> " +
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
"<policy name = \"RaidTest1\"> " +
"<erasureCode>xor</erasureCode> " +
"<destPath> /destraid</destPath> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>1</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>1</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> ";
if (timeBeforeHar >= 0) {
str +=
"<property> " +
"<name>time_before_har</name> " +
"<value>" + timeBeforeHar + "</value> " +
"<description> amount of time waited before har'ing parity files" +
"</description> " +
"</property> ";
}
str +=
"</policy>" +
"</srcPath>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
}
protected void myTearDown() throws Exception {
if (cnode != null) { cnode.stop(); cnode.join(); }
if (mr != null) { mr.shutdown(); }
if (dfs != null) { dfs.shutdown(); }
}
public static long getCRC(FileSystem fs, Path p) throws IOException {
CRC32 crc = new CRC32();
FSDataInputStream stm = fs.open(p);
int b;
while ((b = stm.read())>=0) {
crc.update(b);
}
stm.close();
return crc.getValue();
}
void corruptBlock(ExtendedBlock block) throws IOException {
assertTrue("Could not corrupt block",
dfs.corruptBlockOnDataNodes(block) > 0);
}
static void reportCorruptBlocks(FileSystem fs, Path file, int[] idxs,
long blockSize) throws IOException {
FSDataInputStream in = fs.open(file);
for (int idx: idxs) {
long offset = idx * blockSize;
LOG.info("Reporting corrupt block " + file + ":" + offset);
in.seek(offset);
try {
in.readFully(new byte[(int)blockSize]);
fail("Expected exception not thrown for " + file + ":" + offset);
} catch (org.apache.hadoop.fs.ChecksumException e) {
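// expected: the failed read is what reports the corrupt block to the namenode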
} catch (org.apache.hadoop.hdfs.BlockMissingException bme) {
}
}
}
}

@ -1,26 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import org.junit.Test;
public class TestBlockFixerBlockFixDist extends TestBlockFixer {
@Test
public void testBlockFixDist() throws Exception {
implBlockFix(false);
}
}

@ -1,245 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.junit.Test;
public class TestBlockFixerDistConcurrency extends TestBlockFixer {
/**
* tests that we can have 2 concurrent jobs fixing files
* (dist block fixer)
*/
@Test
public void testConcurrentJobs() throws Exception {
LOG.info("Test testConcurrentJobs started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, -1); // never har
Path file1 = new Path("/user/dhruba/raidtest/file1");
Path file2 = new Path("/user/dhruba/raidtest/file2");
Path destPath = new Path("/destraid/user/dhruba/raidtest");
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 20, blockSize);
long crc2 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file2,
1, 20, blockSize);
long file1Len = fileSys.getFileStatus(file1).getLen();
long file2Len = fileSys.getFileStatus(file2).getLen();
LOG.info("Test testConcurrentJobs created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockFixer");
localConf.setLong("raid.blockfix.filespertask", 2L);
try {
cnode = RaidNode.createRaidNode(null, localConf);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file2, destPath);
cnode.stop(); cnode.join();
FileStatus file1Stat = fileSys.getFileStatus(file1);
FileStatus file2Stat = fileSys.getFileStatus(file2);
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
LocatedBlocks file1Loc =
RaidDFSUtil.getBlockLocations(dfs, file1.toUri().getPath(),
0, file1Stat.getLen());
LocatedBlocks file2Loc =
RaidDFSUtil.getBlockLocations(dfs, file2.toUri().getPath(),
0, file2Stat.getLen());
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("no corrupt files expected", 0, corruptFiles.length);
assertEquals("filesFixed() should return 0 before fixing files",
0, cnode.blockFixer.filesFixed());
// corrupt file1
int[] corruptBlockIdxs = new int[]{0, 4, 6};
for (int idx: corruptBlockIdxs)
corruptBlock(file1Loc.get(idx).getBlock());
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
cnode = RaidNode.createRaidNode(null, localConf);
DistBlockFixer blockFixer = (DistBlockFixer) cnode.blockFixer;
long start = Time.now();
while (blockFixer.jobsRunning() < 1 &&
Time.now() - start < 240000) {
LOG.info("Test testBlockFix waiting for fixing job 1 to start");
Thread.sleep(10);
}
assertEquals("job 1 not running", 1, blockFixer.jobsRunning());
// corrupt file2
for (int idx: corruptBlockIdxs)
corruptBlock(file2Loc.get(idx).getBlock());
reportCorruptBlocks(dfs, file2, corruptBlockIdxs, blockSize);
while (blockFixer.jobsRunning() < 2 &&
Time.now() - start < 240000) {
LOG.info("Test testBlockFix waiting for fixing job 2 to start");
Thread.sleep(10);
}
assertEquals("2 jobs not running", 2, blockFixer.jobsRunning());
while (blockFixer.filesFixed() < 2 &&
Time.now() - start < 240000) {
LOG.info("Test testBlockFix waiting for files to be fixed.");
Thread.sleep(10);
}
assertEquals("files not fixed", 2, blockFixer.filesFixed());
dfs = getDFS(conf, dfs);
try {
Thread.sleep(5*1000);
} catch (InterruptedException ignore) {
}
assertTrue("file not fixed",
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
assertTrue("file not fixed",
TestRaidDfs.validateFile(dfs, file2, file2Len, crc2));
} catch (Exception e) {
LOG.info("Test testConcurrentJobs exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
}
/**
* tests that the distributed block fixer obeys
* the limit on how many files to fix simultaneously
*/
@Test
public void testMaxPendingFiles() throws Exception {
LOG.info("Test testMaxPendingFiles started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, -1); // never har
Path file1 = new Path("/user/dhruba/raidtest/file1");
Path file2 = new Path("/user/dhruba/raidtest/file2");
Path destPath = new Path("/destraid/user/dhruba/raidtest");
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 20, blockSize);
long crc2 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file2,
1, 20, blockSize);
long file1Len = fileSys.getFileStatus(file1).getLen();
long file2Len = fileSys.getFileStatus(file2).getLen();
LOG.info("Test testMaxPendingFiles created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
localConf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.DistBlockFixer");
localConf.setLong("raid.blockfix.filespertask", 2L);
localConf.setLong("raid.blockfix.maxpendingfiles", 1L);
try {
cnode = RaidNode.createRaidNode(null, localConf);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
TestRaidDfs.waitForFileRaided(LOG, fileSys, file2, destPath);
cnode.stop(); cnode.join();
FileStatus file1Stat = fileSys.getFileStatus(file1);
FileStatus file2Stat = fileSys.getFileStatus(file2);
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
LocatedBlocks file1Loc =
RaidDFSUtil.getBlockLocations(dfs, file1.toUri().getPath(),
0, file1Stat.getLen());
LocatedBlocks file2Loc =
RaidDFSUtil.getBlockLocations(dfs, file2.toUri().getPath(),
0, file2Stat.getLen());
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals("no corrupt files expected", 0, corruptFiles.length);
assertEquals("filesFixed() should return 0 before fixing files",
0, cnode.blockFixer.filesFixed());
// corrupt file1
int[] corruptBlockIdxs = new int[]{0, 4, 6};
for (int idx: corruptBlockIdxs)
corruptBlock(file1Loc.get(idx).getBlock());
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
cnode = RaidNode.createRaidNode(null, localConf);
DistBlockFixer blockFixer = (DistBlockFixer) cnode.blockFixer;
long start = Time.now();
while (blockFixer.jobsRunning() < 1 &&
Time.now() - start < 240000) {
LOG.info("Test testBlockFix waiting for fixing job 1 to start");
Thread.sleep(10);
}
assertEquals("job not running", 1, blockFixer.jobsRunning());
// corrupt file2
for (int idx: corruptBlockIdxs)
corruptBlock(file2Loc.get(idx).getBlock());
reportCorruptBlocks(dfs, file2, corruptBlockIdxs, blockSize);
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
// wait until both files are fixed
while (blockFixer.filesFixed() < 2 &&
Time.now() - start < 240000) {
// make sure the block fixer does not start a second job while
// the first one is still running
assertTrue("too many jobs running", blockFixer.jobsRunning() <= 1);
Thread.sleep(10);
}
assertEquals("files not fixed", 2, blockFixer.filesFixed());
dfs = getDFS(conf, dfs);
try {
Thread.sleep(5*1000);
} catch (InterruptedException ignore) {
}
assertTrue("file not fixed",
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
assertTrue("file not fixed",
TestRaidDfs.validateFile(dfs, file2, file2Len, crc2));
} catch (Exception e) {
LOG.info("Test testMaxPendingFiles exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
}
}

@ -1,45 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import org.junit.Test;
public class TestBlockFixerGeneratedBlockDist extends TestBlockFixer {
/**
* Tests integrity of generated block.
* Create a file and delete a block entirely. Wait for the block to be
* regenerated. Now stop RaidNode and corrupt the generated block.
* Test that corruption in the generated block can be detected by clients.
*/
@Test
public void testGeneratedBlockDist() throws Exception {
generatedBlockTestCommon("testGeneratedBlock", 3, false);
}
/**
* Tests integrity of generated last block.
* Create a file and delete a block entirely. Wait for the block to be
* regenerated. Now stop RaidNode and corrupt the generated block.
* Test that corruption in the generated block can be detected by clients.
*/
@Test
public void testGeneratedLastBlockDist() throws Exception {
generatedBlockTestCommon("testGeneratedLastBlock", 6, false);
}
}

@ -1,32 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import org.junit.Test;
public class TestBlockFixerParityBlockFixDist extends TestBlockFixer {
@Test
public void testParityBlockFixDist() throws Exception {
implParityBlockFix("testParityBlockFixDist", false);
}
@Test
public void testParityHarBlockFixDist() throws Exception {
implParityHarBlockFix("testParityHarBlockFixDist", false);
}
}

@ -1,228 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.Time;
import org.junit.Test;
public class TestDirectoryTraversal {
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestDirectoryTraversal");
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
MiniDFSCluster dfs = null;
FileSystem fs = null;
Configuration conf = null;
/**
* Test basic enumeration.
*/
@Test
public void testEnumeration() throws IOException {
mySetup();
try {
Path topDir = new Path(TEST_DIR + "/testenumeration");
createTestTree(topDir);
LOG.info("Enumerating files");
List<FileStatus> startPaths = new LinkedList<FileStatus>();
startPaths.add(fs.getFileStatus(topDir));
DirectoryTraversal dt = new DirectoryTraversal(fs, startPaths, 2);
List<FileStatus> selected = new LinkedList<FileStatus>();
while (true) {
FileStatus f = dt.getNextFile();
if (f == null) break;
assertEquals(false, f.isDir());
LOG.info(f.getPath());
selected.add(f);
}
assertEquals(5, selected.size());
LOG.info("Enumerating directories");
startPaths.clear();
startPaths.add(fs.getFileStatus(topDir));
dt = new DirectoryTraversal(fs, startPaths);
selected.clear();
while (true) {
FileStatus dir = dt.getNextDirectory();
if (dir == null) break;
assertEquals(true, dir.isDir());
LOG.info(dir.getPath());
selected.add(dir);
}
assertEquals(4, selected.size());
} finally {
myTearDown();
}
}
@Test
public void testSuspension() throws IOException {
LOG.info("Starting testSuspension");
mySetup();
try {
Path topDir = new Path(TEST_DIR + "/testenumeration");
createTestTree(topDir);
String top = topDir.toString();
List<FileStatus> startPaths = new LinkedList<FileStatus>();
startPaths.add(fs.getFileStatus(new Path(top + "/a")));
startPaths.add(fs.getFileStatus(new Path(top + "/b")));
DirectoryTraversal dt = new DirectoryTraversal(fs, startPaths);
int limit = 2;
short targetRepl = 1;
Path raid = new Path("/raid");
DirectoryTraversal.FileFilter filter =
new RaidFilter.TimeBasedFilter(conf,
RaidNode.xorDestinationPath(conf), 1, Time.now(), 0);
List<FileStatus> selected = dt.getFilteredFiles(filter, limit);
for (FileStatus f: selected) {
LOG.info(f.getPath());
}
assertEquals(limit, selected.size());
selected = dt.getFilteredFiles(filter, limit);
for (FileStatus f: selected) {
LOG.info(f.getPath());
}
assertEquals(limit, selected.size());
} finally {
myTearDown();
}
}
@Test
public void testFileFilter() throws IOException {
mySetup();
try {
Path topDir = new Path(TEST_DIR + "/testFileFilter");
int targetRepl = 1;
createTestTree(topDir);
Path file = new Path(topDir.toString() + "/a/f1");
FileStatus stat = fs.getFileStatus(file);
PolicyInfo info = new PolicyInfo("testFileFilter", conf);
info.setSrcPath(topDir.toString());
info.setErasureCode("rs");
info.setDescription("test policy");
info.setProperty("targetReplication", "1");
info.setProperty("metaReplication", "1");
DirectoryTraversal.FileFilter timeBasedXORFilter =
new RaidFilter.TimeBasedFilter(conf,
RaidNode.xorDestinationPath(conf), targetRepl,
Time.now(), 0);
DirectoryTraversal.FileFilter timeBasedRSFilter =
new RaidFilter.TimeBasedFilter(conf,
RaidNode.rsDestinationPath(conf), targetRepl,
Time.now(), 0);
DirectoryTraversal.FileFilter preferenceForRSFilter =
new RaidFilter.PreferenceFilter(
conf, RaidNode.rsDestinationPath(conf),
RaidNode.xorDestinationPath(conf), 1, Time.now(), 0);
assertTrue(timeBasedXORFilter.check(stat));
assertTrue(timeBasedRSFilter.check(stat));
assertTrue(preferenceForRSFilter.check(stat));
RaidNode.doRaid(
conf, info, stat, new RaidNode.Statistics(), Reporter.NULL);
assertTrue(timeBasedXORFilter.check(stat));
assertFalse(timeBasedRSFilter.check(stat));
assertFalse(preferenceForRSFilter.check(stat));
} finally {
myTearDown();
}
}
/**
* Creates a test directory tree.
*           top
*          / | \
*         /  |  f5
*        a   b___
*       / \  |\  \
*      f1 f2 f3 f4 c
*/
private void createTestTree(Path topDir) throws IOException {
String top = topDir.toString();
fs.delete(topDir, true);
fs.mkdirs(topDir);
fs.create(new Path(top + "/f5")).close();
fs.mkdirs(new Path(top + "/a"));
createTestFile(new Path(top + "/a/f1"));
createTestFile(new Path(top + "/a/f2"));
fs.mkdirs(new Path(top + "/b"));
fs.mkdirs(new Path(top + "/b/c"));
createTestFile(new Path(top + "/b/f3"));
createTestFile(new Path(top + "/b/f4"));
}
private void createTestFile(Path file) throws IOException {
long blockSize = 8192;
byte[] bytes = new byte[(int)blockSize];
FSDataOutputStream stm = fs.create(file, false, 4096, (short)1, blockSize);
stm.write(bytes);
stm.write(bytes);
stm.write(bytes);
stm.close();
FileStatus stat = fs.getFileStatus(file);
assertEquals(blockSize, stat.getBlockSize());
}
private void mySetup() throws IOException {
conf = new Configuration();
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
dfs.waitActive();
fs = dfs.getFileSystem();
}
private void myTearDown() {
if (dfs != null) { dfs.shutdown(); }
}
}

@ -1,245 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.util.Time;
import org.junit.Test;
public class TestErasureCodes {
final int TEST_CODES = 100;
final int TEST_TIMES = 1000;
final Random RAND = new Random();
@Test
public void testEncodeDecode() {
for (int n = 0; n < TEST_CODES; n++) {
int stripeSize = RAND.nextInt(99) + 1; // 1, 2, 3, ... 100
int paritySize = RAND.nextInt(9) + 1; //1, 2, 3, 4, ... 10
ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
for (int m = 0; m < TEST_TIMES; m++) {
int symbolMax = (int) Math.pow(2, ec.symbolSize());
int[] message = new int[stripeSize];
for (int i = 0; i < stripeSize; i++) {
message[i] = RAND.nextInt(symbolMax);
}
int[] parity = new int[paritySize];
ec.encode(message, parity);
int[] data = new int[stripeSize + paritySize];
int[] copy = new int[data.length];
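// Lay out the codeword as [parity | message]; the erased-location indexes
// chosen below refer to this same ordering.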
for (int i = 0; i < paritySize; i++) {
data[i] = parity[i];
copy[i] = parity[i];
}
for (int i = 0; i < stripeSize; i++) {
data[i + paritySize] = message[i];
copy[i + paritySize] = message[i];
}
int erasedLen = paritySize == 1 ? 1 : RAND.nextInt(paritySize - 1) + 1;
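// Erase at most paritySize - 1 symbols (exactly 1 when paritySize == 1).
// A Reed-Solomon code with paritySize parity symbols can recover up to
// paritySize erasures, so decode() below must reproduce the erased values.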
int[] erasedLocations = randomErasedLocation(erasedLen, data.length);
for (int i = 0; i < erasedLocations.length; i++) {
data[erasedLocations[i]] = 0;
}
int[] erasedValues = new int[erasedLen];
ec.decode(data, erasedLocations, erasedValues);
for (int i = 0; i < erasedLen; i++) {
assertEquals("Decode failed", copy[erasedLocations[i]], erasedValues[i]);
}
}
}
}
@Test
public void testRSPerformance() {
int stripeSize = 10;
int paritySize = 4;
ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
int symbolMax = (int) Math.pow(2, ec.symbolSize());
byte[][] message = new byte[stripeSize][];
int bufsize = 1024 * 1024 * 10;
for (int i = 0; i < stripeSize; i++) {
message[i] = new byte[bufsize];
for (int j = 0; j < bufsize; j++) {
message[i][j] = (byte) RAND.nextInt(symbolMax);
}
}
byte[][] parity = new byte[paritySize][];
for (int i = 0; i < paritySize; i++) {
parity[i] = new byte[bufsize];
}
long encodeStart = Time.now();
int[] tmpIn = new int[stripeSize];
int[] tmpOut = new int[paritySize];
for (int i = 0; i < bufsize; i++) {
// Copy message.
for (int j = 0; j < stripeSize; j++) tmpIn[j] = 0x000000FF & message[j][i];
ec.encode(tmpIn, tmpOut);
// Copy parity.
for (int j = 0; j < paritySize; j++) parity[j][i] = (byte)tmpOut[j];
}
long encodeEnd = Time.now();
float encodeMSecs = (encodeEnd - encodeStart);
System.out.println("Time to encode rs = " + encodeMSecs +
"msec (" + message[0].length / (1000 * encodeMSecs) + " MB/s)");
// Copy erased array.
int[] data = new int[paritySize + stripeSize];
// 4th location is the 0th symbol in the message
int[] erasedLocations = new int[]{4, 1, 5, 7};
int[] erasedValues = new int[erasedLocations.length];
byte[] copy = new byte[bufsize];
for (int j = 0; j < bufsize; j++) {
copy[j] = message[0][j];
message[0][j] = 0;
}
long decodeStart = Time.now();
for (int i = 0; i < bufsize; i++) {
// Copy parity first.
for (int j = 0; j < paritySize; j++) {
data[j] = 0x000000FF & parity[j][i];
}
// Copy message. Skip 0 as the erased symbol
for (int j = 1; j < stripeSize; j++) {
data[j + paritySize] = 0x000000FF & message[j][i];
}
// Use 0, 2, 3, 6, 8, 9, 10, 11, 12, 13th symbol to reconstruct the data
ec.decode(data, erasedLocations, erasedValues);
message[0][i] = (byte)erasedValues[0];
}
long decodeEnd = Time.now();
float decodeMSecs = (decodeEnd - decodeStart);
System.out.println("Time to decode = " + decodeMSecs +
"msec (" + message[0].length / (1000 * decodeMSecs) + " MB/s)");
assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
}
@Test
public void testXorPerformance() {
java.util.Random RAND = new java.util.Random();
int stripeSize = 10;
byte[][] message = new byte[stripeSize][];
int bufsize = 1024 * 1024 * 10;
for (int i = 0; i < stripeSize; i++) {
message[i] = new byte[bufsize];
for (int j = 0; j < bufsize; j++) {
message[i][j] = (byte)RAND.nextInt(256);
}
}
byte[] parity = new byte[bufsize];
long encodeStart = Time.now();
for (int i = 0; i < bufsize; i++) {
for (int j = 0; j < stripeSize; j++) parity[i] ^= message[j][i];
}
long encodeEnd = Time.now();
float encodeMSecs = encodeEnd - encodeStart;
System.out.println("Time to encode xor = " + encodeMSecs +
" msec (" + message[0].length / (1000 * encodeMSecs) + "MB/s)");
byte[] copy = new byte[bufsize];
for (int j = 0; j < bufsize; j++) {
copy[j] = message[0][j];
message[0][j] = 0;
}
long decodeStart = Time.now();
for (int i = 0; i < bufsize; i++) {
for (int j = 1; j < stripeSize; j++) message[0][i] ^= message[j][i];
message[0][i] ^= parity[i];
}
long decodeEnd = Time.now();
float decodeMSecs = decodeEnd - decodeStart;
System.out.println("Time to decode xor = " + decodeMSecs +
" msec (" + message[0].length / (1000 * decodeMSecs) + "MB/s)");
assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
}
@Test
public void testComputeErrorLocations() {
for (int i = 0; i < TEST_TIMES; ++i) {
verifyErrorLocations(10, 4, 1);
verifyErrorLocations(10, 4, 2);
}
}
public void verifyErrorLocations(int stripeSize, int paritySize, int errors) {
int[] message = new int[stripeSize];
int[] parity = new int[paritySize];
Set<Integer> errorLocations = new HashSet<Integer>();
for (int i = 0; i < message.length; ++i) {
message[i] = RAND.nextInt(256);
}
while (errorLocations.size() < errors) {
int loc = RAND.nextInt(stripeSize + paritySize);
errorLocations.add(loc);
}
ReedSolomonCode codec = new ReedSolomonCode(stripeSize, paritySize);
codec.encode(message, parity);
int[] data = combineArrays(parity, message);
for (Integer i : errorLocations) {
data[i] = randError(data[i]);
}
Set<Integer> recoveredLocations = new HashSet<Integer>();
boolean resolved = codec.computeErrorLocations(data, recoveredLocations);
if (resolved) {
assertEquals(errorLocations, recoveredLocations);
}
}
private int randError(int actual) {
while (true) {
int r = RAND.nextInt(256);
if (r != actual) {
return r;
}
}
}
private int[] combineArrays(int[] array1, int[] array2) {
int[] result = new int[array1.length + array2.length];
for (int i = 0; i < array1.length; ++i) {
result[i] = array1[i];
}
for (int i = 0; i < array2.length; ++i) {
result[i + array1.length] = array2[i];
}
return result;
}
private int[] randomErasedLocation(int erasedLen, int dataLen) {
// Pick erasedLen distinct locations in [0, dataLen).
Set<Integer> s = new HashSet<Integer>();
while (s.size() != erasedLen) {
s.add(RAND.nextInt(dataLen));
}
int[] erasedLocations = new int[erasedLen];
int t = 0;
for (int erased : s) {
erasedLocations[t++] = erased;
}
return erasedLocations;
}
}
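
A minimal usage sketch of the encode/decode cycle exercised above, assuming only the ReedSolomonCode API shown in this test (encode, decode, symbolSize); the class name and the 10+4 stripe geometry are illustrative.

package org.apache.hadoop.raid;

import java.util.Random;

// Sketch: encode a 10+4 Reed-Solomon stripe and recover one erased data symbol.
public class ReedSolomonUsageSketch {
  public static void main(String[] args) {
    int stripeSize = 10, paritySize = 4;
    ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
    Random rand = new Random();
    int symbolMax = (int) Math.pow(2, ec.symbolSize());

    int[] message = new int[stripeSize];
    for (int i = 0; i < stripeSize; i++) {
      message[i] = rand.nextInt(symbolMax);
    }
    int[] parity = new int[paritySize];
    ec.encode(message, parity);

    // decode() expects parity symbols first, then message symbols.
    int[] data = new int[paritySize + stripeSize];
    System.arraycopy(parity, 0, data, 0, paritySize);
    System.arraycopy(message, 0, data, paritySize, stripeSize);

    int erased = paritySize;      // position of the first message symbol
    int original = data[erased];
    data[erased] = 0;             // erased positions are zeroed before decoding

    int[] erasedValues = new int[1];
    ec.decode(data, new int[] { erased }, erasedValues);
    System.out.println("recovered ok: " + (erasedValues[0] == original));
  }
}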

View File

@ -1,190 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertTrue;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.junit.Test;
public class TestGaloisField {
final int TEST_TIMES = 10000;
final Random RAND = new Random();
final static GaloisField GF = GaloisField.getInstance();
private int randGF() {
return 0x000000FF & RAND.nextInt(GF.getFieldSize());
}
private int[] randGFPoly(int len) {
int[] result = new int[len];
for (int i = 0; i < len; i++) {
result[i] = randGF();
}
return result;
}
@Test
public void testGetInstance() {
GaloisField gf1 = GaloisField.getInstance(256, 285);
GaloisField gf2 = GaloisField.getInstance();
GaloisField gf3 = GaloisField.getInstance(128, 137);
GaloisField gf4 = GaloisField.getInstance(128, 137);
GaloisField gf5 = GaloisField.getInstance(512, 529);
GaloisField gf6 = GaloisField.getInstance(512, 529);
assertTrue(gf1 == gf2);
assertTrue(gf3 == gf4);
assertTrue(gf5 == gf6);
}
@Test
public void testDistributivity() {
for (int i = 0; i < TEST_TIMES; i++) {
int a = RAND.nextInt(GF.getFieldSize());
int b = RAND.nextInt(GF.getFieldSize());
int c = RAND.nextInt(GF.getFieldSize());
int result1 = GF.multiply(a, GF.add(b, c));
int result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
assertTrue("Distributivity test #" + i + " failed: " + a + ", " + b + ", "
+ c, result1 == result2);
}
}
@Test
public void testDivision() {
for (int i = 0; i < TEST_TIMES; i++) {
int a = RAND.nextInt(GF.getFieldSize());
int b = RAND.nextInt(GF.getFieldSize());
if (b == 0) {
continue;
}
int c = GF.divide(a, b);
assertTrue("Division test #" + i + " failed: " + a + "/" + b + " = " + c,
a == GF.multiply(c, b));
}
}
@Test
public void testPower() {
for (int i = 0; i < TEST_TIMES; i++) {
int a = randGF();
int n = RAND.nextInt(10);
int result1 = GF.power(a, n);
int result2 = 1;
for (int j = 0; j < n; j++) {
result2 = GF.multiply(result2, a);
}
assertTrue("Power test #" + i + " failed: " + a + "^" + n, result1 == result2);
}
}
@Test
public void testPolynomialDistributivity() {
final int TEST_LEN = 15;
for (int i = 0; i < TEST_TIMES; i++) {
int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
int[] result1 = GF.multiply(a, GF.add(b, c));
int[] result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
assertTrue("Distributivity test on polynomials failed",
java.util.Arrays.equals(result1, result2));
}
}
@Test
public void testSubstitute() {
final int TEST_LEN = 15;
for (int i = 0; i < TEST_TIMES; i++) {
int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
int x = randGF();
// (a * b * c)(x)
int result1 = GF.substitute(GF.multiply(GF.multiply(a, b), c), x);
// a(x) * b(x) * c(x)
int result2 =
GF.multiply(GF.multiply(GF.substitute(a, x), GF.substitute(b, x)),
GF.substitute(c, x));
assertTrue("Substitute test on polynomial failed",
result1 == result2);
}
}
@Test
public void testSolveVandermondeSystem() {
final int TEST_LEN = 15;
for (int i = 0; i < TEST_TIMES; i++) {
int[] z = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
// generate distinct values for x
int[] x = new int[z.length];
Set<Integer> s = new HashSet<Integer>();
while (s.size() != z.length) {
s.add(randGF());
}
int t = 0;
for (int v : s) {
x[t++] = v;
}
// compute the output for the Vandermonde system
int[] y = new int[x.length];
for (int j = 0; j < x.length; j++) {
y[j] = 0;
for (int k = 0; k < x.length; k++) {
//y[j] = y[j] + z[k] * pow(x[k], j);
y[j] = GF.add(y[j], GF.multiply(GF.power(x[k], j), z[k]));
}
}
GF.solveVandermondeSystem(x, y);
assertTrue("Solving Vandermonde system failed",
java.util.Arrays.equals(y, z));
}
}
@Test
public void testRemainder() {
final int TEST_LEN = 15;
for (int i = 0; i < TEST_TIMES; i++) {
int[] quotient = null;
int[] divisor = null;
int[] remainder = null;
int[] dividend = null;
while (true) {
quotient = randGFPoly(RAND.nextInt(TEST_LEN - 3) + 3);
divisor = randGFPoly(RAND.nextInt(quotient.length - 2) + 2);
remainder = randGFPoly(RAND.nextInt(divisor.length - 1) + 1);
dividend = GF.add(remainder, GF.multiply(quotient, divisor));
if (quotient[quotient.length - 1] != 0 &&
divisor[divisor.length - 1] != 0 &&
remainder[remainder.length - 1] != 0) {
// make sure all the leading terms are not zero
break;
}
}
GF.remainder(dividend, divisor);
for (int j = 0; j < remainder.length; j++) {
assertTrue("Distributivity test on polynomials failed",
dividend[j] == remainder[j]);
}
}
}
}
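
A small sketch of the GaloisField operations verified above, assuming only the getInstance/add/multiply/divide calls used in this test; the concrete values are illustrative.

package org.apache.hadoop.raid;

// Sketch: basic arithmetic in the GF(2^8) field used by the Reed-Solomon codec.
public class GaloisFieldSketch {
  public static void main(String[] args) {
    // Per testGetInstance above, the default instance is GF(256) with generator polynomial 285.
    GaloisField gf = GaloisField.getInstance();
    int a = 57, b = 130;
    int product = gf.multiply(a, b);
    // Division undoes multiplication (b != 0).
    System.out.println(gf.divide(product, b) == a);
    // In a field of characteristic 2, addition is its own inverse.
    System.out.println(gf.add(gf.add(a, b), b) == a);
    System.out.println("field size = " + gf.getFieldSize());
  }
}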

View File

@ -1,79 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class TestHarIndexParser {
final static Log LOG = LogFactory.getLog(TestHarIndexParser.class);
File indexFile = null;
@Before
public void setUp() throws FileNotFoundException, IOException {
LOG.info("TestHarIndexParser.setUp()");
indexFile = File.createTempFile("harindex", ".tmp");
indexFile.deleteOnExit();
OutputStreamWriter out = new OutputStreamWriter(
new FileOutputStream(indexFile),
Charset.forName("UTF-8"));
out.write("%2F dir 1282018162460+0+493+hadoop+hadoop 0 0 f1 f2 f3 f4\n");
out.write("%2Ff1 file part-0 0 1024 1282018141145+1282018140822+420+hadoop+hadoop\n");
out.write("%2Ff3 file part-0 2048 1024 1282018148590+1282018148255+420+hadoop+hadoop\n");
out.write("%2Ff2 file part-0 1024 1024 1282018144198+1282018143852+420+hadoop+hadoop\n");
out.write("%2Ff4 file part-1 0 1024000 1282018162959+1282018162460+420+hadoop+hadoop\n");
out.flush();
out.close();
}
@After
public void tearDown() {
LOG.info("TestHarIndexParser.tearDown()");
if (indexFile != null)
indexFile.delete();
}
@Test
public void testHarIndexParser()
throws UnsupportedEncodingException, IOException {
LOG.info("testHarIndexParser started.");
InputStream in = new FileInputStream(indexFile);
long size = indexFile.length();
HarIndex parser = new HarIndex(in, size);
HarIndex.IndexEntry entry = parser.findEntry("part-0", 2100);
assertEquals("/f3", entry.fileName);
LOG.info("testHarIndexParser finished.");
}
}
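
A short sketch of the HarIndex lookup tested above, assuming the constructor and findEntry signatures used in this test; the in-memory index content mirrors the format written in setUp().

package org.apache.hadoop.raid;

import java.io.ByteArrayInputStream;
import java.nio.charset.Charset;

// Sketch: find which archived file covers a byte offset inside a HAR part file.
public class HarIndexLookupSketch {
  public static void main(String[] args) throws Exception {
    String index =
        "%2F dir 1282018162460+0+493+hadoop+hadoop 0 0 f1 f2\n" +
        "%2Ff1 file part-0 0 1024 1282018141145+1282018140822+420+hadoop+hadoop\n" +
        "%2Ff2 file part-0 1024 1024 1282018144198+1282018143852+420+hadoop+hadoop\n";
    byte[] bytes = index.getBytes(Charset.forName("UTF-8"));
    HarIndex parser = new HarIndex(new ByteArrayInputStream(bytes), bytes.length);
    // Offset 1500 of part-0 lies inside f2's range [1024, 2048).
    HarIndex.IndexEntry entry = parser.findEntry("part-0", 1500);
    System.out.println(entry.fileName);   // expected: /f2
  }
}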

View File

@ -1,121 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.Time;
import org.junit.Test;
public class TestRaidFilter {
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static Log LOG =
LogFactory.getLog("org.apache.hadoop.raid.TestRaidFilter");
Configuration conf;
MiniDFSCluster dfs = null;
FileSystem fs = null;
private void mySetup() throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
dfs = new MiniDFSCluster(conf, 2, true, null);
dfs.waitActive();
fs = dfs.getFileSystem();
String namenode = fs.getUri().toString();
FileSystem.setDefaultUri(conf, namenode);
}
private void myTearDown() throws Exception {
if (dfs != null) { dfs.shutdown(); }
}
@Test
public void testLayeredPolicies() throws Exception {
mySetup();
Path src1 = new Path("/user/foo");
Path src2 = new Path("/user/foo/bar");
PolicyInfo info1 = new PolicyInfo("p1", conf);
info1.setSrcPath(src1.toString());
info1.setErasureCode("xor");
info1.setDescription("test policy");
info1.setProperty("targetReplication", "1");
info1.setProperty("metaReplication", "1");
info1.setProperty("modTimePeriod", "0");
PolicyInfo info2 = new PolicyInfo("p2", conf);
info2.setSrcPath(src2.toString());
info2.setErasureCode("xor");
info2.setDescription("test policy");
info2.setProperty("targetReplication", "1");
info2.setProperty("metaReplication", "1");
info2.setProperty("modTimePeriod", "0");
ArrayList<PolicyInfo> all = new ArrayList<PolicyInfo>();
all.add(info1);
all.add(info2);
try {
long blockSize = 1024;
byte[] bytes = new byte[(int)blockSize];
Path f1 = new Path(src1, "f1");
Path f2 = new Path(src2, "f2");
FSDataOutputStream stm1 = fs.create(f1, false, 4096, (short)1, blockSize);
FSDataOutputStream stm2 = fs.create(f2, false, 4096, (short)1, blockSize);
FSDataOutputStream[] stms = new FSDataOutputStream[]{stm1, stm2};
for (FSDataOutputStream stm: stms) {
stm.write(bytes);
stm.write(bytes);
stm.write(bytes);
stm.close();
}
Thread.sleep(1000);
FileStatus stat1 = fs.getFileStatus(f1);
FileStatus stat2 = fs.getFileStatus(f2);
RaidFilter.Statistics stats = new RaidFilter.Statistics();
RaidFilter.TimeBasedFilter filter = new RaidFilter.TimeBasedFilter(
conf, RaidNode.xorDestinationPath(conf), info1, all,
Time.now(), stats);
System.out.println("Stats " + stats);
assertTrue(filter.check(stat1));
assertFalse(filter.check(stat2));
} finally {
myTearDown();
}
}
}
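
A minimal sketch of building the xor PolicyInfo objects that the filter test above compares; only the PolicyInfo setters used in this test are assumed, and the property values are illustrative.

package org.apache.hadoop.raid;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.raid.protocol.PolicyInfo;

// Sketch: construct an xor raid policy for a source path prefix.
public class PolicyInfoSketch {
  static PolicyInfo xorPolicy(String name, String srcPath, Configuration conf)
      throws Exception {
    PolicyInfo info = new PolicyInfo(name, conf);
    info.setSrcPath(srcPath);                    // files under this prefix are raid candidates
    info.setErasureCode("xor");                  // one parity block per stripe
    info.setProperty("targetReplication", "1");  // replication of the source file after raiding
    info.setProperty("metaReplication", "1");    // replication of the parity file
    info.setProperty("modTimePeriod", "0");      // idle time before a file becomes a candidate
    return info;
  }
}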

View File

@ -1,315 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Level;
import org.junit.Test;
/**
* Verify that parity files are archived into a HAR and that the HAR is
* deleted after the original files are removed.
*/
public class TestRaidHar {
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
final Random rand = new Random();
{
((Log4JLogger)RaidNode.LOG).getLogger().setLevel(Level.ALL);
}
Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
String jobTrackerName = null;
/**
* create mapreduce and dfs clusters
*/
private void createClusters(boolean local) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
// scan all policies once every 5 second
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
if (local) {
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
} else {
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
}
conf.set("raid.server.address", "localhost:0");
conf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
// create a dfs and map-reduce cluster
final int taskTrackers = 4;
dfs = new MiniDFSCluster(conf, 3, true, null);
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
mr = new MiniMRCluster(taskTrackers, namenode, 3);
JobConf jobConf = mr.createJobConf();
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
conf.set("mapred.job.tracker", jobTrackerName);
conf.set("mapreduce.framework.name", "yarn");
String rmAdress = jobConf.get("yarn.resourcemanager.address");
if (rmAdress != null) {
conf.set("yarn.resourcemanager.address", rmAdress);
}
String schedulerAdress =
jobConf.get("yarn.resourcemanager.scheduler.address");
if (schedulerAdress != null) {
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
}
String jobHistoryAddress =
jobConf.get("mapreduce.jobhistory.address");
if (jobHistoryAddress != null) {
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
}
}
/**
* create raid.xml file for RaidNode
*/
private void mySetup(long targetReplication,
long metaReplication, long stripeLength) throws Exception {
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
String str = "<configuration> " +
"<srcPath prefix=\"/user/test/raidtest\"> " +
"<policy name = \"RaidTest1\"> " +
"<erasureCode>xor</erasureCode> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>" + targetReplication + "</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>" + metaReplication + "</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>stripeLength</name> " +
"<value>" + stripeLength + "</value> " +
"<description> the max number of blocks in a file to RAID together " +
"</description> " +
"</property> " +
"<property> " +
"<name>time_before_har</name> " +
"<value>0</value> " +
"<description> amount of time waited before har'ing parity files" +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> " +
"</policy>" +
"</srcPath>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
}
/**
* stop clusters created earlier
*/
private void stopClusters() throws Exception {
if (mr != null) { mr.shutdown(); }
if (dfs != null) { dfs.shutdown(); }
}
/**
* Test that parity files get har'ed for various block sizes and
* stripe lengths.
*/
@Test
public void testRaidHar() throws Exception {
LOG.info("Test testRaidHar started.");
long blockSizes [] = {1024L};
long stripeLengths [] = {5};
long targetReplication = 1;
long metaReplication = 1;
int numBlock = 9;
int iter = 0;
createClusters(true);
try {
for (long blockSize : blockSizes) {
for (long stripeLength : stripeLengths) {
doTestHar(iter, targetReplication, metaReplication,
stripeLength, blockSize, numBlock);
iter++;
}
}
} finally {
stopClusters();
}
LOG.info("Test testRaidHar completed.");
}
/**
* Create parity files, wait for them to be archived into a HAR, then
* delete the source files and validate that the HAR is deleted.
*/
private void doTestHar(int iter, long targetReplication,
long metaReplication, long stripeLength,
long blockSize, int numBlock) throws Exception {
LOG.info("doTestHar started---------------------------:" + " iter " + iter +
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
mySetup(targetReplication, metaReplication, stripeLength);
Path dir = new Path("/user/test/raidtest/subdir/");
Path file1 = new Path(dir + "/file" + iter);
RaidNode cnode = null;
try {
Path destPath = new Path("/destraid/user/test/raidtest/subdir");
fileSys.delete(dir, true);
fileSys.delete(destPath, true);
for (int i = 0; i < 10; i++) {
Path file = new Path(dir + "/file" + i);
TestRaidNode.createOldFile(fileSys, file, 1, numBlock, blockSize);
}
LOG.info("doTestHar created test files for iteration " + iter);
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
cnode = RaidNode.createRaidNode(null, localConf);
FileStatus[] listPaths = null;
int maxFilesFound = 0;
// wait till file is raided
while (true) {
try {
listPaths = fileSys.listStatus(destPath);
int count = 0;
Path harPath = null;
int filesFound = 0;
if (listPaths != null) {
for (FileStatus s : listPaths) {
LOG.info("doTestHar found path " + s.getPath());
if (!s.isDir())
filesFound++;
if (filesFound > maxFilesFound)
maxFilesFound = filesFound;
if (s.getPath().toString().endsWith(".har")) {
// If a HAR directory is found, ensure that we have seen
// 10 parity files. We have to keep track of the max # of
// files since some parity files might get deleted by the
// purge thread.
assertEquals(10, maxFilesFound);
harPath = s.getPath();
count++;
}
}
}
if (count == 1 && listPaths.length == 1) {
Path partfile = new Path(harPath, "part-0");
assertEquals(fileSys.getFileStatus(partfile).getReplication(),
targetReplication);
break;
}
} catch (FileNotFoundException e) {
//ignore
}
LOG.info("doTestHar waiting for files to be raided and parity files to be har'ed and deleted. Found " +
(listPaths == null ? "none" : listPaths.length));
Thread.sleep(1000); // keep waiting
}
fileSys.delete(dir, true);
// wait till raid file is deleted
int count = 1;
while (count > 0) {
count = 0;
try {
listPaths = fileSys.listStatus(destPath);
if (listPaths != null) {
for (FileStatus s : listPaths) {
LOG.info("doTestHar found path " + s.getPath());
if (s.getPath().toString().endsWith(".har")) {
count++;
}
}
}
} catch (FileNotFoundException e) { } //ignoring
LOG.info("doTestHar waiting for har file to be deleted. Found " +
(listPaths == null ? "none" : listPaths.length) + " files");
Thread.sleep(1000);
}
} catch (Exception e) {
LOG.info("doTestHar Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
}
LOG.info("doTestHar completed:" + " blockSize=" + blockSize +
" stripeLength=" + stripeLength);
}
}

View File

@ -1,738 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Random;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.PolicyList;
import org.apache.hadoop.util.JarFinder;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.junit.Test;
/**
* Test the generation of parity blocks for files with different block
* sizes. Also test that a data block can be regenerated from a raid stripe
* using the parity block
*/
public class TestRaidNode {
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
public static final String DistRaid_JAR = JarFinder.getJar(DistRaid.class);
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
final static Random rand = new Random();
Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
String jobTrackerName = null;
/**
* create mapreduce and dfs clusters
*/
private void createClusters(boolean local) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
conf.setBoolean("dfs.permissions.enabled", true);
conf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 20000);
conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3000L);
// scan all policies once every 5 second
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
if (local) {
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
} else {
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
}
conf.set("raid.server.address", "localhost:0");
// create a dfs and map-reduce cluster
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
builder.numDataNodes(6);
builder.format(true);
dfs = builder.build();
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
final int taskTrackers = 4;
mr = new MiniMRCluster(taskTrackers, namenode, 3);
JobConf jobConf = mr.createJobConf();
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
conf.set("mapred.job.tracker", jobTrackerName);
conf.set("mapreduce.framework.name", "yarn");
String rmAdress = jobConf.get("yarn.resourcemanager.address");
if (rmAdress != null) {
conf.set("yarn.resourcemanager.address", rmAdress);
}
String schedulerAdress =
jobConf.get("yarn.resourcemanager.scheduler.address");
if (schedulerAdress != null) {
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
}
String jobHistoryAddress =
jobConf.get("mapreduce.jobhistory.address");
if (jobHistoryAddress != null) {
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
}
}
class ConfigBuilder {
private List<String> policies;
public ConfigBuilder() {
policies = new java.util.ArrayList<String>();
}
public void addPolicy(String name, String path, String parent) {
String str =
"<srcPath prefix=\"" + path + "\"> " +
"<policy name = \"" + name + "\"> " +
"<parentPolicy>" + parent + "</parentPolicy>" +
"</policy>" +
"</srcPath>";
policies.add(str);
}
public void addPolicy(String name, short srcReplication,
long targetReplication, long metaReplication, long stripeLength) {
String str =
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
"<policy name = \"" + name + "\"> " +
"<erasureCode>xor</erasureCode> " +
"<property> " +
"<name>srcReplication</name> " +
"<value>" + srcReplication + "</value> " +
"<description> pick only files whole replFactor is greater than or equal to " +
"</description> " +
"</property> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>" + targetReplication + "</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>" + metaReplication + "</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>stripeLength</name> " +
"<value>" + stripeLength + "</value> " +
"<description> the max number of blocks in a file to RAID together " +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> " +
"</policy>" +
"</srcPath>";
policies.add(str);
}
public void addPolicy(String name, String path, short srcReplication,
long targetReplication, long metaReplication, long stripeLength) {
String str =
"<srcPath prefix=\"" + path + "\"> " +
"<policy name = \"" + name + "\"> " +
"<erasureCode>xor</erasureCode> " +
"<property> " +
"<name>srcReplication</name> " +
"<value>" + srcReplication + "</value> " +
"<description> pick only files whole replFactor is greater than or equal to " +
"</description> " +
"</property> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>" + targetReplication + "</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>" + metaReplication + "</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>stripeLength</name> " +
"<value>" + stripeLength + "</value> " +
"<description> the max number of blocks in a file to RAID together " +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> " +
"</policy>" +
"</srcPath>";
policies.add(str);
}
public void persist() throws IOException {
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
fileWriter.write("<configuration>");
for (String policy: policies) {
fileWriter.write(policy);
}
fileWriter.write("</configuration>");
fileWriter.close();
}
}
/**
* stop clusters created earlier
*/
private void stopClusters() throws Exception {
if (mr != null) { mr.shutdown(); }
if (dfs != null) { dfs.shutdown(); }
}
/**
* Raid files under a source path for several stripe lengths and verify
* recovery from simulated corruption at various offsets.
*/
@Test
public void testPathFilter() throws Exception {
LOG.info("Test testPathFilter started.");
long blockSizes [] = {1024L};
int stripeLengths [] = {5, 6, 10, 11, 12};
int targetReplication = 1;
int metaReplication = 1;
int numBlock = 11;
int iter = 0;
createClusters(true);
try {
for (long blockSize : blockSizes) {
for (long stripeLength : stripeLengths) {
doTestPathFilter(iter, targetReplication, metaReplication,
stripeLength, blockSize, numBlock);
iter++;
}
}
doCheckPolicy();
} finally {
stopClusters();
}
LOG.info("Test testPathFilter completed.");
}
/**
* Raid one file, then verify recovery at several corruption offsets.
*/
private void doTestPathFilter(int iter, long targetReplication,
long metaReplication, long stripeLength,
long blockSize, int numBlock) throws Exception {
LOG.info("doTestPathFilter started---------------------------:" + " iter " + iter +
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
ConfigBuilder cb = new ConfigBuilder();
cb.addPolicy("policy1", "/user/dhruba/raidtest", (short)1, targetReplication, metaReplication, stripeLength);
cb.persist();
RaidShell shell = null;
Path dir = new Path("/user/dhruba/raidtest/");
Path file1 = new Path(dir + "/file" + iter);
RaidNode cnode = null;
try {
Path destPath = new Path("/destraid/user/dhruba/raidtest");
fileSys.delete(dir, true);
fileSys.delete(destPath, true);
long crc1 = createOldFile(fileSys, file1, 1, numBlock, blockSize);
LOG.info("doTestPathFilter created test files for iteration " + iter);
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
cnode = RaidNode.createRaidNode(null, localConf);
FileStatus[] listPaths = null;
// wait till file is raided
while (true) {
try {
listPaths = fileSys.listStatus(destPath);
int count = 0;
if (listPaths != null && listPaths.length == 1) {
for (FileStatus s : listPaths) {
LOG.info("doTestPathFilter found path " + s.getPath());
if (!s.getPath().toString().endsWith(".tmp") &&
fileSys.getFileStatus(file1).getReplication() ==
targetReplication) {
count++;
}
}
}
if (count > 0) {
break;
}
} catch (FileNotFoundException e) {
//ignore
}
LOG.info("doTestPathFilter waiting for files to be raided. Found " +
(listPaths == null ? "none" : listPaths.length));
Thread.sleep(1000); // keep waiting
}
// assertEquals(listPaths.length, 1); // all files raided
LOG.info("doTestPathFilter all files found in Raid.");
// check for error at beginning of file
shell = new RaidShell(conf);
shell.initializeRpc(conf, cnode.getListenerAddress());
if (numBlock >= 1) {
LOG.info("doTestPathFilter Check error at beginning of file.");
simulateError(shell, fileSys, file1, crc1, 0);
}
// check for error at the beginning of second block
if (numBlock >= 2) {
LOG.info("doTestPathFilter Check error at beginning of second block.");
simulateError(shell, fileSys, file1, crc1, blockSize + 1);
}
// check for error at the middle of third block
if (numBlock >= 3) {
LOG.info("doTestPathFilter Check error at middle of third block.");
simulateError(shell, fileSys, file1, crc1, 2 * blockSize + 10);
}
// check for error at the middle of second stripe
if (numBlock >= stripeLength + 1) {
LOG.info("doTestPathFilter Check error at middle of second stripe.");
simulateError(shell, fileSys, file1, crc1,
stripeLength * blockSize + 100);
}
} catch (Exception e) {
LOG.info("doTestPathFilter Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
if (shell != null) shell.close();
if (cnode != null) { cnode.stop(); cnode.join(); }
LOG.info("doTestPathFilter delete file " + file1);
fileSys.delete(file1, true);
}
LOG.info("doTestPathFilter completed:" + " blockSize=" + blockSize +
" stripeLength=" + stripeLength);
}
// Check that raid occurs only on files that have a replication factor
// greater than or equal to the specified value
private void doCheckPolicy() throws Exception {
LOG.info("doCheckPolicy started---------------------------:");
short srcReplication = 1;
long targetReplication = 2;
long metaReplication = 1;
long stripeLength = 2;
long blockSize = 1024;
int numBlock = 3;
ConfigBuilder cb = new ConfigBuilder();
cb.addPolicy("policy1", "/user/dhruba/policytest", srcReplication,
targetReplication, metaReplication, stripeLength);
cb.persist();
Path dir = new Path("/user/dhruba/policytest/");
Path file1 = new Path(dir + "/file1");
Path file2 = new Path(dir + "/file2");
RaidNode cnode = null;
try {
Path destPath = new Path("/destraid/user/dhruba/policytest");
fileSys.delete(dir, true);
fileSys.delete(destPath, true);
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
cnode = RaidNode.createRaidNode(null, localConf);
// this file should be picked up RaidNode
createOldFile(fileSys, file2, 2, numBlock, blockSize);
FileStatus[] listPaths = null;
long firstmodtime = 0;
// wait till file is raided
while (true) {
Thread.sleep(1000); // waiting
try {
listPaths = fileSys.listStatus(destPath);
} catch (FileNotFoundException e) {
LOG.warn("File not found " + destPath);
// The directory may have been deleted by the purge thread.
continue;
}
int count = 0;
if (listPaths != null && listPaths.length == 1) {
for (FileStatus s : listPaths) {
LOG.info("doCheckPolicy found path " + s.getPath());
if (!s.getPath().toString().endsWith(".tmp") &&
fileSys.getFileStatus(file2).getReplication() ==
targetReplication) {
count++;
firstmodtime = s.getModificationTime();
}
}
}
if (count > 0) {
break;
}
LOG.info("doCheckPolicy waiting for files to be raided. Found " +
(listPaths == null ? "none" : listPaths.length));
}
assertEquals(listPaths.length, 1);
LOG.info("doCheckPolicy all files found in Raid the first time.");
LOG.info("doCheckPolicy: recreating source file");
createOldFile(fileSys, file2, 2, numBlock, blockSize);
FileStatus st = fileSys.getFileStatus(file2);
assertTrue(st.getModificationTime() > firstmodtime);
// wait till file is raided
while (true) {
Thread.sleep(20000L); // waiting
listPaths = fileSys.listStatus(destPath);
int count = 0;
if (listPaths != null && listPaths.length == 1) {
for (FileStatus s : listPaths) {
LOG.info("doCheckPolicy found path " + s.getPath() + " " + s.getModificationTime());
if (!s.getPath().toString().endsWith(".tmp") &&
s.getModificationTime() > firstmodtime &&
fileSys.getFileStatus(file2).getReplication() ==
targetReplication) {
count++;
}
}
}
if (count > 0) {
break;
}
LOG.info("doCheckPolicy waiting for files to be raided. Found " +
(listPaths == null ? "none" : listPaths.length));
}
assertEquals(listPaths.length, 1);
LOG.info("doCheckPolicy: file got re-raided as expected.");
} catch (Exception e) {
LOG.info("doCheckPolicy Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
LOG.info("doTestPathFilter delete file " + file1);
fileSys.delete(file1, false);
}
LOG.info("doCheckPolicy completed:");
}
static public void createTestFiles(FileSystem fileSys,
String path, String destpath, int nfile,
int nblock) throws IOException {
createTestFiles(fileSys, path, destpath, nfile, nblock, (short)1);
}
static void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile,
int nblock, short repl) throws IOException {
long blockSize = 1024L;
Path dir = new Path(path);
Path destPath = new Path(destpath);
fileSys.delete(dir, true);
fileSys.delete(destPath, true);
for(int i = 0 ; i < nfile; i++){
Path file = new Path(path + "file" + i);
createOldFile(fileSys, file, repl, nblock, blockSize);
}
}
/**
* Test dist Raid
*/
@Test
public void testDistRaid() throws Exception {
LOG.info("Test testDistRaid started.");
long targetReplication = 2;
long metaReplication = 2;
long stripeLength = 3;
short srcReplication = 1;
createClusters(false);
ConfigBuilder cb = new ConfigBuilder();
cb.addPolicy("policy1", "/user/dhruba/raidtest",
srcReplication, targetReplication, metaReplication, stripeLength);
cb.addPolicy("policy2", "/user/dhruba/raidtest2",
srcReplication, targetReplication, metaReplication, stripeLength);
cb.persist();
RaidNode cnode = null;
try {
createTestFiles(fileSys, "/user/dhruba/raidtest/",
"/destraid/user/dhruba/raidtest", 5, 7);
createTestFiles(fileSys, "/user/dhruba/raidtest2/",
"/destraid/user/dhruba/raidtest2", 5, 7);
LOG.info("Test testDistRaid created test files");
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.set(JobContext.JAR, TestRaidNode.DistRaid_JAR);
cnode = RaidNode.createRaidNode(null, localConf);
// Verify the policies are parsed correctly
for (PolicyList policyList : cnode.getAllPolicies()) {
for (PolicyInfo p : policyList.getAll()) {
if (p.getName().equals("policy1")) {
Path srcPath = new Path("/user/dhruba/raidtest");
FileSystem fs = srcPath.getFileSystem(conf);
assertTrue(p.getSrcPath().equals(
srcPath.makeQualified(fs.getUri(), fs.getWorkingDirectory())));
} else {
assertTrue(p.getName().equals("policy2"));
Path srcPath = new Path("/user/dhruba/raidtest2");
FileSystem fs = srcPath.getFileSystem(conf);
assertTrue(p.getSrcPath().equals(
srcPath.makeQualified(fs.getUri(), fs.getWorkingDirectory())));
}
assertEquals(targetReplication,
Integer.parseInt(p.getProperty("targetReplication")));
assertEquals(metaReplication,
Integer.parseInt(p.getProperty("metaReplication")));
assertEquals(stripeLength,
Integer.parseInt(p.getProperty("stripeLength")));
}
}
long start = Time.now();
final int MAX_WAITTIME = 300000;
assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
DistRaidNode dcnode = (DistRaidNode) cnode;
while (dcnode.jobMonitor.jobsMonitored() < 2 &&
Time.now() - start < MAX_WAITTIME) {
Thread.sleep(1000);
}
start = Time.now();
while (dcnode.jobMonitor.jobsSucceeded() < 2 &&
Time.now() - start < MAX_WAITTIME) {
Thread.sleep(1000);
}
assertEquals(dcnode.jobMonitor.jobsSucceeded(), dcnode.jobMonitor.jobsMonitored());
LOG.info("Test testDistRaid successful.");
} catch (Exception e) {
LOG.info("testDistRaid Exception " + e + StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
stopClusters();
}
LOG.info("Test testDistRaid completed.");
}
//
// simulate a corruption at the specified offset and verify that everything is good
//
void simulateError(RaidShell shell, FileSystem fileSys, Path file1,
long crc, long corruptOffset) throws IOException {
// recover the file assuming that we encountered a corruption at offset 0
String[] args = new String[3];
args[0] = "-recover";
args[1] = file1.toString();
args[2] = Long.toString(corruptOffset);
Path recover1 = shell.recover(args[0], args, 1)[0];
// compare that the recovered file is identical to the original one
LOG.info("Comparing file " + file1 + " with recovered file " + recover1);
validateFile(fileSys, file1, recover1, crc);
fileSys.delete(recover1, false);
}
//
// creates a file and populates it with random data. Returns its crc.
//
static long createOldFile(FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
throws IOException {
CRC32 crc = new CRC32();
FSDataOutputStream stm = fileSys.create(name, true,
fileSys.getConf().getInt("io.file.buffer.size", 4096),
(short)repl, blocksize);
// fill random data into file
byte[] b = new byte[(int)blocksize];
for (int i = 0; i < numBlocks; i++) {
if (i == (numBlocks-1)) {
b = new byte[(int)blocksize/2];
}
rand.nextBytes(b);
stm.write(b);
crc.update(b);
}
stm.close();
return crc.getValue();
}
//
// validates that file matches the crc.
//
private void validateFile(FileSystem fileSys, Path name1, Path name2, long crc)
throws IOException {
FileStatus stat1 = fileSys.getFileStatus(name1);
FileStatus stat2 = fileSys.getFileStatus(name2);
assertTrue(" Length of file " + name1 + " is " + stat1.getLen() +
" is different from length of file " + name1 + " " + stat2.getLen(),
stat1.getLen() == stat2.getLen());
CRC32 newcrc = new CRC32();
FSDataInputStream stm = fileSys.open(name2);
final byte[] b = new byte[4192];
int num = 0;
while (num >= 0) {
num = stm.read(b);
if (num < 0) {
break;
}
newcrc.update(b, 0, num);
}
stm.close();
if (newcrc.getValue() != crc) {
fail("CRC mismatch of files " + name1 + " with file " + name2);
}
}
@Test
public void testSuspendTraversal() throws Exception {
LOG.info("Test testSuspendTraversal started.");
long targetReplication = 2;
long metaReplication = 2;
long stripeLength = 3;
short srcReplication = 1;
createClusters(false);
ConfigBuilder cb = new ConfigBuilder();
cb.addPolicy("policy1", "/user/dhruba/raidtest",
srcReplication, targetReplication, metaReplication, stripeLength);
cb.persist();
RaidNode cnode = null;
try {
for(int i = 0; i < 4; i++){
Path file = new Path("/user/dhruba/raidtest/dir" + i + "/file" + i);
createOldFile(fileSys, file, 1, 7, 1024L);
}
LOG.info("Test testSuspendTraversal created test files");
Configuration localConf = new Configuration(conf);
localConf.setInt("raid.distraid.max.jobs", 2);
localConf.setInt("raid.distraid.max.files", 2);
localConf.setInt("raid.directorytraversal.threads", 1);
localConf.set(JobContext.JAR, TestRaidNode.DistRaid_JAR);
// 4 test files: 2 jobs with 2 files each.
final int numJobsExpected = 2;
cnode = RaidNode.createRaidNode(null, localConf);
long start = Time.now();
final int MAX_WAITTIME = 300000;
assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
DistRaidNode dcnode = (DistRaidNode) cnode;
start = Time.now();
while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected &&
Time.now() - start < MAX_WAITTIME) {
LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() +
" to reach " + numJobsExpected);
Thread.sleep(3000);
}
// Wait for any running jobs to finish.
start = Time.now();
while (dcnode.jobMonitor.runningJobsCount() > 0 &&
Time.now() - start < MAX_WAITTIME) {
LOG.info("Waiting for zero running jobs: " +
dcnode.jobMonitor.runningJobsCount());
Thread.sleep(1000);
}
assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored());
assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded());
LOG.info("Test testSuspendTraversal successful.");
} catch (Exception e) {
LOG.info("testSuspendTraversal Exception " + e + StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
stopClusters();
}
LOG.info("Test testSuspendTraversal completed.");
}
}
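
A sketch of the RaidShell recovery call wrapped by simulateError above; it assumes a running RaidNode and the -recover argument layout shown in that helper.

package org.apache.hadoop.raid;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

// Sketch: ask a RaidNode to rebuild a file that is corrupt at a given offset.
public class RecoverSketch {
  static Path recover(Configuration conf, RaidNode raidNode,
                      Path corruptFile, long corruptOffset) throws Exception {
    RaidShell shell = new RaidShell(conf);
    try {
      shell.initializeRpc(conf, raidNode.getListenerAddress());
      String[] args = { "-recover", corruptFile.toString(), Long.toString(corruptOffset) };
      // recover() returns the rebuilt copies; the caller validates them against the original CRC.
      return shell.recover(args[0], args, 1)[0];
    } finally {
      shell.close();
    }
  }
}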

View File

@ -1,521 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level;
import org.junit.Test;
/**
* If a file gets deleted, then verify that the parity file gets deleted too.
*/
public class TestRaidPurge {
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
final Random rand = new Random();
{
((Log4JLogger)RaidNode.LOG).getLogger().setLevel(Level.ALL);
}
Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
String jobTrackerName = null;
/**
* create mapreduce and dfs clusters
*/
private void createClusters(boolean local) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
// scan all policies once every 5 second
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.dfs.DFSClient");
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
if (local) {
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
} else {
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
}
conf.set("raid.server.address", "localhost:0");
// create a dfs and map-reduce cluster
final int taskTrackers = 4;
final int jobTrackerPort = 60050;
dfs = new MiniDFSCluster(conf, 3, true, null);
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
mr = new MiniMRCluster(taskTrackers, namenode, 3);
JobConf jobConf = mr.createJobConf();
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
conf.set("mapred.job.tracker", jobTrackerName);
conf.set("mapreduce.framework.name", "yarn");
String rmAdress = jobConf.get("yarn.resourcemanager.address");
if (rmAdress != null) {
conf.set("yarn.resourcemanager.address", rmAdress);
}
String schedulerAdress =
jobConf.get("yarn.resourcemanager.scheduler.address");
if (schedulerAdress != null) {
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
}
String jobHistoryAddress =
jobConf.get("mapreduce.jobhistory.address");
if (jobHistoryAddress != null) {
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
}
}
/**
* create raid.xml file for RaidNode
*/
private void mySetup(long targetReplication,
long metaReplication, long stripeLength) throws Exception {
int harDelay = 1; // 1 day.
mySetup(targetReplication, metaReplication, stripeLength, harDelay);
}
private void mySetup(long targetReplication,
long metaReplication, long stripeLength, int harDelay) throws Exception {
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
String str = "<configuration> " +
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
"<policy name = \"RaidTest1\"> " +
"<erasureCode>xor</erasureCode> " +
"<destPath> /destraid</destPath> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>" + targetReplication + "</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>" + metaReplication + "</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>stripeLength</name> " +
"<value>" + stripeLength + "</value> " +
"<description> the max number of blocks in a file to RAID together " +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> " +
"<property> " +
"<name>time_before_har</name> " +
"<value> " + harDelay + "</value> " +
"<description> amount of time waited before har'ing parity files" +
"</description> " +
"</property> " +
"</policy>" +
"</srcPath>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
}
/**
* stop clusters created earlier
*/
private void stopClusters() throws Exception {
if (mr != null) { mr.shutdown(); }
if (dfs != null) { dfs.shutdown(); }
}
/**
* Test that parity files that do not have an associated master file
* get deleted.
*/
@Test
public void testPurge() throws Exception {
LOG.info("Test testPurge started.");
long blockSizes [] = {1024L};
long stripeLengths [] = {5};
long targetReplication = 1;
long metaReplication = 1;
int numBlock = 9;
int iter = 0;
createClusters(true);
try {
for (long blockSize : blockSizes) {
for (long stripeLength : stripeLengths) {
doTestPurge(iter, targetReplication, metaReplication,
stripeLength, blockSize, numBlock);
iter++;
}
}
} finally {
stopClusters();
}
LOG.info("Test testPurge completed.");
}
/**
* Create parity file, delete original file and then validate that
* parity file is automatically deleted.
*/
private void doTestPurge(int iter, long targetReplication,
long metaReplication, long stripeLength,
long blockSize, int numBlock) throws Exception {
LOG.info("doTestPurge started---------------------------:" + " iter " + iter +
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
mySetup(targetReplication, metaReplication, stripeLength);
Path dir = new Path("/user/dhruba/raidtest/");
Path file1 = new Path(dir + "/file" + iter);
RaidNode cnode = null;
try {
Path destPath = new Path("/destraid/user/dhruba/raidtest");
fileSys.delete(dir, true);
fileSys.delete(destPath, true);
TestRaidNode.createOldFile(fileSys, file1, 1, numBlock, blockSize);
LOG.info("doTestPurge created test files for iteration " + iter);
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
cnode = RaidNode.createRaidNode(null, localConf);
FileStatus[] listPaths = null;
// wait till file is raided
while (true) {
try {
listPaths = fileSys.listStatus(destPath);
int count = 0;
if (listPaths != null && listPaths.length == 1) {
for (FileStatus s : listPaths) {
LOG.info("doTestPurge found path " + s.getPath());
if (!s.getPath().toString().endsWith(".tmp")) {
count++;
}
}
}
if (count > 0) {
break;
}
} catch (FileNotFoundException e) {
//ignore
}
LOG.info("doTestPurge waiting for files to be raided. Found " +
(listPaths == null ? "none" : listPaths.length));
Thread.sleep(1000); // keep waiting
}
// assertEquals(listPaths.length, 1); // all files raided
LOG.info("doTestPurge all files found in Raid.");
// delete original file
assertTrue("Unable to delete original file " + file1 ,
fileSys.delete(file1, true));
LOG.info("deleted file " + file1);
// wait till parity file and directory are automatically deleted
while (fileSys.exists(destPath)) {
LOG.info("doTestPurge waiting for parity files to be removed.");
Thread.sleep(1000); // keep waiting
}
} catch (Exception e) {
LOG.info("doTestPurge Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
LOG.info("doTestPurge delete file " + file1);
fileSys.delete(file1, true);
}
LOG.info("doTestPurge completed:" + " blockSize=" + blockSize +
" stripeLength=" + stripeLength);
}
/**
* Create a file, wait for parity file to get HARed. Then modify the file,
* wait for the HAR to get purged.
*/
@Test
public void testPurgeHar() throws Exception {
LOG.info("testPurgeHar started");
int harDelay = 0;
createClusters(true);
mySetup(1, 1, 5, harDelay);
Path dir = new Path("/user/dhruba/raidtest/");
Path destPath = new Path("/raid/user/dhruba/raidtest");
Path file1 = new Path(dir + "/file");
RaidNode cnode = null;
try {
TestRaidNode.createOldFile(fileSys, file1, 1, 8, 8192L);
LOG.info("testPurgeHar created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
cnode = RaidNode.createRaidNode(null, localConf);
// Wait till har is created.
while (true) {
try {
FileStatus[] listPaths = fileSys.listStatus(destPath);
if (listPaths != null && listPaths.length == 1) {
FileStatus s = listPaths[0];
LOG.info("testPurgeHar found path " + s.getPath());
if (s.getPath().toString().endsWith(".har")) {
break;
}
}
} catch (FileNotFoundException e) {
//ignore
}
Thread.sleep(1000); // keep waiting
}
// Set an old timestamp.
fileSys.setTimes(file1, 0, 0);
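// The changed mtime makes the har'ed parity stale: the RaidNode recreates the parity file (whose mtime tracks the source, hence the check for 0 below) and purges the old har.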
boolean found = false;
FileStatus[] listPaths = null;
while (!found || listPaths == null || listPaths.length > 1) {
listPaths = fileSys.listStatus(destPath);
if (listPaths != null) {
for (FileStatus s: listPaths) {
LOG.info("testPurgeHar waiting for parity file to be recreated" +
" and har to be deleted found " + s.getPath());
if (s.getPath().toString().endsWith("file") &&
s.getModificationTime() == 0) {
found = true;
}
}
}
Thread.sleep(1000);
}
} catch (Exception e) {
LOG.info("testPurgeHar Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
fileSys.delete(dir, true);
fileSys.delete(destPath, true);
stopClusters();
}
}
/**
* Create parity file, delete original file's directory and then validate that
* parity directory is automatically deleted.
*/
@Test
public void testPurgeDirectory() throws Exception {
long stripeLength = 5;
long blockSize = 8192;
long targetReplication = 1;
long metaReplication = 1;
int numBlock = 9;
createClusters(true);
mySetup(targetReplication, metaReplication, stripeLength);
Path dir = new Path("/user/dhruba/raidtest/");
Path file1 = new Path(dir + "/file1");
RaidNode cnode = null;
try {
TestRaidNode.createOldFile(fileSys, file1, 1, numBlock, blockSize);
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
cnode = RaidNode.createRaidNode(null, localConf);
Path destPath = new Path("/destraid/user/dhruba/raidtest");
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
// delete original directory.
assertTrue("Unable to delete original directory " + file1 ,
fileSys.delete(file1.getParent(), true));
LOG.info("deleted file " + file1);
// wait till parity file and directory are automatically deleted
long start = Time.now();
while (fileSys.exists(destPath) &&
Time.now() - start < 120000) {
LOG.info("testPurgeDirectory waiting for parity files to be removed.");
Thread.sleep(1000); // keep waiting
}
assertFalse(fileSys.exists(destPath));
} catch (Exception e) {
LOG.info("testPurgeDirectory Exception " + e +
StringUtils.stringifyException(e));
throw e;
} finally {
if (cnode != null) { cnode.stop(); cnode.join(); }
LOG.info("testPurgeDirectory delete file " + file1);
fileSys.delete(file1, true);
stopClusters();
}
}
/**
* Test that an XOR parity file is removed when a RS parity file is detected.
*/
@Test
public void testPurgePreference() throws Exception {
createClusters(true);
Path dir = new Path("/user/test/raidtest/");
Path file1 = new Path(dir + "/file1");
PolicyInfo infoXor = new PolicyInfo("testPurgePreference", conf);
infoXor.setSrcPath("/user/test/raidtest");
infoXor.setErasureCode("xor");
infoXor.setDescription("test policy");
infoXor.setProperty("targetReplication", "2");
infoXor.setProperty("metaReplication", "2");
PolicyInfo infoRs = new PolicyInfo("testPurgePreference", conf);
infoRs.setSrcPath("/user/test/raidtest");
infoRs.setErasureCode("rs");
infoRs.setDescription("test policy");
infoRs.setProperty("targetReplication", "1");
infoRs.setProperty("metaReplication", "1");
try {
TestRaidNode.createOldFile(fileSys, file1, 1, 9, 8192L);
FileStatus stat = fileSys.getFileStatus(file1);
// Create the parity files.
RaidNode.doRaid(
conf, infoXor, stat, new RaidNode.Statistics(), Reporter.NULL);
RaidNode.doRaid(
conf, infoRs, stat, new RaidNode.Statistics(), Reporter.NULL);
Path xorParity =
new Path(RaidNode.DEFAULT_RAID_LOCATION, "user/test/raidtest/file1");
Path rsParity =
new Path(RaidNode.DEFAULT_RAIDRS_LOCATION, "user/test/raidtest/file1");
assertTrue(fileSys.exists(xorParity));
assertTrue(fileSys.exists(rsParity));
// Check purge of a single parity file.
RaidNode cnode = RaidNode.createRaidNode(conf);
FileStatus raidRsStat =
fileSys.getFileStatus(new Path(RaidNode.DEFAULT_RAIDRS_LOCATION));
cnode.purgeMonitor.recursePurge(infoRs.getErasureCode(), fileSys, fileSys,
RaidNode.DEFAULT_RAIDRS_LOCATION, raidRsStat);
// Calling purge under the RS path has no effect.
assertTrue(fileSys.exists(xorParity));
assertTrue(fileSys.exists(rsParity));
FileStatus raidStat =
fileSys.getFileStatus(new Path(RaidNode.DEFAULT_RAID_LOCATION));
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
// XOR parity must have been purged by now.
assertFalse(fileSys.exists(xorParity));
assertTrue(fileSys.exists(rsParity));
// Now check the purge of a parity har.
// Delete the RS parity for now.
fileSys.delete(rsParity);
// Recreate the XOR parity.
Path xorHar =
new Path(RaidNode.DEFAULT_RAID_LOCATION, "user/test/raidtest/raidtest" +
RaidNode.HAR_SUFFIX);
RaidNode.doRaid(
conf, infoXor, stat, new RaidNode.Statistics(), Reporter.NULL);
assertTrue(fileSys.exists(xorParity));
assertFalse(fileSys.exists(xorHar));
// Create the har.
long cutoff = Time.now();
cnode.recurseHar(infoXor, fileSys, raidStat,
RaidNode.DEFAULT_RAID_LOCATION, fileSys, cutoff,
RaidNode.tmpHarPathForCode(conf, infoXor.getErasureCode()));
// Call purge to get rid of the parity file. The har should remain.
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
// XOR har should exist but xor parity file should have been purged.
assertFalse(fileSys.exists(xorParity));
assertTrue(fileSys.exists(xorHar));
// Now create the RS parity.
RaidNode.doRaid(
conf, infoRs, stat, new RaidNode.Statistics(), Reporter.NULL);
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
// XOR har should get deleted.
assertTrue(fileSys.exists(rsParity));
assertFalse(fileSys.exists(xorParity));
assertFalse(fileSys.exists(xorHar));
} finally {
stopClusters();
}
}
}

View File

@ -1,267 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
public class TestRaidShell {
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestRaidShell");
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static String CONFIG_FILE = new File(TEST_DIR,
"test-raid.xml").getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static int NUM_DATANODES = 3;
Configuration conf;
String namenode = null;
MiniDFSCluster dfs = null;
String hftp = null;
FileSystem fileSys = null;
RaidNode cnode = null;
Random rand = new Random();
/**
* Create a file with three stripes, corrupt one block in each stripe,
* and wait for the file to be fixed.
*/
@Test
public void testBlockFix() throws Exception {
LOG.info("Test testBlockFix started.");
long blockSize = 8192L;
int stripeLength = 3;
mySetup(stripeLength, -1);
Path file1 = new Path("/user/dhruba/raidtest/file1");
Path destPath = new Path("/destraid/user/dhruba/raidtest");
Path parityFile = new Path(destPath, "file1");
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 7, blockSize);
long file1Len = fileSys.getFileStatus(file1).getLen();
LOG.info("Test testBlockFix created test files");
// create an instance of the RaidNode
Configuration localConf = new Configuration(conf);
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
localConf.setInt("raid.blockfix.interval", 1000);
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.LocalBlockFixer");
cnode = RaidNode.createRaidNode(null, localConf);
try {
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
cnode.stop();
cnode.join();
cnode = null;
FileStatus srcStat = fileSys.getFileStatus(file1);
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(
(DistributedFileSystem) fileSys, file1.toUri().getPath(),
0, srcStat.getLen());
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
// Corrupt blocks in different stripes. We can fix them.
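// With stripeLength 3, blocks 0, 4 and 6 fall in stripes 0, 1 and 2, so each stripe loses at most one block and XOR recovery is possible.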
int[] corruptBlockIdxs = new int[]{0, 4, 6};
for (int idx: corruptBlockIdxs) {
LOG.info("Corrupting block " + locations.get(idx).getBlock());
corruptBlock(locations.get(idx).getBlock());
}
TestBlockFixer.reportCorruptBlocks(fileSys, file1, corruptBlockIdxs,
srcStat.getBlockSize());
waitForCorruptBlocks(corruptBlockIdxs.length, dfs, file1);
// Create RaidShell and fix the file.
RaidShell shell = new RaidShell(conf);
String[] args = new String[2];
args[0] = "-recoverBlocks";
args[1] = file1.toUri().getPath();
ToolRunner.run(shell, args);
waitForCorruptBlocks(0, dfs, file1);
assertTrue(TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
// Now corrupt and fix the parity file.
FileStatus parityStat = fileSys.getFileStatus(parityFile);
long parityCrc = getCRC(fileSys, parityFile);
locations = RaidDFSUtil.getBlockLocations(
dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
corruptBlock(locations.get(0).getBlock());
TestBlockFixer.reportCorruptBlocks(fileSys, parityFile, new int[]{0},
srcStat.getBlockSize());
waitForCorruptBlocks(1, dfs, parityFile);
args[1] = parityFile.toUri().getPath();
ToolRunner.run(shell, args);
waitForCorruptBlocks(0, dfs, file1);
assertEquals(parityCrc, getCRC(fileSys, parityFile));
} catch (Exception e) {
LOG.info("Test testBlockFix Exception " + e + StringUtils.stringifyException(e));
throw e;
} finally {
myTearDown();
}
LOG.info("Test testBlockFix completed.");
}
private void waitForCorruptBlocks(
int numCorruptBlocks, DistributedFileSystem dfs, Path file)
throws Exception {
String path = file.toUri().getPath();
FileStatus stat = dfs.getFileStatus(file);
long start = Time.now();
long actual = 0;
do {
actual = RaidDFSUtil.corruptBlocksInFile(
dfs, path, 0, stat.getLen()).size();
if (actual == numCorruptBlocks) break;
if (Time.now() - start > 120000) break;
LOG.info("Waiting for " + numCorruptBlocks + " corrupt blocks in " +
path + ", found " + actual);
Thread.sleep(1000);
} while (true);
assertEquals(numCorruptBlocks, actual);
}
private void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
// scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
conf.set("raid.server.address", "localhost:0");
conf.setInt("hdfs.raid.stripeLength", stripeLength);
conf.set("hdfs.raid.locations", "/destraid");
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
FileSystem.setDefaultUri(conf, namenode);
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
FileSystem.setDefaultUri(conf, namenode);
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
String str = "<configuration> " +
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
"<policy name = \"RaidTest1\"> " +
"<erasureCode>xor</erasureCode> " +
"<destPath> /destraid</destPath> " +
"<property> " +
"<name>targetReplication</name> " +
"<value>1</value> " +
"<description>after RAIDing, decrease the replication factor of a file to this value." +
"</description> " +
"</property> " +
"<property> " +
"<name>metaReplication</name> " +
"<value>1</value> " +
"<description> replication factor of parity file" +
"</description> " +
"</property> " +
"<property> " +
"<name>modTimePeriod</name> " +
"<value>2000</value> " +
"<description> time (milliseconds) after a file is modified to make it " +
"a candidate for RAIDing " +
"</description> " +
"</property> ";
if (timeBeforeHar >= 0) {
str +=
"<property> " +
"<name>time_before_har</name> " +
"<value>" + timeBeforeHar + "</value> " +
"<description> amount of time waited before har'ing parity files" +
"</description> " +
"</property> ";
}
str +=
"</policy>" +
"</srcPath>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
}
private void myTearDown() throws Exception {
if (cnode != null) { cnode.stop(); cnode.join(); }
if (dfs != null) { dfs.shutdown(); }
}
private long getCRC(FileSystem fs, Path p) throws IOException {
CRC32 crc = new CRC32();
FSDataInputStream stm = fs.open(p);
int b;
while ((b = stm.read())>=0) {
crc.update(b);
}
stm.close();
return crc.getValue();
}
void corruptBlock(ExtendedBlock block) throws IOException {
assertTrue("Could not corrupt block",
dfs.corruptBlockOnDataNodes(block) > 0);
}
}

View File

@ -1,724 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.ToolRunner;
import org.junit.After;
import org.junit.Test;
public class TestRaidShellFsck {
final static Log LOG =
LogFactory.getLog("org.apache.hadoop.raid.TestRaidShellFsck");
final static String TEST_DIR =
new File(System.
getProperty("test.build.data", "target/test-data")).getAbsolutePath();
final static String CONFIG_FILE = new File(TEST_DIR, "test-raid.xml").
getAbsolutePath();
final static long RELOAD_INTERVAL = 1000;
final static int NUM_DATANODES = 4;
final static int STRIPE_BLOCKS = 3; // number of blocks per stripe
final static int FILE_BLOCKS = 6; // number of blocks that file consists of
final static short REPL = 1; // replication factor before raiding
final static long BLOCK_SIZE = 8192L; // size of a block in bytes
final static String DIR_PATH = "/user/pkling/raidtest";
final static Path FILE_PATH0 =
new Path("/user/pkling/raidtest/raidfsck.test");
final static Path FILE_PATH1 =
new Path("/user/pkling/raidtest/raidfsck2.test");
final static Path RAID_PATH = new Path("/destraid/user/pkling/raidtest");
final static String HAR_NAME = "raidtest_raid.har";
final static String RAID_DIR = "/destraid";
Configuration conf = null;
Configuration raidConf = null;
Configuration clientConf = null;
MiniDFSCluster cluster = null;
DistributedFileSystem dfs = null;
RaidNode rnode = null;
RaidShell shell = null;
String[] args = null;
/**
* creates a MiniDFS instance with a raided file in it
*/
private void setUp(boolean doHar) throws IOException, ClassNotFoundException {
final int timeBeforeHar;
if (doHar) {
timeBeforeHar = 0;
} else {
timeBeforeHar = -1;
}
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
conf.set("raid.config.file", CONFIG_FILE);
conf.setBoolean("raid.config.reload", true);
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
// scan all policies once every 5 seconds
conf.setLong("raid.policy.rescan.interval", 5000);
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
// do not use map-reduce cluster for Raiding
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
// use local block fixer
conf.set("raid.blockfix.classname",
"org.apache.hadoop.raid.LocalBlockFixer");
conf.set("raid.server.address", "localhost:0");
conf.setInt("hdfs.raid.stripeLength", STRIPE_BLOCKS);
conf.set("hdfs.raid.locations", RAID_DIR);
conf.setInt("dfs.corruptfilesreturned.max", 500);
conf.setBoolean("dfs.permissions", false);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
.build();
cluster.waitActive();
dfs = (DistributedFileSystem) cluster.getFileSystem();
String namenode = dfs.getUri().toString();
FileSystem.setDefaultUri(conf, namenode);
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
fileWriter.write("<?xml version=\"1.0\"?>\n");
String str =
"<configuration> " +
" <srcPath prefix=\"" + DIR_PATH + "\"> " +
" <policy name = \"RaidTest1\"> " +
" <erasureCode>xor</erasureCode> " +
" <destPath> " + RAID_DIR + " </destPath> " +
" <property> " +
" <name>targetReplication</name> " +
" <value>1</value> " +
" <description>after RAIDing, decrease the replication " +
"factor of a file to this value.</description> " +
" </property> " +
" <property> " +
" <name>metaReplication</name> " +
" <value>1</value> " +
" <description> replication factor of parity file</description> " +
" </property> " +
" <property> " +
" <name>modTimePeriod</name> " +
" <value>2000</value> " +
" <description>time (milliseconds) after a file is modified " +
"to make it a candidate for RAIDing</description> " +
" </property> ";
if (timeBeforeHar >= 0) {
str +=
" <property> " +
" <name>time_before_har</name> " +
" <value>" + timeBeforeHar + "</value> " +
" <description> amount of time waited before har'ing parity " +
"files</description> " +
" </property> ";
}
str +=
" </policy>" +
" </srcPath>" +
"</configuration>";
fileWriter.write(str);
fileWriter.close();
createTestFile(FILE_PATH0);
createTestFile(FILE_PATH1);
Path[] filePaths = { FILE_PATH0, FILE_PATH1 };
raidTestFiles(RAID_PATH, filePaths, doHar);
clientConf = new Configuration(raidConf);
clientConf.set("fs.hdfs.impl",
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
clientConf.set("fs.raid.underlyingfs.impl",
"org.apache.hadoop.hdfs.DistributedFileSystem");
// prepare shell and arguments
shell = new RaidShell(clientConf);
args = new String[2];
args[0] = "-fsck";
args[1] = DIR_PATH;
}
/**
* Creates test file consisting of random data
*/
private void createTestFile(Path filePath) throws IOException {
Random rand = new Random();
FSDataOutputStream stm = dfs.create(filePath, true,
conf.getInt("io.file.buffer.size",
4096), REPL, BLOCK_SIZE);
final byte[] b = new byte[(int) BLOCK_SIZE];
for (int i = 0; i < FILE_BLOCKS; i++) {
rand.nextBytes(b);
stm.write(b);
}
stm.close();
LOG.info("test file created");
}
/**
* raids test file
*/
private void raidTestFiles(Path raidPath, Path[] filePaths, boolean doHar)
throws IOException, ClassNotFoundException {
// create RaidNode
raidConf = new Configuration(conf);
raidConf.set(RaidNode.RAID_LOCATION_KEY, RAID_DIR);
raidConf.setInt("raid.blockfix.interval", 1000);
raidConf.setLong("har.block.size", BLOCK_SIZE * 3);
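// each har block holds three parity blocks (har.block.size = 3 * BLOCK_SIZE), which produces the har layout shown in the test javadocs below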
// the RaidNode does the raiding inline (instead of submitting to MR node)
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
rnode = RaidNode.createRaidNode(null, raidConf);
for (Path filePath: filePaths) {
long waitStart = Time.now();
boolean raided = false;
Path parityFilePath = new Path(RAID_DIR,
filePath.toString().substring(1));
while (!raided) {
try {
FileStatus[] listPaths = dfs.listStatus(raidPath);
if (listPaths != null) {
if (doHar) {
// case with HAR
for (FileStatus f: listPaths) {
if (f.getPath().toString().endsWith(".har")) {
// check if the parity file is in the index
final Path indexPath = new Path(f.getPath(), "_index");
final FileStatus indexFileStatus =
dfs.getFileStatus(indexPath);
final HarIndex harIndex =
new HarIndex(dfs.open(indexPath), indexFileStatus.getLen());
final HarIndex.IndexEntry indexEntry =
harIndex.findEntryByFileName(parityFilePath.toString());
if (indexEntry != null) {
LOG.info("raid file " + parityFilePath.toString() +
" found in Har archive: " +
f.getPath().toString() +
" ts=" + indexEntry.mtime);
raided = true;
break;
}
}
}
} else {
// case without HAR
for (FileStatus f : listPaths) {
Path found = new Path(f.getPath().toUri().getPath());
if (parityFilePath.equals(found)) {
LOG.info("raid file found: " + f.getPath().toString());
raided = true;
break;
}
}
}
}
} catch (FileNotFoundException ignore) {
}
if (!raided) {
if (Time.now() > waitStart + 40000L) {
LOG.error("parity file not created after 40s");
throw new IOException("parity file not HARed after 40s");
} else {
try {
Thread.sleep(1000);
} catch (InterruptedException ignore) {
}
}
}
}
}
rnode.stop();
rnode.join();
rnode = null;
LOG.info("test file raided");
}
/**
* sleeps for up to 20s until the number of corrupt files
* in the file system is equal to the number specified
*/
private void waitUntilCorruptFileCount(DistributedFileSystem dfs,
int corruptFiles)
throws IOException {
long waitStart = Time.now();
while (RaidDFSUtil.getCorruptFiles(dfs).length != corruptFiles) {
try {
Thread.sleep(1000);
} catch (InterruptedException ignore) {
}
if (Time.now() > waitStart + 20000L) {
break;
}
}
int corruptFilesFound = RaidDFSUtil.getCorruptFiles(dfs).length;
if (corruptFilesFound != corruptFiles) {
throw new IOException("expected " + corruptFiles +
" corrupt files but got " +
corruptFilesFound);
}
}
/**
* removes a specified block from MiniDFS storage and reports it as corrupt
*/
private void removeAndReportBlock(DistributedFileSystem blockDfs,
Path filePath,
LocatedBlock block)
throws IOException {
TestRaidDfs.corruptBlock(cluster, filePath, block.getBlock(), NUM_DATANODES, true);
// report deleted block to the name node
LocatedBlock[] toReport = { block };
blockDfs.getClient().getNamenode().reportBadBlocks(toReport);
}
/**
* removes a file block in the specified stripe
*/
private void removeFileBlock(Path filePath, int stripe, int blockInStripe)
throws IOException {
LocatedBlocks fileBlocks = dfs.getClient().getNamenode().
getBlockLocations(filePath.toString(), 0, FILE_BLOCKS * BLOCK_SIZE);
if (fileBlocks.locatedBlockCount() != FILE_BLOCKS) {
throw new IOException("expected " + FILE_BLOCKS +
" file blocks but found " +
fileBlocks.locatedBlockCount());
}
if (blockInStripe >= STRIPE_BLOCKS) {
throw new IOException("blockInStripe is " + blockInStripe +
" but must be smaller than " + STRIPE_BLOCKS);
}
LocatedBlock block = fileBlocks.get(stripe * STRIPE_BLOCKS + blockInStripe);
removeAndReportBlock(dfs, filePath, block);
LOG.info("removed file " + filePath.toString() + " block " +
stripe * STRIPE_BLOCKS + " in stripe " + stripe);
}
/**
* removes a parity block in the specified stripe
*/
private void removeParityBlock(Path filePath, int stripe) throws IOException {
// find parity file
Path destPath = new Path(RAID_DIR);
RaidNode.ParityFilePair ppair = RaidNode.getParityFile(destPath, filePath, conf);
String parityPathStr = ppair.getPath().toUri().getPath();
LOG.info("parity path: " + parityPathStr);
FileSystem parityFS = ppair.getFileSystem();
if (!(parityFS instanceof DistributedFileSystem)) {
throw new IOException("parity file is not on distributed file system");
}
DistributedFileSystem parityDFS = (DistributedFileSystem) parityFS;
// now corrupt the block corresponding to the stripe selected
FileStatus parityFileStatus =
parityDFS.getFileStatus(new Path(parityPathStr));
long parityBlockSize = parityFileStatus.getBlockSize();
long parityFileLength = parityFileStatus.getLen();
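// round up: the number of blocks needed to hold parityFileLength bytes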
long parityFileLengthInBlocks = (parityFileLength / parityBlockSize) +
(((parityFileLength % parityBlockSize) == 0) ? 0L : 1L);
if (parityFileLengthInBlocks <= stripe) {
throw new IOException("selected stripe " + stripe +
" but parity file only has " +
parityFileLengthInBlocks + " blocks");
}
if (parityBlockSize != BLOCK_SIZE) {
throw new IOException("file block size is " + BLOCK_SIZE +
" but parity file block size is " +
parityBlockSize);
}
LocatedBlocks parityFileBlocks = parityDFS.getClient().getNamenode().
getBlockLocations(parityPathStr, 0, parityFileLength);
if (parityFileBlocks.locatedBlockCount() != parityFileLengthInBlocks) {
throw new IOException("expected " + parityFileLengthInBlocks +
" parity file blocks but got " +
parityFileBlocks.locatedBlockCount() +
" blocks");
}
LocatedBlock parityFileBlock = parityFileBlocks.get(stripe);
removeAndReportBlock(parityDFS, new Path(parityPathStr), parityFileBlock);
LOG.info("removed parity file block/stripe " + stripe +
" for " + filePath.toString());
}
/**
* removes a block from the har part file
*/
private void removeHarParityBlock(int block) throws IOException {
Path harPath = new Path(RAID_PATH, HAR_NAME);
FileStatus [] listPaths = dfs.listStatus(harPath);
boolean deleted = false;
for (FileStatus f: listPaths) {
if (f.getPath().getName().startsWith("part-")) {
final Path partPath = new Path(f.getPath().toUri().getPath());
final LocatedBlocks partBlocks = dfs.getClient().getNamenode().
getBlockLocations(partPath.toString(),
0,
f.getLen());
if (partBlocks.locatedBlockCount() <= block) {
throw new IOException("invalid har block " + block);
}
final LocatedBlock partBlock = partBlocks.get(block);
removeAndReportBlock(dfs, partPath, partBlock);
LOG.info("removed block " + block + "/" +
partBlocks.locatedBlockCount() +
" of file " + partPath.toString() +
" block size " + partBlock.getBlockSize());
deleted = true;
break;
}
}
if (!deleted) {
throw new IOException("cannot find part file in " + harPath.toString());
}
}
/**
* checks fsck with no missing blocks
*/
@Test
public void testClean() throws Exception {
LOG.info("testClean");
setUp(false);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
/**
* checks fsck with missing block in file block but not in parity block
*/
@Test
public void testFileBlockMissing() throws Exception {
LOG.info("testFileBlockMissing");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 0, 0);
waitUntilCorruptFileCount(dfs, 1);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
/**
* checks fsck with missing block in parity block but not in file block
*/
@Test
public void testParityBlockMissing() throws Exception {
LOG.info("testParityBlockMissing");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeParityBlock(FILE_PATH0, 0);
waitUntilCorruptFileCount(dfs, 1);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
/**
* checks fsck with missing block in both file block and parity block
* in different stripes
*/
@Test
public void testFileBlockAndParityBlockMissingInDifferentStripes()
throws Exception {
LOG.info("testFileBlockAndParityBlockMissingInDifferentStripes");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 0, 0);
waitUntilCorruptFileCount(dfs, 1);
removeParityBlock(FILE_PATH0, 1);
waitUntilCorruptFileCount(dfs, 2);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
/**
* checks fsck with missing block in both file block and parity block
* in same stripe
*/
@Test
public void testFileBlockAndParityBlockMissingInSameStripe()
throws Exception {
LOG.info("testFileBlockAndParityBlockMissingInSameStripe");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeParityBlock(FILE_PATH0, 1);
waitUntilCorruptFileCount(dfs, 1);
removeFileBlock(FILE_PATH0, 1, 0);
waitUntilCorruptFileCount(dfs, 2);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 1, but returns " +
Integer.toString(result), result == 1);
}
/**
* checks fsck with two missing file blocks in same stripe
*/
@Test
public void test2FileBlocksMissingInSameStripe()
throws Exception {
LOG.info("test2FileBlocksMissingInSameStripe");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 1, 1);
waitUntilCorruptFileCount(dfs, 1);
removeFileBlock(FILE_PATH0, 1, 0);
waitUntilCorruptFileCount(dfs, 1);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 1, but returns " +
Integer.toString(result), result == 1);
}
/**
* checks fsck with two missing file blocks in different stripes
*/
@Test
public void test2FileBlocksMissingInDifferentStripes()
throws Exception {
LOG.info("test2FileBlocksMissingInDifferentStripes");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 1, 1);
waitUntilCorruptFileCount(dfs, 1);
removeFileBlock(FILE_PATH0, 0, 0);
waitUntilCorruptFileCount(dfs, 1);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
/**
* checks fsck with file block missing (HAR)
* use 2 files to verify HAR offset logic in RaidShell fsck
* both files have one corrupt block, parity blocks are clean
*
* parity blocks in har (file.stripe):
* +-----+-----+-----+ +-----+
* | 0.0 | 0.1 | 1.0 | | 1.1 |
* +-----+-----+-----+ +-----+
* 0 1
*
*/
@Test
public void testFileBlockMissingHar()
throws Exception {
LOG.info("testFileBlockMissingHar");
setUp(true);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 1, 1);
removeFileBlock(FILE_PATH1, 1, 1);
waitUntilCorruptFileCount(dfs, 2);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
/**
* checks fsck with file block missing (HAR)
* use 2 files to verify HAR offset logic in RaidShell fsck
*
* parity blocks in har (file.stripe):
* +-----+-----+-----+ +-----+
* | 0.0 | 0.1 | 1.0 | | 1.1 |
* +-----+-----+-----+ +-----+
* 0 1
*
* corrupt file 0, stripe 0 file block 0
* corrupt file 0, stripe 1 file block 0
* corrupt file 1, stripe 0 file block 0
* corrupt file 1, stripe 1 file block 0
* corrupt har block 0
* both files should be corrupt
*/
@Test
public void testFileBlockAndParityBlockMissingHar1()
throws Exception {
LOG.info("testFileBlockAndParityBlockMissingHar1");
setUp(true);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 0, 0);
removeFileBlock(FILE_PATH0, 1, 0);
removeFileBlock(FILE_PATH1, 0, 0);
removeFileBlock(FILE_PATH1, 1, 0);
removeHarParityBlock(0);
waitUntilCorruptFileCount(dfs, 3);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 2, but returns " +
Integer.toString(result), result == 2);
}
/**
* checks fsck with file block missing (HAR)
* use 2 files to verify HAR offset logic in RaidShell fsck
*
* parity blocks in har (file.stripe):
* +-----+-----+-----+ +-----+
* | 0.0 | 0.1 | 1.0 | | 1.1 |
* +-----+-----+-----+ +-----+
* 0 1
*
* corrupt file 0, stripe 0 file block 0
* corrupt file 0, stripe 1 file block 0
* corrupt file 1, stripe 0 file block 0
* corrupt file 1, stripe 1 file block 0
* corrupt har block 1
* only file 2 should be corrupt
*/
@Test
public void testFileBlockAndParityBlockMissingHar2()
throws Exception {
LOG.info("testFileBlockAndParityBlockMissingHar2");
setUp(true);
waitUntilCorruptFileCount(dfs, 0);
removeFileBlock(FILE_PATH0, 0, 0);
removeFileBlock(FILE_PATH0, 1, 0);
removeFileBlock(FILE_PATH1, 0, 0);
removeFileBlock(FILE_PATH1, 1, 0);
removeHarParityBlock(1);
waitUntilCorruptFileCount(dfs, 3);
int result = ToolRunner.run(shell, args);
assertTrue("fsck should return 1, but returns " +
Integer.toString(result), result == 1);
}
/**
* checks that fsck does not report corrupt file that is not in
* the specified path
*/
@Test
public void testPathFilter()
throws Exception {
LOG.info("testPathFilter");
setUp(false);
waitUntilCorruptFileCount(dfs, 0);
removeParityBlock(FILE_PATH0, 1);
waitUntilCorruptFileCount(dfs, 1);
removeFileBlock(FILE_PATH0, 1, 0);
waitUntilCorruptFileCount(dfs, 2);
String[] otherArgs = new String[2];
otherArgs[0] = "-fsck";
otherArgs[1] = "/user/pkling/other";
int result = ToolRunner.run(shell, otherArgs);
assertTrue("fsck should return 0, but returns " +
Integer.toString(result), result == 0);
}
@After
public void tearDown() throws Exception {
if (rnode != null) {
rnode.stop();
rnode.join();
rnode = null;
}
if (cluster != null) {
cluster.shutdown();
cluster = null;
}
dfs = null;
LOG.info("Test cluster shut down");
}
}

View File

@ -1,135 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.mapred.Reporter;
import org.junit.Test;
public class TestReedSolomonDecoder {
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestReedSolomonDecoder");
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static int NUM_DATANODES = 3;
Configuration conf;
MiniDFSCluster dfs = null;
FileSystem fileSys = null;
@Test
public void testDecoder() throws Exception {
mySetup();
int stripeSize = 10;
int paritySize = 4;
long blockSize = 8192;
Path file1 = new Path("/user/raidtest/file1");
Path recoveredFile1 = new Path("/user/raidtest/file1.recovered");
Path parityFile1 = new Path("/rsraid/user/raidtest/file1");
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 25, blockSize);
FileStatus file1Stat = fileSys.getFileStatus(file1);
conf.setInt("raid.rsdecoder.bufsize", 512);
conf.setInt("raid.rsencoder.bufsize", 512);
try {
// First encode the file.
ReedSolomonEncoder encoder = new ReedSolomonEncoder(
conf, stripeSize, paritySize);
short parityRepl = 1;
encoder.encodeFile(fileSys, file1, fileSys, parityFile1, parityRepl,
Reporter.NULL);
// Ensure there are no corrupt files yet.
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals(corruptFiles.length, 0);
// Now corrupt the file.
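// Blocks 5 and 6 both lie in the first stripe (stripeSize 10); with paritySize 4, Reed-Solomon can recover up to 4 missing blocks per stripe, so both are repairable.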
long corruptOffset = blockSize * 5;
FileStatus srcStat = fileSys.getFileStatus(file1);
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(dfs,
file1.toUri().getPath(), 0, srcStat.getLen());
corruptBlock(locations.get(5).getBlock());
corruptBlock(locations.get(6).getBlock());
TestBlockFixer.reportCorruptBlocks(dfs, file1, new int[]{5, 6},
srcStat.getBlockSize());
// Ensure file is corrupted.
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
assertEquals(corruptFiles.length, 1);
assertEquals(corruptFiles[0], file1.toString());
// Fix the file.
ReedSolomonDecoder decoder = new ReedSolomonDecoder(
conf, stripeSize, paritySize);
decoder.decodeFile(fileSys, file1, fileSys, parityFile1,
corruptOffset, recoveredFile1);
assertTrue(TestRaidDfs.validateFile(
fileSys, recoveredFile1, file1Stat.getLen(), crc1));
} finally {
myTearDown();
}
}
void corruptBlock(ExtendedBlock block) throws IOException {
assertTrue("Could not corrupt block",
dfs.corruptBlockOnDataNodes(block) > 0);
}
private void mySetup() throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
conf.setBoolean("dfs.permissions", false);
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
dfs.waitActive();
fileSys = dfs.getFileSystem();
String namenode = fileSys.getUri().toString();
FileSystem.setDefaultUri(conf, namenode);
}
private void myTearDown() throws Exception {
if (dfs != null) { dfs.shutdown(); }
}
}

View File

@ -1,94 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import static org.junit.Assert.assertEquals;
import java.io.File;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.mapred.Reporter;
import org.junit.Test;
public class TestReedSolomonEncoder {
final static Log LOG = LogFactory.getLog(
"org.apache.hadoop.raid.TestReedSolomonEncoder");
final static String TEST_DIR = new File(System.getProperty("test.build.data",
"target/test-data")).getAbsolutePath();
final static int NUM_DATANODES = 3;
Configuration conf;
String namenode = null;
MiniDFSCluster dfs = null;
FileSystem fileSys = null;
@Test
public void testEncoder() throws Exception {
mySetup();
int stripeSize = 10;
int paritySize = 4;
long blockSize = 8192;
Path file1 = new Path("/user/raidtest/file1");
Path parityFile1 = new Path("/rsraid/user/raidtest/file1");
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
1, 25, blockSize);
try {
ReedSolomonEncoder encoder = new ReedSolomonEncoder(
conf, stripeSize, paritySize);
short parityRepl = 1;
encoder.encodeFile(fileSys, file1, fileSys, parityFile1, parityRepl,
Reporter.NULL);
FileStatus parityStat = fileSys.getFileStatus(parityFile1);
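// 25 source blocks with stripeSize 10 form 3 stripes; each stripe contributes paritySize (4) parity blocks of blockSize 8192, hence the expected length 4*8192*3.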
assertEquals(4*8192*3, parityStat.getLen());
} finally {
myTearDown();
}
}
private void mySetup() throws Exception {
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
conf = new Configuration();
// make all deletions not go through Trash
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
dfs = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
dfs.waitActive();
fileSys = dfs.getFileSystem();
namenode = fileSys.getUri().toString();
FileSystem.setDefaultUri(conf, namenode);
}
private void myTearDown() throws Exception {
if (dfs != null) { dfs.shutdown(); }
}
}

View File

@ -34,7 +34,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<module>hadoop-hdfs</module>
<module>hadoop-hdfs-httpfs</module>
<module>hadoop-hdfs/src/contrib/bkjournal</module>
<module>hadoop-hdfs-raid</module>
</modules>
<build>

View File

@ -1711,10 +1711,6 @@
output="${build.dir.eclipse-contrib-classes}/gridmix/main" />
<source path="${contrib.dir}/gridmix/src/test"
output="${build.dir.eclipse-contrib-classes}/gridmix/test" />
<source path="${contrib.dir}/raid/src/java"
output="${build.dir.eclipse-contrib-classes}/raid/main" />
<source path="${contrib.dir}/raid/src/test"
output="${build.dir.eclipse-contrib-classes}/raid/test" />
<source path="${contrib.dir}/vaidya/src/java"
output="${build.dir.eclipse-contrib-classes}/vaidya/main" />
<source path="${contrib.dir}/vertica/src/java"

View File

@ -62,7 +62,6 @@
<fileset dir="." includes="streaming/build.xml"/>
<fileset dir="." includes="gridmix/build.xml"/>
<fileset dir="." includes="vertica/build.xml"/>
<fileset dir="." includes="raid/build.xml"/>
</subant>
<available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
<fail if="testsfailed">Tests failed!</fail>

View File

@ -1,201 +0,0 @@
# Copyright 2008 The Apache Software Foundation Licensed under the
# Apache License, Version 2.0 (the "License"); you may not use this
# file except in compliance with the License. You may obtain a copy
# of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless
# required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
This package implements a Distributed Raid File System. It is used along with
an instance of the Hadoop Distributed File System (HDFS). It can be used to
provide better protection against data corruption. It can also be used to
reduce the total storage requirements of HDFS.
The Distributed Raid File System consists of two main software components. The first component
is the RaidNode, a daemon that creates parity files from specified HDFS files.
The second component, "raidfs", is software layered over an HDFS client; it
intercepts all calls that an application makes to the HDFS client. If HDFS encounters
corrupted data while reading a file, the raidfs client detects it; it uses the
relevant parity blocks to recover the corrupted data (if possible) and returns
the data to the application. The use of parity data to satisfy the read
request is completely transparent to the application.
The primary use of this feature is to save disk space for HDFS files.
HDFS typically stores data in triplicate.
The Distributed Raid File System can be configured in such a way that a set of
data blocks of a file are combined together to form one or more parity blocks.
This allows one to reduce the replication factor of an HDFS file from 3 to 2
while keeping the failure probability roughly the same as before. This typically
results in saving 25% to 30% of the storage space in an HDFS cluster.
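As a rough illustration (using the example stripe length of 10 from the
configuration section below, with both the data and the parity kept at
replication 2): 10 data blocks plus 1 parity block at replication 2 occupy
22 block replicas, compared to 30 replicas for the same 10 blocks at the
default replication of 3, a saving of roughly 27%.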
--------------------------------------------------------------------------------
BUILDING:
In HADOOP_PREFIX, run ant package to build Hadoop and its contrib packages.
--------------------------------------------------------------------------------
INSTALLING and CONFIGURING:
The entire code is packaged in the form of a single jar file hadoop-*-raid.jar.
To use HDFS Raid, you need to put the above mentioned jar file on
the CLASSPATH. The easiest way is to copy the hadoop-*-raid.jar
from HADOOP_PREFIX/build/contrib/raid to HADOOP_PREFIX/lib. Alternatively
you can modify HADOOP_CLASSPATH to include this jar, in conf/hadoop-env.sh.
There is a single configuration file named raid.xml that describes the HDFS
path(s) that you want to raid. A sample of this file can be found in
src/contrib/raid/conf/raid.xml. Please edit the entries in this file to list the
path(s) that you want to raid. Then, edit the hdfs-site.xml file for
your installation to include a reference to this raid.xml. You can add the
following to your hdfs-site.xml
<property>
<name>raid.config.file</name>
<value>/mnt/hdfs/DFS/conf/raid.xml</value>
<description>This is needed by the RaidNode </description>
</property>
Please add an entry to your hdfs-site.xml to enable hdfs clients to use the
parity bits to recover corrupted data.
<property>
<name>fs.hdfs.impl</name>
<value>org.apache.hadoop.hdfs.DistributedRaidFileSystem</value>
<description>The FileSystem for hdfs: uris.</description>
</property>
--------------------------------------------------------------------------------
OPTIONAL CONFIGURATION:
The following properties can be set in hdfs-site.xml to further tune your configuration:
Specifies the location where parity files are located.
<property>
<name>hdfs.raid.locations</name>
<value>hdfs://newdfs.data:8000/raid</value>
<description>The location for parity files. If this is
not defined, it defaults to /raid.
</description>
</property>
Specify the parity stripe length
<property>
<name>hdfs.raid.stripeLength</name>
<value>10</value>
<description>The number of blocks in a file to be combined into
a single raid parity block. The default value is 5. The higher
the number, the greater the disk space you will save when you
enable raid.
</description>
</property>
Specify the size of HAR part-files
<property>
<name>raid.har.partfile.size</name>
<value>4294967296</value>
<description>The size of HAR part files that store raid parity
files. The default is 4GB. The higher the number, the fewer the
files used to store the HAR archive.
</description>
</property>
Specify which implementation of RaidNode to use.
<property>
<name>raid.classname</name>
<value>org.apache.hadoop.raid.DistRaidNode</value>
<description>Specify which implementation of RaidNode to use
(class name).
</description>
</property>
Specify the periodicity at which the RaidNode re-calculates (if necessary)
the parity blocks
<property>
<name>raid.policy.rescan.interval</name>
<value>5000</value>
<description>Specify the periodicity in milliseconds after which
all source paths are rescanned and parity blocks recomputed if
necessary. By default, this value is 1 hour.
</description>
</property>
By default, the DistributedRaidFileSystem assumes that the underlying file
system is the DistributedFileSystem. If you want to layer the DistributedRaidFileSystem
over some other file system, then define a property named fs.raid.underlyingfs.impl
that specifies the name of the underlying class. For example, if you want to layer
the DistributedRaidFileSystem over an instance of the NewFileSystem, then
<property>
<name>fs.raid.underlyingfs.impl</name>
<value>org.apache.hadoop.new.NewFileSystem</value>
<description>Specify the filesystem that is layered immediately below the
DistributedRaidFileSystem. By default, this value is DistributedFileSystem.
</description>
</property>
--------------------------------------------------------------------------------
ADMINISTRATION:
The Distributed Raid File System provides support for administration at runtime without
any downtime to cluster services. It is possible to add/delete new paths to be raided without
interrupting any load on the cluster. If you change raid.xml, its contents will be
reloaded within seconds and the new contents will take effect immediately.
Designate one machine in your cluster to run the RaidNode software. You can run this daemon
on any machine irrespective of whether that machine is running any other hadoop daemon or not.
You can start the RaidNode by running the following on the selected machine:
nohup $HADOOP_PREFIX/bin/hadoop org.apache.hadoop.raid.RaidNode >> /xxx/logs/hadoop-root-raidnode-hadoop.xxx.com.log &
Optionally, we provide two scripts to start and stop the RaidNode. Copy the scripts
start-raidnode.sh and stop-raidnode.sh to the directory $HADOOP_PREFIX/bin in the machine
you would like to deploy the daemon. You can start or stop the RaidNode by directly
calling the scripts from that machine. If you want to deploy the RaidNode remotely,
copy start-raidnode-remote.sh and stop-raidnode-remote.sh to $HADOOP_PREFIX/bin at
the machine from which you want to trigger the remote deployment and create a text
file $HADOOP_PREFIX/conf/raidnode at the same machine containing the name of the server
where the RaidNode should run. These scripts run ssh to the specified machine and
invoke start/stop-raidnode.sh there. As an example, you might want to change
start-mapred.sh in the JobTracker machine so that it automatically calls
start-raidnode-remote.sh (and do the equivalent thing for stop-mapred.sh and
stop-raidnode-remote.sh).
To validate the integrity of a file system, run RaidFSCK as follows:
$HADOOP_PREFIX/bin/hadoop org.apache.hadoop.raid.RaidShell -fsck [path]
This will print a list of corrupt files (i.e., files which have lost too many
blocks and can no longer be fixed by Raid).
--------------------------------------------------------------------------------
IMPLEMENTATION:
The RaidNode periodically scans all the specified paths in the configuration
file. For each path, it recursively scans all files that have more than 2 blocks
and that have not been modified during the last few hours (the default is 24 hours).
It picks the specified number of blocks (the stripe length) from the file,
generates a parity block by combining them, and
stores the result as another HDFS file in the specified destination
directory. There is a one-to-one mapping between an HDFS
file and its parity file. The RaidNode also periodically finds parity files
that are orphaned and deletes them.
The Distributed Raid FileSystem is layered over a DistributedFileSystem
instance and intercepts all calls that go into HDFS. HDFS throws a ChecksumException
or a BlockMissingException when a file read encounters bad data. The layered
Distributed Raid FileSystem catches these exceptions, locates the corresponding
parity file, extracts the original data from the parity files and feeds the
extracted data back to the application in a completely transparent way.
The layered Distributed Raid FileSystem does not fix the data-loss that it
encounters while serving data. It merely makes the application transparently
use the parity blocks to re-create the original data. A command line tool
"fsckraid" is currently under development that will fix the corrupted files
by extracting the data from the associated parity files. An administrator
can run "fsckraid" manually as and when needed.
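As an illustration only (not part of the shipped code), the sketch below shows a
client reading a raided file. It assumes the fs.hdfs.impl and
fs.raid.underlyingfs.impl settings described above are present in hdfs-site.xml;
the class name, namenode address and file path are placeholders:
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class RaidReadExample {
  public static void main(String[] args) throws Exception {
    // hdfs-site.xml supplies fs.hdfs.impl (DistributedRaidFileSystem)
    // and fs.raid.underlyingfs.impl (DistributedFileSystem)
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020/"), conf);
    FSDataInputStream in = fs.open(new Path("/user/foo/datafile"));
    byte[] buf = new byte[4096];
    long total = 0;
    int n;
    while ((n = in.read(buf)) > 0) {
      // corrupt blocks, if any, are reconstructed from parity transparently
      total += n;
    }
    in.close();
    System.out.println("read " + total + " bytes");
  }
}
The reading code is identical to what it would be against plain HDFS; all
reconstruction from parity happens inside the DistributedRaidFileSystem client.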

View File

@ -1,64 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Before you can run these subtargets directly, you need
to call at top-level: ant deploy-contrib compile-core-test
-->
<project name="raid" default="jar">
<import file="../build-contrib.xml"/>
<!-- the unit test classpath -->
<path id="contrib.classpath.raid">
<pathelement location="${hadoop.root}/src/contrib/raid/lib"/>
<path refid="contrib-classpath"/>
</path>
<target name="test" depends="compile,compile-test,test-junit" description="Automated Test Framework" if="test.available"/>
<target name="test-junit" depends="compile,compile-test" if="test.available">
<junit maxmemory="512m" showoutput="${test.output}" fork="yes" printsummary="yes" errorProperty="tests.failed"
haltonfailure="no" failureProperty="tests.failed" timeout="${test.timeout}">
<classpath refid="test.classpath"/>
<sysproperty key="test.build.data" value="${build.test}/data"/>
<sysproperty key="build.test" value="${build.test}"/>
<sysproperty key="user.dir" value="${build.test}/data"/>
<sysproperty key="fs.default.name" value="${fs.default.name}"/>
<sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
<sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
<sysproperty key="test.src.dir" value="${test.src.dir}"/>
<formatter type="${test.junit.output.format}" />
<batchtest todir="${build.test}" unless="testcase">
<fileset dir="${src.test}">
<include name="**/Test*.java"/>
</fileset>
</batchtest>
<batchtest todir="${build.test}" if="testcase">
<fileset dir="${src.test}">
<include name="**/${testcase}.java"/>
</fileset>
</batchtest>
</junit>
<fail if="tests.failed">Tests failed!</fail>
</target>
</project>

View File

@ -1,145 +0,0 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
<info organisation="org.apache.hadoop" module="${ant.project.name}">
<license name="Apache 2.0"/>
<description>Raid</description>
</info>
<configurations defaultconfmapping="default">
<!--these match the Maven configurations-->
<conf name="default" extends="master,runtime"/>
<conf name="master" description="contains the artifact but no dependencies"/>
<conf name="runtime" description="runtime but not the artifact" />
<conf name="common" visibility="private" extends="runtime"
description="artifacts needed to compile/test the application"/>
<conf name="test" visibility="private" extends="runtime"/>
</configurations>
<publications>
<!--get the artifact from our module name-->
<artifact conf="master"/>
</publications>
<dependencies>
<dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="common->default"/>
<dependency org="org.apache.hadoop"
name="hadoop-common"
rev="${hadoop-common.version}"
conf="common->default"/>
<dependency org="org.apache.hadoop"
name="hadoop-common"
rev="${hadoop-common.version}"
conf="test->default">
<artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop"
name="hadoop-hdfs"
rev="${hadoop-hdfs.version}"
conf="common->default"/>
<dependency org="org.apache.hadoop"
name="hadoop-hdfs"
rev="${hadoop-hdfs.version}"
conf="test->default">
<artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
rev="${yarn.version}" conf="common->default"/>
<dependency org="org.apache.hadoop" name="hadoop-yarn-common"
rev="${yarn.version}" conf="common->default"/>
<dependency org="org.apache.hadoop" name="hadoop-archives"
rev="${hadoop-common.version}" conf="common->default"/>
<dependency org="commons-logging"
name="commons-logging"
rev="${commons-logging.version}"
conf="common->default"/>
<dependency org="log4j"
name="log4j"
rev="${log4j.version}"
conf="common->master"/>
<dependency org="junit"
name="junit"
rev="${junit.version}"
conf="common->default"/>
<!-- necessary for Mini*Clusters -->
<dependency org="commons-httpclient"
name="commons-httpclient"
rev="${commons-httpclient.version}"
conf="common->master"/>
<dependency org="commons-codec"
name="commons-codec"
rev="${commons-codec.version}"
conf="common->default"/>
<dependency org="commons-net"
name="commons-net"
rev="${commons-net.version}"
conf="common->default"/>
<dependency org="org.mortbay.jetty"
name="jetty"
rev="${jetty.version}"
conf="common->master"/>
<dependency org="org.mortbay.jetty"
name="jetty-util"
rev="${jetty-util.version}"
conf="common->master"/>
<dependency org="org.mortbay.jetty"
name="jsp-api-2.1"
rev="${jetty.version}"
conf="common->master"/>
<dependency org="org.mortbay.jetty"
name="jsp-2.1"
rev="${jetty.version}"
conf="common->master"/>
<dependency org="org.mortbay.jetty"
name="servlet-api-2.5"
rev="${servlet-api-2.5.version}"
conf="common->master"/>
<dependency org="commons-cli"
name="commons-cli"
rev="${commons-cli.version}"
conf="common->default"/>
<dependency org="org.apache.avro"
name="avro"
rev="${avro.version}"
conf="common->default">
<exclude module="ant"/>
<exclude module="jetty"/>
<exclude module="slf4j-simple"/>
</dependency>
<dependency org="org.codehaus.jackson"
name="jackson-mapper-asl"
rev="${jackson.version}"
conf="common->default"/>
<dependency org="org.codehaus.jackson"
name="jackson-core-asl"
rev="${jackson.version}"
conf="common->default"/>
<dependency org="com.thoughtworks.paranamer"
name="paranamer"
rev="${paranamer.version}"
conf="common->default"/>
<!-- Exclusions for transitive dependencies pulled in by log4j -->
<exclude org="com.sun.jdmk"/>
<exclude org="com.sun.jmx"/>
<exclude org="javax.jms"/>
<exclude org="javax.mail"/>
</dependencies>
</ivy-module>

View File

@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This properties file lists the versions of the various artifacts used by Hadoop.
# It drives Ivy and the generation of a Maven POM.
# These are the versions of our dependencies (in alphabetical order).

View File

@ -257,11 +257,6 @@
<artifactId>hadoop-client</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-raid</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>