Revert MAPREDUCE-3868. Reenable Raid.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1363572 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4c51dacd52
commit
370c65f282
|
@ -1,60 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<assembly>
|
||||
<id>hadoop-raid-dist</id>
|
||||
<formats>
|
||||
<format>dir</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<!-- Configuration files -->
|
||||
<fileSet>
|
||||
<directory>${basedir}/src/main/conf</directory>
|
||||
<outputDirectory>/etc/hadoop</outputDirectory>
|
||||
<includes>
|
||||
<include>*</include>
|
||||
</includes>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>${basedir}/src/main/sbin</directory>
|
||||
<outputDirectory>/sbin</outputDirectory>
|
||||
<includes>
|
||||
<include>*</include>
|
||||
</includes>
|
||||
<fileMode>0755</fileMode>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>${basedir}/src/main/libexec</directory>
|
||||
<outputDirectory>/libexec</outputDirectory>
|
||||
<includes>
|
||||
<include>*</include>
|
||||
</includes>
|
||||
<fileMode>0755</fileMode>
|
||||
</fileSet>
|
||||
<!-- Documentation -->
|
||||
<fileSet>
|
||||
<directory>${project.build.directory}/site</directory>
|
||||
<outputDirectory>/share/doc/hadoop/raid</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<outputDirectory>/share/hadoop/${hadoop.component}/lib</outputDirectory>
|
||||
<unpack>false</unpack>
|
||||
<scope>runtime</scope>
|
||||
<useProjectArtifact>true</useProjectArtifact>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -52,11 +52,6 @@
|
|||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs-raid</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
@ -125,7 +120,6 @@
|
|||
run cp -r $ROOT/hadoop-common-project/hadoop-common/target/hadoop-common-${project.version}/* .
|
||||
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* .
|
||||
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* .
|
||||
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-raid/target/hadoop-hdfs-raid-${project.version}/* .
|
||||
run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* .
|
||||
run cp -r $ROOT/hadoop-tools/hadoop-tools-dist/target/hadoop-tools-dist-${project.version}/* .
|
||||
echo
|
||||
|
|
|
@ -1,170 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
-->
|
||||
<project>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-project-dist</artifactId>
|
||||
<version>3.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../hadoop-project-dist</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs-raid</artifactId>
|
||||
<version>3.0.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>Apache Hadoop HDFS Raid</name>
|
||||
<description>Apache Hadoop HDFS Raid</description>
|
||||
|
||||
|
||||
<properties>
|
||||
<hadoop.component>raid</hadoop.component>
|
||||
<is.hadoop.component>false</is.hadoop.component>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-minicluster</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-archives</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>create-mrapp-generated-classpath</id>
|
||||
<phase>generate-test-resources</phase>
|
||||
<goals>
|
||||
<goal>build-classpath</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<!--
|
||||
This is needed to run the unit tests. It generates the required classpath
|
||||
that is required in the env of the launch container in the mini mr/yarn cluster.
|
||||
-->
|
||||
<outputFile>${project.build.directory}/test-classes/mrapp-generated-classpath</outputFile>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>findbugs-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludeFilterFile combine.self="override"></excludeFilterFile>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>docs</id>
|
||||
<activation>
|
||||
<activeByDefault>false</activeByDefault>
|
||||
</activation>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-site-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>docs</id>
|
||||
<phase>prepare-package</phase>
|
||||
<goals>
|
||||
<goal>site</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>dist</id>
|
||||
<activation>
|
||||
<activeByDefault>false</activeByDefault>
|
||||
</activation>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-assemblies</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>dist</id>
|
||||
<phase>prepare-package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<finalName>${project.artifactId}-${project.version}</finalName>
|
||||
<appendAssemblyId>false</appendAssemblyId>
|
||||
<attach>false</attach>
|
||||
<descriptorRefs>
|
||||
<descriptorRef>hadoop-raid-dist</descriptorRef>
|
||||
</descriptorRefs>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
|
@ -1,58 +0,0 @@
|
|||
<configuration>
|
||||
<srcPath prefix="hdfs://dfs1.xxx.com:8000/user/dhruba/">
|
||||
<policy name = "dhruba">
|
||||
<property>
|
||||
<name>srcReplication</name>
|
||||
<value>3</value>
|
||||
<description> pick files for RAID only if their replication factor is
|
||||
greater than or equal to this value.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>targetReplication</name>
|
||||
<value>2</value>
|
||||
<description> after RAIDing, decrease the replication factor of a file to
|
||||
this value.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metaReplication</name>
|
||||
<value>2</value>
|
||||
<description> the replication factor of the RAID meta file
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>modTimePeriod</name>
|
||||
<value>3600000</value>
|
||||
<description> time (milliseconds) after a file is modified to make it a
|
||||
candidate for RAIDing
|
||||
</description>
|
||||
</property>
|
||||
</policy>
|
||||
</srcPath>
|
||||
<srcPath prefix="hdfs://dfs1.xxx.com:9000/warehouse/table1">
|
||||
<policy name = "table1">
|
||||
<property>
|
||||
<name>targetReplication</name>
|
||||
<value>1</value>
|
||||
<description> after RAIDing, decrease the replication factor of a file to
|
||||
this value.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metaReplication</name>
|
||||
<value>2</value>
|
||||
<description> the replication factor of the RAID meta file
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>modTimePeriod</name>
|
||||
<value>3600000</value>
|
||||
<description> time (milliseconds) after a file is modified to make it a
|
||||
candidate for RAIDing
|
||||
</description>
|
||||
</property>
|
||||
</policy>
|
||||
</srcPath>
|
||||
</configuration>
|
||||
|
|
@ -1,509 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.net.URI;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.ChecksumException;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FSInputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FilterFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.raid.Decoder;
|
||||
import org.apache.hadoop.raid.RaidNode;
|
||||
import org.apache.hadoop.raid.ReedSolomonDecoder;
|
||||
import org.apache.hadoop.raid.XORDecoder;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
|
||||
/**
|
||||
* This is an implementation of the Hadoop RAID Filesystem. This FileSystem
|
||||
* wraps an instance of the DistributedFileSystem.
|
||||
* If a file is corrupted, this FileSystem uses the parity blocks to
|
||||
* regenerate the bad block.
|
||||
*/
|
||||
|
||||
public class DistributedRaidFileSystem extends FilterFileSystem {
|
||||
|
||||
// these are alternate locations that can be used for read-only access
|
||||
DecodeInfo[] alternates;
|
||||
Configuration conf;
|
||||
int stripeLength;
|
||||
|
||||
DistributedRaidFileSystem() throws IOException {
|
||||
}
|
||||
|
||||
DistributedRaidFileSystem(FileSystem fs) throws IOException {
|
||||
super(fs);
|
||||
alternates = null;
|
||||
stripeLength = 0;
|
||||
}
|
||||
|
||||
// Information required for decoding a source file
|
||||
static private class DecodeInfo {
|
||||
final Path destPath;
|
||||
final ErasureCodeType type;
|
||||
final Configuration conf;
|
||||
final int stripeLength;
|
||||
private DecodeInfo(Configuration conf, ErasureCodeType type, Path destPath) {
|
||||
this.conf = conf;
|
||||
this.type = type;
|
||||
this.destPath = destPath;
|
||||
this.stripeLength = RaidNode.getStripeLength(conf);
|
||||
}
|
||||
|
||||
Decoder createDecoder() {
|
||||
if (this.type == ErasureCodeType.XOR) {
|
||||
return new XORDecoder(conf, stripeLength);
|
||||
} else if (this.type == ErasureCodeType.RS) {
|
||||
return new ReedSolomonDecoder(conf, stripeLength,
|
||||
RaidNode.rsParityLength(conf));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize a Raid FileSystem
|
||||
*/
|
||||
public void initialize(URI name, Configuration conf) throws IOException {
|
||||
this.conf = conf;
|
||||
|
||||
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl",
|
||||
DistributedFileSystem.class);
|
||||
if (clazz == null) {
|
||||
throw new IOException("No FileSystem for fs.raid.underlyingfs.impl.");
|
||||
}
|
||||
|
||||
this.fs = (FileSystem)ReflectionUtils.newInstance(clazz, null);
|
||||
super.initialize(name, conf);
|
||||
|
||||
// find stripe length configured
|
||||
stripeLength = RaidNode.getStripeLength(conf);
|
||||
if (stripeLength == 0) {
|
||||
LOG.info("dfs.raid.stripeLength is incorrectly defined to be " +
|
||||
stripeLength + " Ignoring...");
|
||||
return;
|
||||
}
|
||||
|
||||
// Put XOR and RS in alternates
|
||||
alternates= new DecodeInfo[2];
|
||||
Path xorPath = RaidNode.xorDestinationPath(conf, fs);
|
||||
alternates[0] = new DecodeInfo(conf, ErasureCodeType.XOR, xorPath);
|
||||
Path rsPath = RaidNode.rsDestinationPath(conf, fs);
|
||||
alternates[1] = new DecodeInfo(conf, ErasureCodeType.RS, rsPath);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the underlying filesystem
|
||||
*/
|
||||
public FileSystem getFileSystem() throws IOException {
|
||||
return fs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
|
||||
ExtFSDataInputStream fd = new ExtFSDataInputStream(conf, this, alternates, f,
|
||||
stripeLength, bufferSize);
|
||||
return fd;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
if (fs != null) {
|
||||
try {
|
||||
fs.close();
|
||||
} catch(IOException ie) {
|
||||
//this might already be closed, ignore
|
||||
}
|
||||
}
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Layered filesystem input stream. This input stream tries reading
|
||||
* from alternate locations if it encounters read errors in the primary location.
|
||||
*/
|
||||
private static class ExtFSDataInputStream extends FSDataInputStream {
|
||||
|
||||
private static class UnderlyingBlock {
|
||||
// File that holds this block. Need not be the same as outer file.
|
||||
public Path path;
|
||||
// Offset within path where this block starts.
|
||||
public long actualFileOffset;
|
||||
// Offset within the outer file where this block starts.
|
||||
public long originalFileOffset;
|
||||
// Length of the block (length <= blk sz of outer file).
|
||||
public long length;
|
||||
public UnderlyingBlock(Path path, long actualFileOffset,
|
||||
long originalFileOffset, long length) {
|
||||
this.path = path;
|
||||
this.actualFileOffset = actualFileOffset;
|
||||
this.originalFileOffset = originalFileOffset;
|
||||
this.length = length;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an input stream that wraps all the reads/positions/seeking.
|
||||
*/
|
||||
private static class ExtFsInputStream extends FSInputStream {
|
||||
|
||||
// Extents of "good" underlying data that can be read.
|
||||
private UnderlyingBlock[] underlyingBlocks;
|
||||
private long currentOffset;
|
||||
private FSDataInputStream currentStream;
|
||||
private UnderlyingBlock currentBlock;
|
||||
private byte[] oneBytebuff = new byte[1];
|
||||
private int nextLocation;
|
||||
private DistributedRaidFileSystem lfs;
|
||||
private Path path;
|
||||
private FileStatus stat;
|
||||
private final DecodeInfo[] alternates;
|
||||
private final int buffersize;
|
||||
private final Configuration conf;
|
||||
private final int stripeLength;
|
||||
|
||||
ExtFsInputStream(Configuration conf, DistributedRaidFileSystem lfs,
|
||||
DecodeInfo[] alternates, Path path, int stripeLength, int buffersize)
|
||||
throws IOException {
|
||||
this.path = path;
|
||||
this.nextLocation = 0;
|
||||
// Construct array of blocks in file.
|
||||
this.stat = lfs.getFileStatus(path);
|
||||
long numBlocks = (this.stat.getLen() % this.stat.getBlockSize() == 0) ?
|
||||
this.stat.getLen() / this.stat.getBlockSize() :
|
||||
1 + this.stat.getLen() / this.stat.getBlockSize();
|
||||
this.underlyingBlocks = new UnderlyingBlock[(int)numBlocks];
|
||||
for (int i = 0; i < numBlocks; i++) {
|
||||
long actualFileOffset = i * stat.getBlockSize();
|
||||
long originalFileOffset = i * stat.getBlockSize();
|
||||
long length = Math.min(
|
||||
stat.getBlockSize(), stat.getLen() - originalFileOffset);
|
||||
this.underlyingBlocks[i] = new UnderlyingBlock(
|
||||
path, actualFileOffset, originalFileOffset, length);
|
||||
}
|
||||
this.currentOffset = 0;
|
||||
this.currentBlock = null;
|
||||
this.alternates = alternates;
|
||||
this.buffersize = buffersize;
|
||||
this.conf = conf;
|
||||
this.lfs = lfs;
|
||||
this.stripeLength = stripeLength;
|
||||
// Open a stream to the first block.
|
||||
openCurrentStream();
|
||||
}
|
||||
|
||||
private void closeCurrentStream() throws IOException {
|
||||
if (currentStream != null) {
|
||||
currentStream.close();
|
||||
currentStream = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Open a stream to the file containing the current block
|
||||
* and seek to the appropriate offset
|
||||
*/
|
||||
private void openCurrentStream() throws IOException {
|
||||
int blockIdx = (int)(currentOffset/stat.getBlockSize());
|
||||
UnderlyingBlock block = underlyingBlocks[blockIdx];
|
||||
// If the current path is the same as we want.
|
||||
if (currentBlock == block ||
|
||||
currentBlock != null && currentBlock.path == block.path) {
|
||||
// If we have a valid stream, nothing to do.
|
||||
if (currentStream != null) {
|
||||
currentBlock = block;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
closeCurrentStream();
|
||||
}
|
||||
currentBlock = block;
|
||||
currentStream = lfs.fs.open(currentBlock.path, buffersize);
|
||||
long offset = block.actualFileOffset +
|
||||
(currentOffset - block.originalFileOffset);
|
||||
currentStream.seek(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bytes available in the current block.
|
||||
*/
|
||||
private int blockAvailable() {
|
||||
return (int) (currentBlock.length -
|
||||
(currentOffset - currentBlock.originalFileOffset));
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int available() throws IOException {
|
||||
// Application should not assume that any bytes are buffered here.
|
||||
nextLocation = 0;
|
||||
return Math.min(blockAvailable(), currentStream.available());
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() throws IOException {
|
||||
closeCurrentStream();
|
||||
super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean markSupported() { return false; }
|
||||
|
||||
@Override
|
||||
public void mark(int readLimit) {
|
||||
// Mark and reset are not supported.
|
||||
nextLocation = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
// Mark and reset are not supported.
|
||||
nextLocation = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int read() throws IOException {
|
||||
int value = read(oneBytebuff);
|
||||
if (value < 0) {
|
||||
return value;
|
||||
} else {
|
||||
return oneBytebuff[0];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int read(byte[] b) throws IOException {
|
||||
int value = read(b, 0, b.length);
|
||||
nextLocation = 0;
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int read(byte[] b, int offset, int len)
|
||||
throws IOException {
|
||||
while (true) {
|
||||
openCurrentStream();
|
||||
try{
|
||||
int limit = Math.min(blockAvailable(), len);
|
||||
int value = currentStream.read(b, offset, limit);
|
||||
currentOffset += value;
|
||||
nextLocation = 0;
|
||||
return value;
|
||||
} catch (BlockMissingException e) {
|
||||
setAlternateLocations(e, currentOffset);
|
||||
} catch (ChecksumException e) {
|
||||
setAlternateLocations(e, currentOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int read(long position, byte[] b, int offset, int len)
|
||||
throws IOException {
|
||||
long oldPos = currentOffset;
|
||||
seek(position);
|
||||
try {
|
||||
return read(b, offset, len);
|
||||
} finally {
|
||||
seek(oldPos);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized long skip(long n) throws IOException {
|
||||
long skipped = 0;
|
||||
while (skipped < n) {
|
||||
int val = read();
|
||||
if (val < 0) {
|
||||
break;
|
||||
}
|
||||
skipped++;
|
||||
}
|
||||
nextLocation = 0;
|
||||
return skipped;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized long getPos() throws IOException {
|
||||
nextLocation = 0;
|
||||
return currentOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void seek(long pos) throws IOException {
|
||||
if (pos != currentOffset) {
|
||||
closeCurrentStream();
|
||||
currentOffset = pos;
|
||||
openCurrentStream();
|
||||
}
|
||||
nextLocation = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekToNewSource(long targetPos) throws IOException {
|
||||
seek(targetPos);
|
||||
boolean value = currentStream.seekToNewSource(currentStream.getPos());
|
||||
nextLocation = 0;
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* position readable again.
|
||||
*/
|
||||
@Override
|
||||
public void readFully(long pos, byte[] b, int offset, int length)
|
||||
throws IOException {
|
||||
long oldPos = currentOffset;
|
||||
seek(pos);
|
||||
try {
|
||||
while (true) {
|
||||
// This loop retries reading until successful. Unrecoverable errors
|
||||
// cause exceptions.
|
||||
// currentOffset is changed by read().
|
||||
try {
|
||||
while (length > 0) {
|
||||
int n = read(b, offset, length);
|
||||
if (n < 0) {
|
||||
throw new IOException("Premature EOF");
|
||||
}
|
||||
offset += n;
|
||||
length -= n;
|
||||
}
|
||||
nextLocation = 0;
|
||||
return;
|
||||
} catch (BlockMissingException e) {
|
||||
setAlternateLocations(e, currentOffset);
|
||||
} catch (ChecksumException e) {
|
||||
setAlternateLocations(e, currentOffset);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
seek(oldPos);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFully(long pos, byte[] b) throws IOException {
|
||||
readFully(pos, b, 0, b.length);
|
||||
nextLocation = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract good block from RAID
|
||||
* @throws IOException if all alternate locations are exhausted
|
||||
*/
|
||||
private void setAlternateLocations(IOException curexp, long offset)
|
||||
throws IOException {
|
||||
while (alternates != null && nextLocation < alternates.length) {
|
||||
try {
|
||||
int idx = nextLocation++;
|
||||
// Start offset of block.
|
||||
long corruptOffset =
|
||||
(offset / stat.getBlockSize()) * stat.getBlockSize();
|
||||
// Make sure we use DFS and not DistributedRaidFileSystem for unRaid.
|
||||
Configuration clientConf = new Configuration(conf);
|
||||
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl",
|
||||
DistributedFileSystem.class);
|
||||
clientConf.set("fs.hdfs.impl", clazz.getName());
|
||||
// Disable caching so that a previously cached RaidDfs is not used.
|
||||
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
||||
Path npath = RaidNode.unRaidCorruptBlock(clientConf, path,
|
||||
alternates[idx].destPath,
|
||||
alternates[idx].createDecoder(),
|
||||
stripeLength, corruptOffset);
|
||||
if (npath == null)
|
||||
continue;
|
||||
try {
|
||||
String outdir = conf.get("fs.raid.recoverylogdir");
|
||||
if (outdir != null) {
|
||||
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
|
||||
java.util.Date date = new java.util.Date();
|
||||
String fname = path.getName() + dateFormat.format(date) +
|
||||
(new Random()).nextInt() + ".txt";
|
||||
Path outputunraid = new Path(outdir, fname);
|
||||
FileSystem fs = outputunraid.getFileSystem(conf);
|
||||
FSDataOutputStream dout = fs.create(outputunraid);
|
||||
PrintStream ps = new PrintStream(dout);
|
||||
ps.println("Recovery attempt log");
|
||||
ps.println("Source path : " + path );
|
||||
ps.println("Alternate path : " + alternates[idx].destPath);
|
||||
ps.println("Stripe lentgh : " + stripeLength);
|
||||
ps.println("Corrupt offset : " + corruptOffset);
|
||||
String output = (npath==null) ? "UNSUCCESSFUL" : npath.toString();
|
||||
ps.println("Output from unRaid : " + output);
|
||||
ps.close();
|
||||
}
|
||||
} catch (Exception exc) {
|
||||
LOG.info("Error while creating recovery log: " + exc);
|
||||
}
|
||||
|
||||
closeCurrentStream();
|
||||
LOG.info("Using block at offset " + corruptOffset + " from " +
|
||||
npath);
|
||||
currentBlock.path = npath;
|
||||
currentBlock.actualFileOffset = 0; // Single block in file.
|
||||
// Dont change currentOffset, in case the user had done a seek?
|
||||
openCurrentStream();
|
||||
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
LOG.info("Error in using alternate path " + path + ". " + e +
|
||||
" Ignoring...");
|
||||
}
|
||||
}
|
||||
throw curexp;
|
||||
}
|
||||
|
||||
/**
|
||||
* The name of the file system that is immediately below the
|
||||
* DistributedRaidFileSystem. This is specified by the
|
||||
* configuration parameter called fs.raid.underlyingfs.impl.
|
||||
* If this parameter is not specified in the configuration, then
|
||||
* the default class DistributedFileSystem is returned.
|
||||
* @param conf the configuration object
|
||||
* @return the filesystem object immediately below DistributedRaidFileSystem
|
||||
* @throws IOException if all alternate locations are exhausted
|
||||
*/
|
||||
private FileSystem getUnderlyingFileSystem(Configuration conf) {
|
||||
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl", DistributedFileSystem.class);
|
||||
FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
|
||||
return fs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the data input stream by wrapping a new ExtFsInputStream built
 * from the same arguments.
 *
 * @param conf configuration passed to the wrapped stream
 * @param lfs the RAID file system opening the file
 * @param alternates parity locations the wrapped stream may fall back to
 * @param p the path to read
 * @param stripeLength stripe length passed to the wrapped stream
 * @param buffersize the size of IO
 * @throws IOException if the wrapped stream cannot be created
 */
public ExtFSDataInputStream(Configuration conf,
    DistributedRaidFileSystem lfs, DecodeInfo[] alternates, Path p,
    int stripeLength, int buffersize) throws IOException {
  super(new ExtFsInputStream(
      conf, lfs, alternates, p, stripeLength, buffersize));
}
|
||||
}
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PrintStream;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.tools.DFSck;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
public abstract class RaidDFSUtil {
|
||||
/**
|
||||
* Returns the corrupt blocks in a file.
|
||||
*/
|
||||
public static List<LocatedBlock> corruptBlocksInFile(
|
||||
DistributedFileSystem dfs, String path, long offset, long length)
|
||||
throws IOException {
|
||||
List<LocatedBlock> corrupt = new LinkedList<LocatedBlock>();
|
||||
LocatedBlocks locatedBlocks =
|
||||
getBlockLocations(dfs, path, offset, length);
|
||||
for (LocatedBlock b: locatedBlocks.getLocatedBlocks()) {
|
||||
if (b.isCorrupt() ||
|
||||
(b.getLocations().length == 0 && b.getBlockSize() > 0)) {
|
||||
corrupt.add(b);
|
||||
}
|
||||
}
|
||||
return corrupt;
|
||||
}
|
||||
|
||||
public static LocatedBlocks getBlockLocations(
|
||||
DistributedFileSystem dfs, String path, long offset, long length)
|
||||
throws IOException {
|
||||
return dfs.getClient().namenode.getBlockLocations(path, offset, length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make successive calls to listCorruptFiles to obtain all
|
||||
* corrupt files.
|
||||
*/
|
||||
public static String[] getCorruptFiles(DistributedFileSystem dfs)
|
||||
throws IOException {
|
||||
Set<String> corruptFiles = new HashSet<String>();
|
||||
RemoteIterator<Path> cfb = dfs.listCorruptFileBlocks(new Path("/"));
|
||||
while (cfb.hasNext()) {
|
||||
corruptFiles.add(cfb.next().toUri().getPath());
|
||||
}
|
||||
|
||||
return corruptFiles.toArray(new String[corruptFiles.size()]);
|
||||
}
|
||||
}
|
|
@ -1,632 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.server.namenode.*;
|
||||
import org.apache.hadoop.net.NetworkTopology;
|
||||
import org.apache.hadoop.net.Node;
|
||||
import org.apache.hadoop.raid.RaidNode;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
||||
/**
|
||||
* This BlockPlacementPolicy spreads out the group of blocks which are used by RAID
|
||||
* for recovering each other. This is important for the availability
|
||||
* of the blocks. This class can be used by multiple threads. It has to be
|
||||
* thread safe.
|
||||
*/
|
||||
public class BlockPlacementPolicyRaid extends BlockPlacementPolicy {
|
||||
public static final Log LOG =
|
||||
LogFactory.getLog(BlockPlacementPolicyRaid.class);
|
||||
Configuration conf;
|
||||
private int stripeLength;
|
||||
private int xorParityLength;
|
||||
private int rsParityLength;
|
||||
private String xorPrefix = null;
|
||||
private String rsPrefix = null;
|
||||
private String raidTempPrefix = null;
|
||||
private String raidrsTempPrefix = null;
|
||||
private String raidHarTempPrefix = null;
|
||||
private String raidrsHarTempPrefix = null;
|
||||
private FSNamesystem namesystem = null;
|
||||
private BlockPlacementPolicyDefault defaultPolicy;
|
||||
|
||||
CachedLocatedBlocks cachedLocatedBlocks;
|
||||
CachedFullPathNames cachedFullPathNames;
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public void initialize(Configuration conf, FSClusterStats stats,
|
||||
NetworkTopology clusterMap) {
|
||||
this.conf = conf;
|
||||
this.stripeLength = RaidNode.getStripeLength(conf);
|
||||
this.rsParityLength = RaidNode.rsParityLength(conf);
|
||||
this.xorParityLength = 1;
|
||||
try {
|
||||
this.xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
|
||||
this.rsPrefix = RaidNode.rsDestinationPath(conf).toUri().getPath();
|
||||
} catch (IOException e) {
|
||||
}
|
||||
if (this.xorPrefix == null) {
|
||||
this.xorPrefix = RaidNode.DEFAULT_RAID_LOCATION;
|
||||
}
|
||||
if (this.rsPrefix == null) {
|
||||
this.rsPrefix = RaidNode.DEFAULT_RAIDRS_LOCATION;
|
||||
}
|
||||
// Throws ClassCastException if we cannot cast here.
|
||||
this.namesystem = (FSNamesystem) stats;
|
||||
this.cachedLocatedBlocks = new CachedLocatedBlocks(namesystem);
|
||||
this.cachedFullPathNames = new CachedFullPathNames(namesystem);
|
||||
this.raidTempPrefix = RaidNode.xorTempPrefix(conf);
|
||||
this.raidrsTempPrefix = RaidNode.rsTempPrefix(conf);
|
||||
this.raidHarTempPrefix = RaidNode.xorHarTempPrefix(conf);
|
||||
this.raidrsHarTempPrefix = RaidNode.rsHarTempPrefix(conf);
|
||||
defaultPolicy = new BlockPlacementPolicyDefault(conf, stats, clusterMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
|
||||
DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
|
||||
long blocksize) {
|
||||
return chooseTarget(srcPath, numOfReplicas, writer, chosenNodes,
|
||||
null, blocksize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
|
||||
DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
|
||||
boolean returnChosenNodes,
|
||||
HashMap<Node, Node> excludedNodes, long blocksize) {
|
||||
try {
|
||||
FileType type = getFileType(srcPath);
|
||||
if (type == FileType.NOT_RAID) {
|
||||
return defaultPolicy.chooseTarget(
|
||||
srcPath, numOfReplicas, writer, chosenNodes, blocksize);
|
||||
}
|
||||
if (excludedNodes == null) {
|
||||
excludedNodes = new HashMap<Node, Node>();
|
||||
}
|
||||
addExcludedNodes(srcPath, type, excludedNodes);
|
||||
DatanodeDescriptor[] result =
|
||||
defaultPolicy.chooseTarget(numOfReplicas, writer,
|
||||
chosenNodes, returnChosenNodes, excludedNodes, blocksize);
|
||||
// Add the added block locations in the block locations cache.
|
||||
// So the rest of the blocks know about these locations.
|
||||
cachedLocatedBlocks.get(srcPath).
|
||||
add(new LocatedBlock(new ExtendedBlock(), result));
|
||||
return result;
|
||||
} catch (Exception e) {
|
||||
LOG.debug("Error happend when choosing datanode to write:" +
|
||||
StringUtils.stringifyException(e));
|
||||
return defaultPolicy.chooseTarget(srcPath, numOfReplicas, writer,
|
||||
chosenNodes, blocksize);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int verifyBlockPlacement(String srcPath, LocatedBlock lBlk,
|
||||
int minRacks) {
|
||||
return defaultPolicy.verifyBlockPlacement(srcPath, lBlk, minRacks);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public DatanodeDescriptor chooseReplicaToDelete(BlockCollection bc,
|
||||
Block block, short replicationFactor,
|
||||
Collection<DatanodeDescriptor> first,
|
||||
Collection<DatanodeDescriptor> second) {
|
||||
|
||||
DatanodeDescriptor chosenNode = null;
|
||||
try {
|
||||
String path = cachedFullPathNames.get(bc);
|
||||
FileType type = getFileType(path);
|
||||
if (type == FileType.NOT_RAID) {
|
||||
return defaultPolicy.chooseReplicaToDelete(
|
||||
bc, block, replicationFactor, first, second);
|
||||
}
|
||||
List<LocatedBlock> companionBlocks =
|
||||
getCompanionBlocks(path, type, block);
|
||||
if (companionBlocks == null || companionBlocks.size() == 0) {
|
||||
// Use the default method if it is not a valid raided or parity file
|
||||
return defaultPolicy.chooseReplicaToDelete(
|
||||
bc, block, replicationFactor, first, second);
|
||||
}
|
||||
// Delete from the first collection first
|
||||
// This ensures the number of unique rack of this block is not reduced
|
||||
Collection<DatanodeDescriptor> all = new HashSet<DatanodeDescriptor>();
|
||||
all.addAll(first);
|
||||
all.addAll(second);
|
||||
chosenNode = chooseReplicaToDelete(companionBlocks, all);
|
||||
if (chosenNode != null) {
|
||||
return chosenNode;
|
||||
}
|
||||
return defaultPolicy.chooseReplicaToDelete(
|
||||
bc, block, replicationFactor, first, second);
|
||||
} catch (Exception e) {
|
||||
LOG.debug("Error happend when choosing replica to delete" +
|
||||
StringUtils.stringifyException(e));
|
||||
return defaultPolicy.chooseReplicaToDelete(
|
||||
bc, block, replicationFactor, first, second);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the excluded nodes for the current block that is being written
|
||||
*/
|
||||
void addExcludedNodes(String file, FileType type, HashMap<Node, Node> excluded)
|
||||
throws IOException {
|
||||
Collection<LocatedBlock> blocks = getCompanionBlocks(file, type, null);
|
||||
if (blocks == null) {
|
||||
return;
|
||||
}
|
||||
for (LocatedBlock b : blocks) {
|
||||
for (Node n : b.getLocations()) {
|
||||
excluded.put(n, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private DatanodeDescriptor chooseReplicaToDelete(
|
||||
Collection<LocatedBlock> companionBlocks,
|
||||
Collection<DatanodeDescriptor> dataNodes) throws IOException {
|
||||
|
||||
if (dataNodes.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
// Count the number of replicas on each node and rack
|
||||
final Map<String, Integer> nodeCompanionBlockCount =
|
||||
countCompanionBlocks(companionBlocks, false);
|
||||
final Map<String, Integer> rackCompanionBlockCount =
|
||||
countCompanionBlocks(companionBlocks, true);
|
||||
|
||||
NodeComparator comparator =
|
||||
new NodeComparator(nodeCompanionBlockCount, rackCompanionBlockCount);
|
||||
return Collections.max(dataNodes, comparator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Count how many companion blocks are on each datanode or the each rack
|
||||
* @param companionBlocks a collection of all the companion blocks
|
||||
* @param doRackCount count the companion blocks on the racks of datanodes
|
||||
* @param result the map from node name to the number of companion blocks
|
||||
*/
|
||||
static Map<String, Integer> countCompanionBlocks(
|
||||
Collection<LocatedBlock> companionBlocks, boolean doRackCount) {
|
||||
Map<String, Integer> result = new HashMap<String, Integer>();
|
||||
for (LocatedBlock block : companionBlocks) {
|
||||
for (DatanodeInfo d : block.getLocations()) {
|
||||
String name = doRackCount ? d.getParent().getName() : d.getName();
|
||||
if (result.containsKey(name)) {
|
||||
int count = result.get(name) + 1;
|
||||
result.put(name, count);
|
||||
} else {
|
||||
result.put(name, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the datanodes based on the number of companion blocks on the same
|
||||
* node and rack. If even, compare the remaining space on the datanodes.
|
||||
*/
|
||||
class NodeComparator implements Comparator<DatanodeDescriptor> {
|
||||
private Map<String, Integer> nodeBlockCount;
|
||||
private Map<String, Integer> rackBlockCount;
|
||||
private NodeComparator(Map<String, Integer> nodeBlockCount,
|
||||
Map<String, Integer> rackBlockCount) {
|
||||
this.nodeBlockCount = nodeBlockCount;
|
||||
this.rackBlockCount = rackBlockCount;
|
||||
}
|
||||
@Override
|
||||
public int compare(DatanodeDescriptor d1, DatanodeDescriptor d2) {
|
||||
int res = compareBlockCount(d1, d2, nodeBlockCount);
|
||||
if (res != 0) {
|
||||
return res;
|
||||
}
|
||||
res = compareBlockCount(d1.getParent(), d2.getParent(), rackBlockCount);
|
||||
if (res != 0) {
|
||||
return res;
|
||||
}
|
||||
if (d1.getRemaining() > d2.getRemaining()) {
|
||||
return -1;
|
||||
}
|
||||
if (d1.getRemaining() < d2.getRemaining()) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
private int compareBlockCount(Node node1, Node node2,
|
||||
Map<String, Integer> blockCount) {
|
||||
Integer count1 = blockCount.get(node1.getName());
|
||||
Integer count2 = blockCount.get(node2.getName());
|
||||
count1 = count1 == null ? 0 : count1;
|
||||
count2 = count2 == null ? 0 : count2;
|
||||
if (count1 > count2) {
|
||||
return 1;
|
||||
}
|
||||
if (count1 < count2) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the companion blocks of the give block
|
||||
* Companion blocks are defined as the blocks that can help recover each
|
||||
* others by using raid decoder.
|
||||
* @param path The path of the file contains the block
|
||||
* @param type The type of this file
|
||||
* @param block The given block
|
||||
* null if it is the block which is currently being written to
|
||||
* @return the block locations of companion blocks
|
||||
*/
|
||||
List<LocatedBlock> getCompanionBlocks(String path, FileType type,
|
||||
Block block) throws IOException {
|
||||
switch (type) {
|
||||
case NOT_RAID:
|
||||
return new ArrayList<LocatedBlock>();
|
||||
case XOR_HAR_TEMP_PARITY:
|
||||
return getCompanionBlocksForHarParityBlock(
|
||||
path, xorParityLength, block);
|
||||
case RS_HAR_TEMP_PARITY:
|
||||
return getCompanionBlocksForHarParityBlock(
|
||||
path, rsParityLength, block);
|
||||
case XOR_TEMP_PARITY:
|
||||
return getCompanionBlocksForParityBlock(
|
||||
getSourceFile(path, raidTempPrefix), path, xorParityLength, block);
|
||||
case RS_TEMP_PARITY:
|
||||
return getCompanionBlocksForParityBlock(
|
||||
getSourceFile(path, raidrsTempPrefix), path, rsParityLength, block);
|
||||
case XOR_PARITY:
|
||||
return getCompanionBlocksForParityBlock(getSourceFile(path, xorPrefix),
|
||||
path, xorParityLength, block);
|
||||
case RS_PARITY:
|
||||
return getCompanionBlocksForParityBlock(getSourceFile(path, rsPrefix),
|
||||
path, rsParityLength, block);
|
||||
case XOR_SOURCE:
|
||||
return getCompanionBlocksForSourceBlock(
|
||||
path, getParityFile(path), xorParityLength, block);
|
||||
case RS_SOURCE:
|
||||
return getCompanionBlocksForSourceBlock(
|
||||
path, getParityFile(path), xorParityLength, block);
|
||||
}
|
||||
return new ArrayList<LocatedBlock>();
|
||||
}
|
||||
|
||||
private List<LocatedBlock> getCompanionBlocksForHarParityBlock(
|
||||
String parity, int parityLength, Block block)
|
||||
throws IOException {
|
||||
int blockIndex = getBlockIndex(parity, block);
|
||||
// consider only parity file in this case because source file block
|
||||
// location is not easy to obtain
|
||||
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
|
||||
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
|
||||
synchronized (parityBlocks) {
|
||||
int start = Math.max(0, blockIndex - parityLength + 1);
|
||||
int end = Math.min(parityBlocks.size(), blockIndex + parityLength);
|
||||
result.addAll(parityBlocks.subList(start, end));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<LocatedBlock> getCompanionBlocksForParityBlock(
|
||||
String src, String parity, int parityLength, Block block)
|
||||
throws IOException {
|
||||
int blockIndex = getBlockIndex(parity, block);
|
||||
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
|
||||
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
|
||||
int stripeIndex = blockIndex / parityLength;
|
||||
synchronized (parityBlocks) {
|
||||
int parityStart = stripeIndex * parityLength;
|
||||
int parityEnd = Math.min(parityStart + parityLength,
|
||||
parityBlocks.size());
|
||||
// for parity, always consider the neighbor blocks as companion blocks
|
||||
if (parityStart < parityBlocks.size()) {
|
||||
result.addAll(parityBlocks.subList(parityStart, parityEnd));
|
||||
}
|
||||
}
|
||||
|
||||
if (src == null) {
|
||||
return result;
|
||||
}
|
||||
List<LocatedBlock> sourceBlocks = cachedLocatedBlocks.get(src);
|
||||
synchronized (sourceBlocks) {
|
||||
int sourceStart = stripeIndex * stripeLength;
|
||||
int sourceEnd = Math.min(sourceStart + stripeLength,
|
||||
sourceBlocks.size());
|
||||
if (sourceStart < sourceBlocks.size()) {
|
||||
result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<LocatedBlock> getCompanionBlocksForSourceBlock(
|
||||
String src, String parity, int parityLength, Block block)
|
||||
throws IOException {
|
||||
int blockIndex = getBlockIndex(src, block);
|
||||
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
|
||||
List<LocatedBlock> sourceBlocks = cachedLocatedBlocks.get(src);
|
||||
int stripeIndex = blockIndex / stripeLength;
|
||||
synchronized (sourceBlocks) {
|
||||
int sourceStart = stripeIndex * stripeLength;
|
||||
int sourceEnd = Math.min(sourceStart + stripeLength,
|
||||
sourceBlocks.size());
|
||||
if (sourceStart < sourceBlocks.size()) {
|
||||
result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
|
||||
}
|
||||
}
|
||||
if (parity == null) {
|
||||
return result;
|
||||
}
|
||||
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
|
||||
synchronized (parityBlocks) {
|
||||
int parityStart = stripeIndex * parityLength;
|
||||
int parityEnd = Math.min(parityStart + parityLength,
|
||||
parityBlocks.size());
|
||||
if (parityStart < parityBlocks.size()) {
|
||||
result.addAll(parityBlocks.subList(parityStart, parityEnd));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private int getBlockIndex(String file, Block block) throws IOException {
|
||||
List<LocatedBlock> blocks = cachedLocatedBlocks.get(file);
|
||||
synchronized (blocks) {
|
||||
// null indicates that this block is currently added. Return size()
|
||||
// as the index in this case
|
||||
if (block == null) {
|
||||
return blocks.size();
|
||||
}
|
||||
for (int i = 0; i < blocks.size(); i++) {
|
||||
if (blocks.get(i).getBlock().getLocalBlock().equals(block)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new IOException("Cannot locate " + block + " in file " + file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache results for getFullPathName()
|
||||
*/
|
||||
static class CachedFullPathNames {
|
||||
FSNamesystem namesystem;
|
||||
CachedFullPathNames(FSNamesystem namesystem) {
|
||||
this.namesystem = namesystem;
|
||||
}
|
||||
private Cache<INodeWithHashCode, String> cacheInternal =
|
||||
new Cache<INodeWithHashCode, String>() {
|
||||
@Override
|
||||
public String getDirectly(INodeWithHashCode inode) throws IOException {
|
||||
namesystem.readLock();
|
||||
try {
|
||||
return inode.getFullPathName();
|
||||
} finally {
|
||||
namesystem.readUnlock();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static private class INodeWithHashCode {
|
||||
BlockCollection bc;
|
||||
INodeWithHashCode(BlockCollection bc) {
|
||||
this.bc= bc;
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return bc== obj;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return System.identityHashCode(bc);
|
||||
}
|
||||
String getFullPathName() {
|
||||
return bc.getName();
|
||||
}
|
||||
}
|
||||
|
||||
public String get(BlockCollection bc) throws IOException {
|
||||
return cacheInternal.get(new INodeWithHashCode(bc));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache results for FSNamesystem.getBlockLocations()
|
||||
*/
|
||||
static class CachedLocatedBlocks extends Cache<String, List<LocatedBlock>> {
|
||||
FSNamesystem namesystem;
|
||||
CachedLocatedBlocks(FSNamesystem namesystem) {
|
||||
this.namesystem = namesystem;
|
||||
}
|
||||
@Override
|
||||
public List<LocatedBlock> getDirectly(String file) throws IOException {
|
||||
long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
|
||||
List<LocatedBlock> result = NameNodeRaidUtil.getBlockLocations(namesystem,
|
||||
file, 0L, len, false, false).getLocatedBlocks();
|
||||
if (result == null || result.isEmpty()) {
|
||||
result = new ArrayList<LocatedBlock>();
|
||||
}
|
||||
return Collections.synchronizedList(result);
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class Cache<K, V> {
|
||||
private Map<K, ValueWithTime> cache;
|
||||
private static final long CACHE_TIMEOUT = 300000L; // 5 minutes
|
||||
// The timeout is long but the consequence of stale value is not serious
|
||||
Cache() {
|
||||
Map<K, ValueWithTime> map = new LinkedHashMap<K, ValueWithTime>() {
|
||||
private static final long serialVersionUID = 1L;
|
||||
final private int MAX_ENTRIES = 50000;
|
||||
@Override
|
||||
protected boolean removeEldestEntry(
|
||||
Map.Entry<K, ValueWithTime> eldest) {
|
||||
return size() > MAX_ENTRIES;
|
||||
}
|
||||
};
|
||||
this.cache = Collections.synchronizedMap(map);
|
||||
}
|
||||
|
||||
// Note that this method may hold FSNamesystem.readLock() and it may
|
||||
// be called inside FSNamesystem.writeLock(). If we make this method
|
||||
// synchronized, it will deadlock.
|
||||
abstract protected V getDirectly(K key) throws IOException;
|
||||
|
||||
public V get(K key) throws IOException {
|
||||
// The method is not synchronized so we may get some stale value here but
|
||||
// it's OK.
|
||||
ValueWithTime result = cache.get(key);
|
||||
long now = Time.now();
|
||||
if (result != null &&
|
||||
now - result.cachedTime < CACHE_TIMEOUT) {
|
||||
return result.value;
|
||||
}
|
||||
result = new ValueWithTime();
|
||||
result.value = getDirectly(key);
|
||||
result.cachedTime = now;
|
||||
cache.put(key, result);
|
||||
return result.value;
|
||||
}
|
||||
private class ValueWithTime {
|
||||
V value = null;
|
||||
long cachedTime = 0L;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get path for the corresponding source file for a valid parity
|
||||
* file. Returns null if it does not exists
|
||||
* @param parity the toUri path of the parity file
|
||||
* @return the toUri path of the source file
|
||||
*/
|
||||
String getSourceFile(String parity, String prefix) throws IOException {
|
||||
if (isHarFile(parity)) {
|
||||
return null;
|
||||
}
|
||||
// remove the prefix
|
||||
String src = parity.substring(prefix.length());
|
||||
if (NameNodeRaidUtil.getFileInfo(namesystem, src, true) == null) {
|
||||
return null;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get path for the corresponding parity file for a source file.
|
||||
* Returns null if it does not exists
|
||||
* @param src the toUri path of the source file
|
||||
* @return the toUri path of the parity file
|
||||
*/
|
||||
String getParityFile(String src) throws IOException {
|
||||
String xorParity = getParityFile(xorPrefix, src);
|
||||
if (xorParity != null) {
|
||||
return xorParity;
|
||||
}
|
||||
String rsParity = getParityFile(rsPrefix, src);
|
||||
if (rsParity != null) {
|
||||
return rsParity;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get path for the parity file. Returns null if it does not exists
|
||||
* @param parityPrefix usuall "/raid/" or "/raidrs/"
|
||||
* @return the toUri path of the parity file
|
||||
*/
|
||||
private String getParityFile(String parityPrefix, String src)
|
||||
throws IOException {
|
||||
String parity = parityPrefix + src;
|
||||
if (NameNodeRaidUtil.getFileInfo(namesystem, parity, true) == null) {
|
||||
return null;
|
||||
}
|
||||
return parity;
|
||||
}
|
||||
|
||||
private boolean isHarFile(String path) {
|
||||
return path.lastIndexOf(RaidNode.HAR_SUFFIX) != -1;
|
||||
}
|
||||
|
||||
/**
 * Classification of a path with respect to RAID, as assigned by
 * getFileType: temporary HAR parity, temporary parity and final parity
 * files are recognised by their path prefix, source files by the
 * location of their existing parity file, and everything else is
 * NOT_RAID. Each kind exists in an XOR and an RS variant.
 */
enum FileType {
|
||||
NOT_RAID,
|
||||
XOR_HAR_TEMP_PARITY,
|
||||
XOR_TEMP_PARITY,
|
||||
XOR_PARITY,
|
||||
XOR_SOURCE,
|
||||
RS_HAR_TEMP_PARITY,
|
||||
RS_TEMP_PARITY,
|
||||
RS_PARITY,
|
||||
RS_SOURCE,
|
||||
}
|
||||
|
||||
FileType getFileType(String path) throws IOException {
|
||||
if (path.startsWith(raidHarTempPrefix + Path.SEPARATOR)) {
|
||||
return FileType.XOR_HAR_TEMP_PARITY;
|
||||
}
|
||||
if (path.startsWith(raidrsHarTempPrefix + Path.SEPARATOR)) {
|
||||
return FileType.RS_HAR_TEMP_PARITY;
|
||||
}
|
||||
if (path.startsWith(raidTempPrefix + Path.SEPARATOR)) {
|
||||
return FileType.XOR_TEMP_PARITY;
|
||||
}
|
||||
if (path.startsWith(raidrsTempPrefix + Path.SEPARATOR)) {
|
||||
return FileType.RS_TEMP_PARITY;
|
||||
}
|
||||
if (path.startsWith(xorPrefix + Path.SEPARATOR)) {
|
||||
return FileType.XOR_PARITY;
|
||||
}
|
||||
if (path.startsWith(rsPrefix + Path.SEPARATOR)) {
|
||||
return FileType.RS_PARITY;
|
||||
}
|
||||
String parity = getParityFile(path);
|
||||
if (parity == null) {
|
||||
return FileType.NOT_RAID;
|
||||
}
|
||||
if (parity.startsWith(xorPrefix + Path.SEPARATOR)) {
|
||||
return FileType.XOR_SOURCE;
|
||||
}
|
||||
if (parity.startsWith(rsPrefix + Path.SEPARATOR)) {
|
||||
return FileType.RS_SOURCE;
|
||||
}
|
||||
return FileType.NOT_RAID;
|
||||
}
|
||||
}
|
|
@ -1,505 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.datanode;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.SocketException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.hadoop.fs.ChecksumException;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
|
||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
||||
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
||||
import org.apache.hadoop.net.SocketOutputStream;
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Reads a block from the disk and sends it to a recipient.
|
||||
*/
|
||||
public class RaidBlockSender implements java.io.Closeable {
|
||||
public static final Log LOG = DataNode.LOG;
|
||||
static final Log ClientTraceLog = DataNode.ClientTraceLog;
|
||||
|
||||
private ExtendedBlock block; // the block to read from
|
||||
|
||||
/** The visible length of a replica. */
|
||||
private final long replicaVisibleLength;
|
||||
|
||||
private InputStream blockIn; // data stream
|
||||
private long blockInPosition = -1; // updated while using transferTo().
|
||||
private DataInputStream checksumIn; // checksum datastream
|
||||
private DataChecksum checksum; // checksum stream
|
||||
private long offset; // starting position to read
|
||||
/** Initial position to read */
|
||||
private long initialOffset;
|
||||
private long endOffset; // ending position
|
||||
private int chunkSize; // chunk size
|
||||
private int checksumSize; // checksum size
|
||||
private boolean corruptChecksumOk; // if need to verify checksum
|
||||
private boolean chunkOffsetOK; // if need to send chunk offset
|
||||
private long seqno; // sequence number of packet
|
||||
|
||||
private boolean transferToAllowed = true;
|
||||
private boolean blockReadFully; //set when the whole block is read
|
||||
private boolean verifyChecksum; //if true, check is verified while reading
|
||||
private final String clientTraceFmt; // format of client trace log message
|
||||
|
||||
/**
|
||||
* Minimum buffer used while sending data to clients. Used only if
|
||||
* transferTo() is enabled. 64KB is not that large. It could be larger, but
|
||||
* not sure if there will be much more improvement.
|
||||
*/
|
||||
private static final int MIN_BUFFER_WITH_TRANSFERTO = 64*1024;
|
||||
private static final int TRANSFERTO_BUFFER_SIZE = Math.max(
|
||||
HdfsConstants.IO_FILE_BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO);
|
||||
private volatile ChunkChecksum lastChunkChecksum = null;
|
||||
|
||||
|
||||
/**
 * Convenience constructor: delegates to the eleven-argument constructor
 * with a null client trace format.
 */
public RaidBlockSender(ExtendedBlock block, long blockLength, long startOffset, long length,
|
||||
boolean corruptChecksumOk, boolean chunkOffsetOK,
|
||||
boolean verifyChecksum, boolean transferToAllowed,
|
||||
DataInputStream metadataIn, InputStreamFactory streamFactory
|
||||
) throws IOException {
|
||||
this(block, blockLength, startOffset, length,
|
||||
corruptChecksumOk, chunkOffsetOK,
|
||||
verifyChecksum, transferToAllowed,
|
||||
metadataIn, streamFactory, null);
|
||||
}
|
||||
|
||||
/**
 * Prepares a sender for the byte range [startOffset, startOffset + length)
 * of a block: reads the checksum header from the metadata stream, aligns
 * the range to checksum chunk boundaries, positions the checksum stream
 * and opens the block data stream at the aligned offset.
 *
 * @param block the block to read from
 * @param blockLength visible length of the replica; also the upper bound
 *        for the requested range
 * @param startOffset first byte wanted
 * @param length number of bytes wanted; a negative value means
 *        {@code blockLength}
 * @param corruptChecksumOk when true, a missing metadata stream is
 *        tolerated and a null checksum is used instead
 * @param metadataIn checksum metadata stream, may be null
 * @param streamFactory creates the block data stream at a given offset
 * @param clientTraceFmt format of the client trace log message, may be null
 * @throws IOException if the requested range does not fit the block or a
 *         stream cannot be read or positioned; streams opened so far are
 *         closed before the exception is rethrown
 */
public RaidBlockSender(ExtendedBlock block, long blockLength, long startOffset, long length,
|
||||
boolean corruptChecksumOk, boolean chunkOffsetOK,
|
||||
boolean verifyChecksum, boolean transferToAllowed,
|
||||
DataInputStream metadataIn, InputStreamFactory streamFactory,
|
||||
String clientTraceFmt) throws IOException {
|
||||
try {
|
||||
this.block = block;
|
||||
this.chunkOffsetOK = chunkOffsetOK;
|
||||
this.corruptChecksumOk = corruptChecksumOk;
|
||||
this.verifyChecksum = verifyChecksum;
|
||||
this.replicaVisibleLength = blockLength;
|
||||
this.transferToAllowed = transferToAllowed;
|
||||
this.clientTraceFmt = clientTraceFmt;
|
||||
|
||||
// NOTE(review): when corruptChecksumOk is false and metadataIn is null
// this branch is taken with a null stream - confirm callers never do that.
if ( !corruptChecksumOk || metadataIn != null) {
|
||||
this.checksumIn = metadataIn;
|
||||
|
||||
// read and handle the common header here. For now just a version
|
||||
BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
|
||||
short version = header.getVersion();
|
||||
|
||||
if (version != BlockMetadataHeader.VERSION) {
|
||||
LOG.warn("Wrong version (" + version + ") for metadata file for "
|
||||
+ block + " ignoring ...");
|
||||
}
|
||||
checksum = header.getChecksum();
|
||||
} else {
|
||||
LOG.warn("Could not find metadata file for " + block);
|
||||
// This only decides the buffer size. Use BUFFER_SIZE?
|
||||
checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL,
|
||||
16 * 1024);
|
||||
}
|
||||
|
||||
/* If bytesPerChecksum is very large, then the metadata file
|
||||
* is mostly corrupted. For now just truncate bytesPerchecksum to
|
||||
* blockLength.
|
||||
*/
|
||||
int size = checksum.getBytesPerChecksum();
|
||||
if (size > 10*1024*1024 && size > replicaVisibleLength) {
|
||||
checksum = DataChecksum.newDataChecksum(checksum.getChecksumType(),
|
||||
Math.max((int)replicaVisibleLength, 10*1024*1024));
|
||||
size = checksum.getBytesPerChecksum();
|
||||
}
|
||||
chunkSize = size;
|
||||
checksumSize = checksum.getChecksumSize();
|
||||
|
||||
// A negative length requests everything up to the visible length.
if (length < 0) {
|
||||
length = replicaVisibleLength;
|
||||
}
|
||||
|
||||
endOffset = blockLength;
|
||||
|
||||
// Reject ranges that start before 0 or reach past the block length.
if (startOffset < 0 || startOffset > endOffset
|
||||
|| (length + startOffset) > endOffset) {
|
||||
String msg = " Offset " + startOffset + " and length " + length
|
||||
+ " don't match block " + block + " ( blockLen " + endOffset + " )";
|
||||
LOG.warn("sendBlock() : " + msg);
|
||||
throw new IOException(msg);
|
||||
}
|
||||
|
||||
// Round the start down to the beginning of its checksum chunk.
offset = (startOffset - (startOffset % chunkSize));
|
||||
if (length >= 0) {
|
||||
// Make sure endOffset points to end of a checksumed chunk.
|
||||
long tmpLen = startOffset + length;
|
||||
if (tmpLen % chunkSize != 0) {
|
||||
tmpLen += (chunkSize - tmpLen % chunkSize);
|
||||
}
|
||||
if (tmpLen < endOffset) {
|
||||
// will use on-disk checksum here since the end is a stable chunk
|
||||
endOffset = tmpLen;
|
||||
}
|
||||
}
|
||||
|
||||
// seek to the right offsets
|
||||
if (offset > 0) {
|
||||
// One checksum of checksumSize bytes is stored per chunk skipped.
long checksumSkip = (offset / chunkSize) * checksumSize;
|
||||
// note blockInStream is seeked when created below
|
||||
if (checksumSkip > 0) {
|
||||
// Should we use seek() for checksum file as well?
|
||||
IOUtils.skipFully(checksumIn, checksumSkip);
|
||||
}
|
||||
}
|
||||
seqno = 0;
|
||||
|
||||
blockIn = streamFactory.createStream(offset);
|
||||
} catch (IOException ioe) {
|
||||
// Release whatever was opened before reporting the failure.
IOUtils.closeStream(this);
|
||||
IOUtils.closeStream(blockIn);
|
||||
throw ioe;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* close opened files.
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
IOException ioe = null;
|
||||
// close checksum file
|
||||
if(checksumIn!=null) {
|
||||
try {
|
||||
checksumIn.close();
|
||||
} catch (IOException e) {
|
||||
ioe = e;
|
||||
}
|
||||
checksumIn = null;
|
||||
}
|
||||
// close data file
|
||||
if(blockIn!=null) {
|
||||
try {
|
||||
blockIn.close();
|
||||
} catch (IOException e) {
|
||||
ioe = e;
|
||||
}
|
||||
blockIn = null;
|
||||
}
|
||||
// throw IOException if there is any
|
||||
if(ioe!= null) {
|
||||
throw ioe;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an IOExcpetion (not subclasses) to SocketException.
|
||||
* This is typically done to indicate to upper layers that the error
|
||||
* was a socket error rather than often more serious exceptions like
|
||||
* disk errors.
|
||||
*/
|
||||
private static IOException ioeToSocketException(IOException ioe) {
|
||||
if (ioe.getClass().equals(IOException.class)) {
|
||||
// "se" could be a new class in stead of SocketException.
|
||||
IOException se = new SocketException("Original Exception : " + ioe);
|
||||
se.initCause(ioe);
|
||||
/* Change the stacktrace so that original trace is not truncated
|
||||
* when printed.*/
|
||||
se.setStackTrace(ioe.getStackTrace());
|
||||
return se;
|
||||
}
|
||||
// otherwise just return the same exception.
|
||||
return ioe;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param datalen Length of data
|
||||
* @return number of chunks for data of given size
|
||||
*/
|
||||
private int numberOfChunks(long datalen) {
|
||||
return (int) ((datalen + chunkSize - 1)/chunkSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write packet header into {@code pkt}
|
||||
*/
|
||||
private void writePacketHeader(ByteBuffer pkt, int dataLen, int packetLen) {
|
||||
pkt.clear();
|
||||
PacketHeader header = new PacketHeader(packetLen, offset, seqno,
|
||||
(dataLen == 0), dataLen, false);
|
||||
header.putInBuffer(pkt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read checksum into given buffer
|
||||
* @param buf buffer to read the checksum into
|
||||
* @param checksumOffset offset at which to write the checksum into buf
|
||||
* @param checksumLen length of checksum to write
|
||||
* @throws IOException on error
|
||||
*/
|
||||
private void readChecksum(byte[] buf, final int checksumOffset,
|
||||
final int checksumLen) throws IOException {
|
||||
if (checksumSize <= 0 && checksumIn == null) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
checksumIn.readFully(buf, checksumOffset, checksumLen);
|
||||
} catch (IOException e) {
|
||||
LOG.warn(" Could not read or failed to veirfy checksum for data"
|
||||
+ " at offset " + offset + " for block " + block, e);
|
||||
IOUtils.closeStream(checksumIn);
|
||||
checksumIn = null;
|
||||
if (corruptChecksumOk) {
|
||||
if (checksumOffset < checksumLen) {
|
||||
// Just fill the array with zeros.
|
||||
Arrays.fill(buf, checksumOffset, checksumLen, (byte) 0);
|
||||
}
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a packet with up to maxChunks chunks of data.
|
||||
*
|
||||
* @param pkt buffer used for writing packet data
|
||||
* @param maxChunks maximum number of chunks to send
|
||||
* @param out stream to send data to
|
||||
* @param transferTo use transferTo to send data
|
||||
* @param throttler used for throttling data transfer bandwidth
|
||||
*/
|
||||
private int sendPacket(ByteBuffer pkt, int maxChunks, OutputStream out,
|
||||
boolean transferTo, DataTransferThrottler throttler) throws IOException {
|
||||
int dataLen = (int) Math.min(endOffset - offset,
|
||||
(chunkSize * (long) maxChunks));
|
||||
|
||||
int numChunks = numberOfChunks(dataLen); // Number of chunks be sent in the packet
|
||||
int checksumDataLen = numChunks * checksumSize;
|
||||
int packetLen = dataLen + checksumDataLen + 4;
|
||||
boolean lastDataPacket = offset + dataLen == endOffset && dataLen > 0;
|
||||
|
||||
writePacketHeader(pkt, dataLen, packetLen);
|
||||
|
||||
int checksumOff = pkt.position();
|
||||
byte[] buf = pkt.array();
|
||||
|
||||
if (checksumSize > 0 && checksumIn != null) {
|
||||
readChecksum(buf, checksumOff, checksumDataLen);
|
||||
|
||||
// write in progress that we need to use to get last checksum
|
||||
if (lastDataPacket && lastChunkChecksum != null) {
|
||||
int start = checksumOff + checksumDataLen - checksumSize;
|
||||
byte[] updatedChecksum = lastChunkChecksum.getChecksum();
|
||||
|
||||
if (updatedChecksum != null) {
|
||||
System.arraycopy(updatedChecksum, 0, buf, start, checksumSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int dataOff = checksumOff + checksumDataLen;
|
||||
if (!transferTo) { // normal transfer
|
||||
IOUtils.readFully(blockIn, buf, dataOff, dataLen);
|
||||
|
||||
if (verifyChecksum) {
|
||||
verifyChecksum(buf, dataOff, dataLen, numChunks, checksumOff);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
if (transferTo) {
|
||||
SocketOutputStream sockOut = (SocketOutputStream)out;
|
||||
sockOut.write(buf, 0, dataOff); // First write checksum
|
||||
|
||||
// no need to flush. since we know out is not a buffered stream.
|
||||
sockOut.transferToFully(((FileInputStream)blockIn).getChannel(),
|
||||
blockInPosition, dataLen);
|
||||
blockInPosition += dataLen;
|
||||
} else {
|
||||
// normal transfer
|
||||
out.write(buf, 0, dataOff + dataLen);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
/* Exception while writing to the client. Connection closure from
|
||||
* the other end is mostly the case and we do not care much about
|
||||
* it. But other things can go wrong, especially in transferTo(),
|
||||
* which we do not want to ignore.
|
||||
*
|
||||
* The message parsing below should not be considered as a good
|
||||
* coding example. NEVER do it to drive a program logic. NEVER.
|
||||
* It was done here because the NIO throws an IOException for EPIPE.
|
||||
*/
|
||||
String ioem = e.getMessage();
|
||||
if (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset")) {
|
||||
LOG.error("BlockSender.sendChunks() exception: ", e);
|
||||
}
|
||||
throw ioeToSocketException(e);
|
||||
}
|
||||
|
||||
if (throttler != null) { // rebalancing so throttle
|
||||
throttler.throttle(packetLen);
|
||||
}
|
||||
|
||||
return dataLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute checksum for chunks and verify the checksum that is read from
|
||||
* the metadata file is correct.
|
||||
*
|
||||
* @param buf buffer that has checksum and data
|
||||
* @param dataOffset position where data is written in the buf
|
||||
* @param datalen length of data
|
||||
* @param numChunks number of chunks corresponding to data
|
||||
* @param checksumOffset offset where checksum is written in the buf
|
||||
* @throws ChecksumException on failed checksum verification
|
||||
*/
|
||||
public void verifyChecksum(final byte[] buf, final int dataOffset,
|
||||
final int datalen, final int numChunks, final int checksumOffset)
|
||||
throws ChecksumException {
|
||||
int dOff = dataOffset;
|
||||
int cOff = checksumOffset;
|
||||
int dLeft = datalen;
|
||||
|
||||
for (int i = 0; i < numChunks; i++) {
|
||||
checksum.reset();
|
||||
int dLen = Math.min(dLeft, chunkSize);
|
||||
checksum.update(buf, dOff, dLen);
|
||||
if (!checksum.compare(buf, cOff)) {
|
||||
long failedPos = offset + datalen - dLeft;
|
||||
throw new ChecksumException("Checksum failed at " + failedPos,
|
||||
failedPos);
|
||||
}
|
||||
dLeft -= dLen;
|
||||
dOff += dLen;
|
||||
cOff += checksumSize;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* sendBlock() is used to read block and its metadata and stream the data to
|
||||
* either a client or to another datanode.
|
||||
*
|
||||
* @param out stream to which the block is written to
|
||||
* @param baseStream optional. if non-null, <code>out</code> is assumed to
|
||||
* be a wrapper over this stream. This enables optimizations for
|
||||
* sending the data, e.g.
|
||||
* {@link SocketOutputStream#transferToFully(FileChannel,
|
||||
* long, int)}.
|
||||
* @return total bytes reads, including crc.
|
||||
*/
|
||||
public long sendBlock(DataOutputStream out, OutputStream baseStream)
|
||||
throws IOException {
|
||||
if (out == null) {
|
||||
throw new IOException( "out stream is null" );
|
||||
}
|
||||
initialOffset = offset;
|
||||
long totalRead = 0;
|
||||
OutputStream streamForSendChunks = out;
|
||||
|
||||
final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
|
||||
try {
|
||||
int maxChunksPerPacket;
|
||||
int pktSize = PacketHeader.PKT_HEADER_LEN;
|
||||
boolean transferTo = transferToAllowed && !verifyChecksum
|
||||
&& baseStream instanceof SocketOutputStream
|
||||
&& blockIn instanceof FileInputStream;
|
||||
if (transferTo) {
|
||||
FileChannel fileChannel = ((FileInputStream)blockIn).getChannel();
|
||||
blockInPosition = fileChannel.position();
|
||||
streamForSendChunks = baseStream;
|
||||
maxChunksPerPacket = numberOfChunks(TRANSFERTO_BUFFER_SIZE);
|
||||
|
||||
// Smaller packet size to only hold checksum when doing transferTo
|
||||
pktSize += checksumSize * maxChunksPerPacket;
|
||||
} else {
|
||||
maxChunksPerPacket = Math.max(1,
|
||||
numberOfChunks(HdfsConstants.IO_FILE_BUFFER_SIZE));
|
||||
// Packet size includes both checksum and data
|
||||
pktSize += (chunkSize + checksumSize) * maxChunksPerPacket;
|
||||
}
|
||||
|
||||
ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);
|
||||
|
||||
while (endOffset > offset) {
|
||||
long len = sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks,
|
||||
transferTo, null);
|
||||
offset += len;
|
||||
totalRead += len + (numberOfChunks(len) * checksumSize);
|
||||
seqno++;
|
||||
}
|
||||
try {
|
||||
// send an empty packet to mark the end of the block
|
||||
sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, transferTo,
|
||||
null);
|
||||
out.flush();
|
||||
} catch (IOException e) { //socket error
|
||||
throw ioeToSocketException(e);
|
||||
}
|
||||
blockReadFully = true;
|
||||
} finally {
|
||||
if (clientTraceFmt != null) {
|
||||
final long endTime = System.nanoTime();
|
||||
ClientTraceLog.info(String.format(clientTraceFmt, totalRead,
|
||||
initialOffset, endTime - startTime));
|
||||
}
|
||||
close();
|
||||
}
|
||||
return totalRead;
|
||||
}
|
||||
|
||||
boolean isBlockReadFully() {
|
||||
return blockReadFully;
|
||||
}
|
||||
|
||||
/**
 * Supplies the stream from which block data is read. The sender calls it
 * with the (chunk-aligned) offset at which reading should begin.
 */
public interface InputStreamFactory {
  /**
   * @param offset offset within the block at which the stream should start
   * @return a stream over the block data
   * @throws IOException if the stream cannot be created
   */
  InputStream createStream(long offset) throws IOException;
}
|
||||
|
||||
/**
|
||||
* @return the checksum type that will be used with this block transfer.
|
||||
*/
|
||||
public DataChecksum getChecksum() {
|
||||
return checksum;
|
||||
}
|
||||
|
||||
private static class BlockInputStreamFactory implements InputStreamFactory {
|
||||
private final ExtendedBlock block;
|
||||
private final FsDatasetSpi<?> data;
|
||||
|
||||
private BlockInputStreamFactory(ExtendedBlock block, FsDatasetSpi<?> data) {
|
||||
this.block = block;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream createStream(long offset) throws IOException {
|
||||
return data.getBlockInputStream(block, offset);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import org.apache.hadoop.classification.*;
|
||||
import org.apache.hadoop.fs.*;
|
||||
import org.apache.hadoop.hdfs.protocol.*;
|
||||
import org.apache.hadoop.ipc.StandbyException;
|
||||
import org.apache.hadoop.security.AccessControlException;
|
||||
|
||||
/** Utilities used by RAID for accessing NameNode. */
|
||||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Unstable
|
||||
public class NameNodeRaidUtil {
|
||||
/** Accessing FSDirectory.getFileInfo(..) */
|
||||
public static HdfsFileStatus getFileInfo(final FSDirectory dir,
|
||||
final String src, final boolean resolveLink
|
||||
) throws UnresolvedLinkException {
|
||||
return dir.getFileInfo(src, resolveLink);
|
||||
}
|
||||
|
||||
/** Accessing FSNamesystem.getFileInfo(..)
|
||||
* @throws StandbyException */
|
||||
public static HdfsFileStatus getFileInfo(final FSNamesystem namesystem,
|
||||
final String src, final boolean resolveLink
|
||||
) throws AccessControlException, UnresolvedLinkException, StandbyException {
|
||||
return namesystem.getFileInfo(src, resolveLink);
|
||||
}
|
||||
|
||||
/** Accessing FSNamesystem.getBlockLocations(..) */
|
||||
public static LocatedBlocks getBlockLocations(final FSNamesystem namesystem,
|
||||
final String src, final long offset, final long length,
|
||||
final boolean doAccessTime, final boolean needBlockToken
|
||||
) throws FileNotFoundException, UnresolvedLinkException, IOException {
|
||||
return namesystem.getBlockLocations(src, offset, length,
|
||||
doAccessTime, needBlockToken, true);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,840 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.Socket;
|
||||
import java.nio.channels.SocketChannel;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.protocol.datatransfer.*;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.FSConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.datanode.RaidBlockSender;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.hadoop.net.NetUtils;
|
||||
|
||||
import org.apache.hadoop.raid.RaidNode;
|
||||
import org.apache.hadoop.raid.RaidUtils;
|
||||
|
||||
|
||||
/**
|
||||
* contains the core functionality of the block fixer
|
||||
*
|
||||
* configuration options:
|
||||
* raid.blockfix.classname - the class name of the block fixer
|
||||
* implementation to use
|
||||
*
|
||||
* raid.blockfix.interval - interval between checks for corrupt files
|
||||
*
|
||||
* raid.blockfix.history.interval - interval before fixing same file again
|
||||
*
|
||||
* raid.blockfix.read.timeout - read time out
|
||||
*
|
||||
* raid.blockfix.write.timeout - write time out
|
||||
*/
|
||||
public abstract class BlockFixer extends Configured implements Runnable {
|
||||
|
||||
public static final String BLOCKFIX_CLASSNAME = "raid.blockfix.classname";
|
||||
public static final String BLOCKFIX_INTERVAL = "raid.blockfix.interval";
|
||||
public static final String BLOCKFIX_HISTORY_INTERVAL =
|
||||
"raid.blockfix.history.interval";
|
||||
public static final String BLOCKFIX_READ_TIMEOUT =
|
||||
"raid.blockfix.read.timeout";
|
||||
public static final String BLOCKFIX_WRITE_TIMEOUT =
|
||||
"raid.blockfix.write.timeout";
|
||||
|
||||
public static final long DEFAULT_BLOCKFIX_INTERVAL = 60 * 1000; // 1 min
|
||||
public static final long DEFAULT_BLOCKFIX_HISTORY_INTERVAL =
|
||||
60 * 60 * 1000; // 60 mins
|
||||
|
||||
public static BlockFixer createBlockFixer(Configuration conf)
|
||||
throws ClassNotFoundException {
|
||||
try {
|
||||
// default to distributed block fixer
|
||||
Class<?> blockFixerClass =
|
||||
conf.getClass(BLOCKFIX_CLASSNAME, DistBlockFixer.class);
|
||||
if (!BlockFixer.class.isAssignableFrom(blockFixerClass)) {
|
||||
throw new ClassNotFoundException("not an implementation of blockfixer");
|
||||
}
|
||||
Constructor<?> constructor =
|
||||
blockFixerClass.getConstructor(new Class[] {Configuration.class} );
|
||||
return (BlockFixer) constructor.newInstance(conf);
|
||||
} catch (NoSuchMethodException e) {
|
||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
||||
} catch (InstantiationException e) {
|
||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
||||
}
|
||||
}
|
||||
|
||||
private long numFilesFixed = 0;
|
||||
|
||||
public volatile boolean running = true;
|
||||
|
||||
// interval between checks for corrupt files
|
||||
protected long blockFixInterval;
|
||||
|
||||
// interval before fixing same file again
|
||||
protected long historyInterval;
|
||||
|
||||
/**
 * Stores the configuration and reads the two scheduling intervals from
 * it, using the DEFAULT_* constants when an option is absent.
 *
 * @param conf configuration for this block fixer
 */
public BlockFixer(Configuration conf) {
  super(conf);
  this.blockFixInterval = getConf().getLong(
      BLOCKFIX_INTERVAL, DEFAULT_BLOCKFIX_INTERVAL);
  this.historyInterval = getConf().getLong(
      BLOCKFIX_HISTORY_INTERVAL, DEFAULT_BLOCKFIX_HISTORY_INTERVAL);
}
|
||||
|
||||
@Override
|
||||
public abstract void run();
|
||||
|
||||
/**
|
||||
* returns the number of files that have been fixed by this block fixer
|
||||
*/
|
||||
public synchronized long filesFixed() {
|
||||
return numFilesFixed;
|
||||
}
|
||||
|
||||
/**
|
||||
* increments the number of files that have been fixed by this block fixer
|
||||
*/
|
||||
protected synchronized void incrFilesFixed() {
|
||||
numFilesFixed++;
|
||||
}
|
||||
|
||||
/**
|
||||
* increments the number of files that have been fixed by this block fixer
|
||||
*/
|
||||
protected synchronized void incrFilesFixed(long incr) {
|
||||
if (incr < 0) {
|
||||
throw new IllegalArgumentException("cannot increment by negative value " +
|
||||
incr);
|
||||
}
|
||||
|
||||
numFilesFixed += incr;
|
||||
}
|
||||
|
||||
static boolean isSourceFile(Path p, String[] destPrefixes) {
|
||||
String pathStr = p.toUri().getPath();
|
||||
for (String destPrefix: destPrefixes) {
|
||||
if (pathStr.startsWith(destPrefix)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void filterUnfixableSourceFiles(Iterator<Path> it) throws IOException {
|
||||
String xorPrefix = RaidNode.xorDestinationPath(getConf()).toUri().getPath();
|
||||
if (!xorPrefix.endsWith(Path.SEPARATOR)) {
|
||||
xorPrefix += Path.SEPARATOR;
|
||||
}
|
||||
String rsPrefix = RaidNode.rsDestinationPath(getConf()).toUri().getPath();
|
||||
if (!rsPrefix.endsWith(Path.SEPARATOR)) {
|
||||
rsPrefix += Path.SEPARATOR;
|
||||
}
|
||||
String[] destPrefixes = new String[]{xorPrefix, rsPrefix};
|
||||
while (it.hasNext()) {
|
||||
Path p = it.next();
|
||||
if (isSourceFile(p, destPrefixes) &&
|
||||
RaidNode.xorParityForSource(p, getConf()) == null &&
|
||||
RaidNode.rsParityForSource(p, getConf()) == null) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* this class implements the actual fixing functionality
|
||||
* we keep this in a separate class so that
|
||||
* the distributed block fixer can use it
|
||||
*/
|
||||
static class BlockFixerHelper extends Configured {
|
||||
|
||||
public static final Log LOG = LogFactory.getLog(BlockFixer.
|
||||
BlockFixerHelper.class);
|
||||
|
||||
private String xorPrefix;
|
||||
private String rsPrefix;
|
||||
private XOREncoder xorEncoder;
|
||||
private XORDecoder xorDecoder;
|
||||
private ReedSolomonEncoder rsEncoder;
|
||||
private ReedSolomonDecoder rsDecoder;
|
||||
|
||||
public BlockFixerHelper(Configuration conf) throws IOException {
|
||||
super(conf);
|
||||
|
||||
xorPrefix = RaidNode.xorDestinationPath(getConf()).toUri().getPath();
|
||||
if (!xorPrefix.endsWith(Path.SEPARATOR)) {
|
||||
xorPrefix += Path.SEPARATOR;
|
||||
}
|
||||
rsPrefix = RaidNode.rsDestinationPath(getConf()).toUri().getPath();
|
||||
if (!rsPrefix.endsWith(Path.SEPARATOR)) {
|
||||
rsPrefix += Path.SEPARATOR;
|
||||
}
|
||||
int stripeLength = RaidNode.getStripeLength(getConf());
|
||||
xorEncoder = new XOREncoder(getConf(), stripeLength);
|
||||
xorDecoder = new XORDecoder(getConf(), stripeLength);
|
||||
int parityLength = RaidNode.rsParityLength(getConf());
|
||||
rsEncoder = new ReedSolomonEncoder(getConf(), stripeLength, parityLength);
|
||||
rsDecoder = new ReedSolomonDecoder(getConf(), stripeLength, parityLength);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* checks whether file is xor parity file
|
||||
*/
|
||||
boolean isXorParityFile(Path p) {
|
||||
String pathStr = p.toUri().getPath();
|
||||
if (pathStr.contains(RaidNode.HAR_SUFFIX)) {
|
||||
return false;
|
||||
}
|
||||
return pathStr.startsWith(xorPrefix);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks whether file is rs parity file
|
||||
*/
|
||||
boolean isRsParityFile(Path p) {
|
||||
String pathStr = p.toUri().getPath();
|
||||
if (pathStr.contains(RaidNode.HAR_SUFFIX)) {
|
||||
return false;
|
||||
}
|
||||
return pathStr.startsWith(rsPrefix);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix a file, do not report progess.
|
||||
*
|
||||
* @return true if file has been fixed, false if no fixing
|
||||
* was necessary or possible.
|
||||
*/
|
||||
boolean fixFile(Path srcPath) throws IOException {
|
||||
return fixFile(srcPath, new RaidUtils.DummyProgressable());
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix a file, report progess.
|
||||
*
|
||||
* @return true if file has been fixed, false if no fixing
|
||||
* was necessary or possible.
|
||||
*/
|
||||
boolean fixFile(Path srcPath, Progressable progress) throws IOException {
|
||||
|
||||
if (RaidNode.isParityHarPartFile(srcPath)) {
|
||||
return processCorruptParityHarPartFile(srcPath, progress);
|
||||
}
|
||||
|
||||
// The corrupted file is a XOR parity file
|
||||
if (isXorParityFile(srcPath)) {
|
||||
return processCorruptParityFile(srcPath, xorEncoder, progress);
|
||||
}
|
||||
|
||||
// The corrupted file is a ReedSolomon parity file
|
||||
if (isRsParityFile(srcPath)) {
|
||||
return processCorruptParityFile(srcPath, rsEncoder, progress);
|
||||
}
|
||||
|
||||
// The corrupted file is a source file
|
||||
RaidNode.ParityFilePair ppair =
|
||||
RaidNode.xorParityForSource(srcPath, getConf());
|
||||
Decoder decoder = null;
|
||||
if (ppair != null) {
|
||||
decoder = xorDecoder;
|
||||
} else {
|
||||
ppair = RaidNode.rsParityForSource(srcPath, getConf());
|
||||
if (ppair != null) {
|
||||
decoder = rsDecoder;
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a parity file, process the file and fix it.
|
||||
if (ppair != null) {
|
||||
return processCorruptFile(srcPath, ppair, decoder, progress);
|
||||
}
|
||||
|
||||
// there was nothing to do
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts source files ahead of parity files.
|
||||
*/
|
||||
void sortCorruptFiles(List<Path> files) {
|
||||
// TODO: We should first fix the files that lose more blocks
|
||||
Comparator<Path> comp = new Comparator<Path>() {
|
||||
public int compare(Path p1, Path p2) {
|
||||
if (isXorParityFile(p2) || isRsParityFile(p2)) {
|
||||
// If p2 is a parity file, p1 is smaller.
|
||||
return -1;
|
||||
}
|
||||
if (isXorParityFile(p1) || isRsParityFile(p1)) {
|
||||
// If p1 is a parity file, p2 is smaller.
|
||||
return 1;
|
||||
}
|
||||
// If both are source files, they are equal.
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
Collections.sort(files, comp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a DistributedFileSystem hosting the path supplied.
|
||||
*/
|
||||
protected DistributedFileSystem getDFS(Path p) throws IOException {
|
||||
return (DistributedFileSystem) p.getFileSystem(getConf());
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads through a corrupt source file fixing corrupt blocks on the way.
|
||||
* @param srcPath Path identifying the corrupt file.
|
||||
* @throws IOException
|
||||
* @return true if file has been fixed, false if no fixing
|
||||
* was necessary or possible.
|
||||
*/
|
||||
boolean processCorruptFile(Path srcPath, RaidNode.ParityFilePair parityPair,
|
||||
Decoder decoder, Progressable progress)
|
||||
throws IOException {
|
||||
LOG.info("Processing corrupt file " + srcPath);
|
||||
|
||||
DistributedFileSystem srcFs = getDFS(srcPath);
|
||||
FileStatus srcStat = srcFs.getFileStatus(srcPath);
|
||||
long blockSize = srcStat.getBlockSize();
|
||||
long srcFileSize = srcStat.getLen();
|
||||
String uriPath = srcPath.toUri().getPath();
|
||||
|
||||
int numBlocksFixed = 0;
|
||||
List<LocatedBlock> corrupt =
|
||||
RaidDFSUtil.corruptBlocksInFile(srcFs, uriPath, 0, srcFileSize);
|
||||
if (corrupt.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
for (LocatedBlock lb: corrupt) {
|
||||
ExtendedBlock corruptBlock = lb.getBlock();
|
||||
long corruptOffset = lb.getStartOffset();
|
||||
|
||||
LOG.info("Found corrupt block " + corruptBlock +
|
||||
", offset " + corruptOffset);
|
||||
|
||||
final long blockContentsSize =
|
||||
Math.min(blockSize, srcFileSize - corruptOffset);
|
||||
File localBlockFile =
|
||||
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
|
||||
localBlockFile.deleteOnExit();
|
||||
|
||||
try {
|
||||
decoder.recoverBlockToFile(srcFs, srcPath, parityPair.getFileSystem(),
|
||||
parityPair.getPath(), blockSize,
|
||||
corruptOffset, localBlockFile,
|
||||
blockContentsSize);
|
||||
|
||||
// We have a the contents of the block, send them.
|
||||
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
|
||||
computeMetadataAndSendFixedBlock(datanode, localBlockFile,
|
||||
lb, blockContentsSize);
|
||||
numBlocksFixed++;
|
||||
} finally {
|
||||
localBlockFile.delete();
|
||||
}
|
||||
progress.progress();
|
||||
}
|
||||
LOG.info("Fixed " + numBlocksFixed + " blocks in " + srcPath);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fixes corrupt blocks in a parity file.
|
||||
* This function uses the corresponding source file to regenerate parity
|
||||
* file blocks.
|
||||
* @return true if file has been fixed, false if no fixing
|
||||
* was necessary or possible.
|
||||
*/
|
||||
boolean processCorruptParityFile(Path parityPath, Encoder encoder,
|
||||
Progressable progress)
|
||||
throws IOException {
|
||||
LOG.info("Processing corrupt file " + parityPath);
|
||||
Path srcPath = sourcePathFromParityPath(parityPath);
|
||||
if (srcPath == null) {
|
||||
LOG.warn("Unusable parity file " + parityPath);
|
||||
return false;
|
||||
}
|
||||
|
||||
DistributedFileSystem parityFs = getDFS(parityPath);
|
||||
FileStatus parityStat = parityFs.getFileStatus(parityPath);
|
||||
long blockSize = parityStat.getBlockSize();
|
||||
long parityFileSize = parityStat.getLen();
|
||||
FileStatus srcStat = getDFS(srcPath).getFileStatus(srcPath);
|
||||
long srcFileSize = srcStat.getLen();
|
||||
|
||||
// Check timestamp.
|
||||
if (srcStat.getModificationTime() != parityStat.getModificationTime()) {
|
||||
LOG.info("Mismatching timestamp for " + srcPath + " and " + parityPath +
|
||||
", moving on...");
|
||||
return false;
|
||||
}
|
||||
|
||||
String uriPath = parityPath.toUri().getPath();
|
||||
int numBlocksFixed = 0;
|
||||
List<LocatedBlock> corrupt =
|
||||
RaidDFSUtil.corruptBlocksInFile(parityFs, uriPath, 0, parityFileSize);
|
||||
if (corrupt.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
for (LocatedBlock lb: corrupt) {
|
||||
ExtendedBlock corruptBlock = lb.getBlock();
|
||||
long corruptOffset = lb.getStartOffset();
|
||||
|
||||
LOG.info("Found corrupt block " + corruptBlock +
|
||||
", offset " + corruptOffset);
|
||||
|
||||
File localBlockFile =
|
||||
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
|
||||
localBlockFile.deleteOnExit();
|
||||
|
||||
try {
|
||||
encoder.recoverParityBlockToFile(parityFs, srcPath, srcFileSize,
|
||||
blockSize, parityPath,
|
||||
corruptOffset, localBlockFile);
|
||||
// We have a the contents of the block, send them.
|
||||
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
|
||||
computeMetadataAndSendFixedBlock(datanode, localBlockFile, lb,
|
||||
blockSize);
|
||||
|
||||
numBlocksFixed++;
|
||||
} finally {
|
||||
localBlockFile.delete();
|
||||
}
|
||||
progress.progress();
|
||||
}
|
||||
LOG.info("Fixed " + numBlocksFixed + " blocks in " + parityPath);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads through a parity HAR part file, fixing corrupt blocks on the way.
|
||||
* A HAR block can contain many file blocks, as long as the HAR part file
|
||||
* block size is a multiple of the file block size.
|
||||
* @return true if file has been fixed, false if no fixing
|
||||
* was necessary or possible.
|
||||
*/
|
||||
boolean processCorruptParityHarPartFile(Path partFile,
|
||||
Progressable progress)
|
||||
throws IOException {
|
||||
LOG.info("Processing corrupt file " + partFile);
|
||||
// Get some basic information.
|
||||
DistributedFileSystem dfs = getDFS(partFile);
|
||||
FileStatus partFileStat = dfs.getFileStatus(partFile);
|
||||
long partFileSize = partFileStat.getLen();
|
||||
long partFileBlockSize = partFileStat.getBlockSize();
|
||||
LOG.info(partFile + " has block size " + partFileBlockSize);
|
||||
|
||||
// Find the path to the index file.
|
||||
// Parity file HARs are only one level deep, so the index files is at the
|
||||
// same level as the part file.
|
||||
String harDirectory = partFile.toUri().getPath(); // Temporarily.
|
||||
harDirectory =
|
||||
harDirectory.substring(0, harDirectory.lastIndexOf(Path.SEPARATOR));
|
||||
Path indexFile = new Path(harDirectory + "/" + HarIndex.indexFileName);
|
||||
FileStatus indexStat = dfs.getFileStatus(indexFile);
|
||||
// Parses through the HAR index file.
|
||||
HarIndex harIndex = new HarIndex(dfs.open(indexFile), indexStat.getLen());
|
||||
|
||||
String uriPath = partFile.toUri().getPath();
|
||||
int numBlocksFixed = 0;
|
||||
List<LocatedBlock> corrupt =
|
||||
RaidDFSUtil.corruptBlocksInFile(dfs, uriPath, 0, partFileSize);
|
||||
if (corrupt.size() == 0) {
|
||||
return false;
|
||||
}
|
||||
for (LocatedBlock lb: corrupt) {
|
||||
ExtendedBlock corruptBlock = lb.getBlock();
|
||||
long corruptOffset = lb.getStartOffset();
|
||||
|
||||
File localBlockFile =
|
||||
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
|
||||
localBlockFile.deleteOnExit();
|
||||
processCorruptParityHarPartBlock(dfs, partFile, corruptBlock,
|
||||
corruptOffset, partFileStat, harIndex,
|
||||
localBlockFile, progress);
|
||||
// Now we have recovered the part file block locally, send it.
|
||||
try {
|
||||
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
|
||||
computeMetadataAndSendFixedBlock(datanode, localBlockFile,
|
||||
lb, localBlockFile.length());
|
||||
numBlocksFixed++;
|
||||
} finally {
|
||||
localBlockFile.delete();
|
||||
}
|
||||
progress.progress();
|
||||
}
|
||||
LOG.info("Fixed " + numBlocksFixed + " blocks in " + partFile);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This fixes a single part file block by recovering in sequence each
|
||||
* parity block in the part file block.
|
||||
*/
|
||||
private void processCorruptParityHarPartBlock(FileSystem dfs, Path partFile,
|
||||
ExtendedBlock corruptBlock,
|
||||
long corruptOffset,
|
||||
FileStatus partFileStat,
|
||||
HarIndex harIndex,
|
||||
File localBlockFile,
|
||||
Progressable progress)
|
||||
throws IOException {
|
||||
String partName = partFile.toUri().getPath(); // Temporarily.
|
||||
partName = partName.substring(1 + partName.lastIndexOf(Path.SEPARATOR));
|
||||
|
||||
OutputStream out = new FileOutputStream(localBlockFile);
|
||||
|
||||
try {
|
||||
// A HAR part file block could map to several parity files. We need to
|
||||
// use all of them to recover this block.
|
||||
final long corruptEnd = Math.min(corruptOffset +
|
||||
partFileStat.getBlockSize(),
|
||||
partFileStat.getLen());
|
||||
for (long offset = corruptOffset; offset < corruptEnd; ) {
|
||||
HarIndex.IndexEntry entry = harIndex.findEntry(partName, offset);
|
||||
if (entry == null) {
|
||||
String msg = "Corrupt index file has no matching index entry for " +
|
||||
partName + ":" + offset;
|
||||
LOG.warn(msg);
|
||||
throw new IOException(msg);
|
||||
}
|
||||
Path parityFile = new Path(entry.fileName);
|
||||
Encoder encoder;
|
||||
if (isXorParityFile(parityFile)) {
|
||||
encoder = xorEncoder;
|
||||
} else if (isRsParityFile(parityFile)) {
|
||||
encoder = rsEncoder;
|
||||
} else {
|
||||
String msg = "Could not figure out parity file correctly";
|
||||
LOG.warn(msg);
|
||||
throw new IOException(msg);
|
||||
}
|
||||
Path srcFile = sourcePathFromParityPath(parityFile);
|
||||
FileStatus srcStat = dfs.getFileStatus(srcFile);
|
||||
if (srcStat.getModificationTime() != entry.mtime) {
|
||||
String msg = "Modification times of " + parityFile + " and " +
|
||||
srcFile + " do not match.";
|
||||
LOG.warn(msg);
|
||||
throw new IOException(msg);
|
||||
}
|
||||
long corruptOffsetInParity = offset - entry.startOffset;
|
||||
LOG.info(partFile + ":" + offset + " maps to " +
|
||||
parityFile + ":" + corruptOffsetInParity +
|
||||
" and will be recovered from " + srcFile);
|
||||
encoder.recoverParityBlockToStream(dfs, srcFile, srcStat.getLen(),
|
||||
srcStat.getBlockSize(), parityFile,
|
||||
corruptOffsetInParity, out);
|
||||
// Finished recovery of one parity block. Since a parity block has the
|
||||
// same size as a source block, we can move offset by source block size.
|
||||
offset += srcStat.getBlockSize();
|
||||
LOG.info("Recovered " + srcStat.getBlockSize() + " part file bytes ");
|
||||
if (offset > corruptEnd) {
|
||||
String msg =
|
||||
"Recovered block spills across part file blocks. Cannot continue.";
|
||||
throw new IOException(msg);
|
||||
}
|
||||
progress.progress();
|
||||
}
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose a datanode (hostname:portnumber). The datanode is chosen at
|
||||
* random from the live datanodes.
|
||||
* @param locationsToAvoid locations to avoid.
|
||||
* @return A datanode
|
||||
* @throws IOException
|
||||
*/
|
||||
private DatanodeInfo chooseDatanode(DatanodeInfo[] locationsToAvoid)
|
||||
throws IOException {
|
||||
DistributedFileSystem dfs = getDFS(new Path("/"));
|
||||
DatanodeInfo[] live =
|
||||
dfs.getClient().datanodeReport(DatanodeReportType.LIVE);
|
||||
LOG.info("Choosing a datanode from " + live.length +
|
||||
" live nodes while avoiding " + locationsToAvoid.length);
|
||||
Random rand = new Random();
|
||||
DatanodeInfo chosen = null;
|
||||
int maxAttempts = 1000;
|
||||
for (int i = 0; i < maxAttempts && chosen == null; i++) {
|
||||
int idx = rand.nextInt(live.length);
|
||||
chosen = live[idx];
|
||||
for (DatanodeInfo avoid: locationsToAvoid) {
|
||||
if (chosen.getName().equals(avoid.getName())) {
|
||||
LOG.info("Avoiding " + avoid.getName());
|
||||
chosen = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (chosen == null) {
|
||||
throw new IOException("Could not choose datanode");
|
||||
}
|
||||
LOG.info("Choosing datanode " + chosen.getName());
|
||||
return chosen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads data from the data stream provided and computes metadata.
|
||||
*/
|
||||
static DataInputStream computeMetadata(Configuration conf,
|
||||
InputStream dataStream)
|
||||
throws IOException {
|
||||
ByteArrayOutputStream mdOutBase = new ByteArrayOutputStream(1024*1024);
|
||||
DataOutputStream mdOut = new DataOutputStream(mdOutBase);
|
||||
|
||||
// First, write out the version.
|
||||
mdOut.writeShort(BlockMetadataHeader.VERSION);
|
||||
|
||||
// Create a summer and write out its header.
|
||||
int bytesPerChecksum = conf.getInt("dfs.bytes-per-checksum", 512);
|
||||
DataChecksum sum =
|
||||
DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
|
||||
bytesPerChecksum);
|
||||
sum.writeHeader(mdOut);
|
||||
|
||||
// Buffer to read in a chunk of data.
|
||||
byte[] buf = new byte[bytesPerChecksum];
|
||||
// Buffer to store the checksum bytes.
|
||||
byte[] chk = new byte[sum.getChecksumSize()];
|
||||
|
||||
// Read data till we reach the end of the input stream.
|
||||
int bytesSinceFlush = 0;
|
||||
while (true) {
|
||||
// Read some bytes.
|
||||
int bytesRead = dataStream.read(buf, bytesSinceFlush,
|
||||
bytesPerChecksum-bytesSinceFlush);
|
||||
if (bytesRead == -1) {
|
||||
if (bytesSinceFlush > 0) {
|
||||
boolean reset = true;
|
||||
sum.writeValue(chk, 0, reset); // This also resets the sum.
|
||||
// Write the checksum to the stream.
|
||||
mdOut.write(chk, 0, chk.length);
|
||||
bytesSinceFlush = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Update the checksum.
|
||||
sum.update(buf, bytesSinceFlush, bytesRead);
|
||||
bytesSinceFlush += bytesRead;
|
||||
|
||||
// Flush the checksum if necessary.
|
||||
if (bytesSinceFlush == bytesPerChecksum) {
|
||||
boolean reset = true;
|
||||
sum.writeValue(chk, 0, reset); // This also resets the sum.
|
||||
// Write the checksum to the stream.
|
||||
mdOut.write(chk, 0, chk.length);
|
||||
bytesSinceFlush = 0;
|
||||
}
|
||||
}
|
||||
|
||||
byte[] mdBytes = mdOutBase.toByteArray();
|
||||
return new DataInputStream(new ByteArrayInputStream(mdBytes));
|
||||
}
|
||||
|
||||
private void computeMetadataAndSendFixedBlock(DatanodeInfo datanode,
|
||||
File localBlockFile,
|
||||
LocatedBlock block,
|
||||
long blockSize)
|
||||
throws IOException {
|
||||
|
||||
LOG.info("Computing metdata");
|
||||
InputStream blockContents = null;
|
||||
DataInputStream blockMetadata = null;
|
||||
try {
|
||||
blockContents = new FileInputStream(localBlockFile);
|
||||
blockMetadata = computeMetadata(getConf(), blockContents);
|
||||
blockContents.close();
|
||||
// Reopen
|
||||
blockContents = new FileInputStream(localBlockFile);
|
||||
sendFixedBlock(datanode, blockContents, blockMetadata, block,
|
||||
blockSize);
|
||||
} finally {
|
||||
if (blockContents != null) {
|
||||
blockContents.close();
|
||||
blockContents = null;
|
||||
}
|
||||
if (blockMetadata != null) {
|
||||
blockMetadata.close();
|
||||
blockMetadata = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a generated block to a datanode.
|
||||
* @param datanode Chosen datanode name in host:port form.
|
||||
* @param blockContents Stream with the block contents.
|
||||
* @param corruptBlock Block identifying the block to be sent.
|
||||
* @param blockSize size of the block.
|
||||
* @throws IOException
|
||||
*/
|
||||
private void sendFixedBlock(DatanodeInfo datanode,
|
||||
final InputStream blockContents,
|
||||
DataInputStream metadataIn,
|
||||
LocatedBlock block, long blockSize)
|
||||
throws IOException {
|
||||
InetSocketAddress target = NetUtils.createSocketAddr(datanode.getName());
|
||||
Socket sock = SocketChannel.open().socket();
|
||||
|
||||
int readTimeout =
|
||||
getConf().getInt(BLOCKFIX_READ_TIMEOUT,
|
||||
HdfsServerConstants.READ_TIMEOUT);
|
||||
NetUtils.connect(sock, target, readTimeout);
|
||||
sock.setSoTimeout(readTimeout);
|
||||
|
||||
int writeTimeout = getConf().getInt(BLOCKFIX_WRITE_TIMEOUT,
|
||||
HdfsServerConstants.WRITE_TIMEOUT);
|
||||
|
||||
OutputStream baseStream = NetUtils.getOutputStream(sock, writeTimeout);
|
||||
DataOutputStream out =
|
||||
new DataOutputStream(new BufferedOutputStream(baseStream,
|
||||
HdfsConstants.
|
||||
SMALL_BUFFER_SIZE));
|
||||
|
||||
boolean corruptChecksumOk = false;
|
||||
boolean chunkOffsetOK = false;
|
||||
boolean verifyChecksum = true;
|
||||
boolean transferToAllowed = false;
|
||||
|
||||
try {
|
||||
LOG.info("Sending block " + block.getBlock() +
|
||||
" from " + sock.getLocalSocketAddress().toString() +
|
||||
" to " + sock.getRemoteSocketAddress().toString() +
|
||||
" " + blockSize + " bytes");
|
||||
RaidBlockSender blockSender =
|
||||
new RaidBlockSender(block.getBlock(), blockSize, 0, blockSize,
|
||||
corruptChecksumOk, chunkOffsetOK, verifyChecksum,
|
||||
transferToAllowed, metadataIn,
|
||||
new RaidBlockSender.InputStreamFactory() {
|
||||
@Override
|
||||
public InputStream
|
||||
createStream(long offset) throws IOException {
|
||||
// we are passing 0 as the offset above,
|
||||
// so we can safely ignore
|
||||
// the offset passed
|
||||
return blockContents;
|
||||
}
|
||||
});
|
||||
|
||||
DatanodeInfo[] nodes = new DatanodeInfo[]{datanode};
|
||||
DataChecksum checksum = blockSender.getChecksum();
|
||||
new Sender(out).writeBlock(block.getBlock(), block.getBlockToken(), "",
|
||||
nodes, null, BlockConstructionStage.PIPELINE_SETUP_CREATE,
|
||||
1, 0L, blockSize, 0L, DataChecksum.newDataChecksum(
|
||||
checksum.getChecksumType(), checksum.getBytesPerChecksum()));
|
||||
blockSender.sendBlock(out, baseStream);
|
||||
|
||||
LOG.info("Sent block " + block.getBlock() + " to " + datanode.getName());
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the source file corresponding to a parity file
|
||||
*/
|
||||
Path sourcePathFromParityPath(Path parityPath) {
|
||||
String parityPathStr = parityPath.toUri().getPath();
|
||||
if (parityPathStr.startsWith(xorPrefix)) {
|
||||
// Remove the prefix to get the source file.
|
||||
String src = parityPathStr.replaceFirst(xorPrefix, "/");
|
||||
return new Path(src);
|
||||
} else if (parityPathStr.startsWith(rsPrefix)) {
|
||||
// Remove the prefix to get the source file.
|
||||
String src = parityPathStr.replaceFirst(rsPrefix, "/");
|
||||
return new Path(src);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the corrupt blocks in a file.
|
||||
*/
|
||||
List<LocatedBlock> corruptBlocksInFile(DistributedFileSystem fs,
|
||||
String uriPath, FileStatus stat)
|
||||
throws IOException {
|
||||
List<LocatedBlock> corrupt = new LinkedList<LocatedBlock>();
|
||||
LocatedBlocks locatedBlocks =
|
||||
RaidDFSUtil.getBlockLocations(fs, uriPath, 0, stat.getLen());
|
||||
for (LocatedBlock b: locatedBlocks.getLocatedBlocks()) {
|
||||
if (b.isCorrupt() ||
|
||||
(b.getLocations().length == 0 && b.getBlockSize() > 0)) {
|
||||
corrupt.add(b);
|
||||
}
|
||||
}
|
||||
return corrupt;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,408 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Text;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.raid.protocol.PolicyList;
|
||||
|
||||
/**
|
||||
* Maintains the configuration xml file that is read into memory.
|
||||
*/
|
||||
class ConfigManager {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.ConfigManager");
|
||||
|
||||
/** Time to wait between checks of the config file */
|
||||
public static final long RELOAD_INTERVAL = 10 * 1000;
|
||||
|
||||
/** Time to wait between successive runs of all policies */
|
||||
public static final long RESCAN_INTERVAL = 3600 * 1000;
|
||||
|
||||
public static final long HAR_PARTFILE_SIZE = 10 * 1024 * 1024 * 1024l;
|
||||
|
||||
public static final int DISTRAID_MAX_JOBS = 10;
|
||||
|
||||
public static final int DISTRAID_MAX_FILES = 10000;
|
||||
|
||||
/**
|
||||
* Time to wait after the config file has been modified before reloading it
|
||||
* (this is done to prevent loading a file that hasn't been fully written).
|
||||
*/
|
||||
public static final long RELOAD_WAIT = 5 * 1000;
|
||||
|
||||
private Configuration conf; // Hadoop configuration
|
||||
private String configFileName; // Path to config XML file
|
||||
|
||||
private long lastReloadAttempt; // Last time we tried to reload the config file
|
||||
private long lastSuccessfulReload; // Last time we successfully reloaded config
|
||||
private boolean lastReloadAttemptFailed = false;
|
||||
private long reloadInterval = RELOAD_INTERVAL;
|
||||
private long periodicity; // time between runs of all policies
|
||||
private long harPartfileSize;
|
||||
private int maxJobsPerPolicy; // Max no. of jobs running simultaneously for
|
||||
// a policy.
|
||||
private int maxFilesPerJob; // Max no. of files raided by a job.
|
||||
|
||||
// Reload the configuration
|
||||
private boolean doReload;
|
||||
private Thread reloadThread;
|
||||
private volatile boolean running = false;
|
||||
|
||||
// Collection of all configured policies.
|
||||
Collection<PolicyList> allPolicies = new ArrayList<PolicyList>();
|
||||
|
||||
/**
 * Reads the raid settings from the given Hadoop configuration and performs
 * the initial load of the policy file named by "raid.config.file".
 *
 * @param conf the Hadoop configuration to read settings from.
 * @throws IOException if "raid.config.file" is not set, or the policy file
 *         cannot be read.
 */
public ConfigManager(Configuration conf) throws IOException, SAXException,
    RaidConfigurationException, ClassNotFoundException, ParserConfigurationException {
  this.conf = conf;
  configFileName = conf.get("raid.config.file");
  doReload = conf.getBoolean("raid.config.reload", true);
  reloadInterval = conf.getLong("raid.config.reload.interval", RELOAD_INTERVAL);
  periodicity = conf.getLong("raid.policy.rescan.interval", RESCAN_INTERVAL);
  harPartfileSize = conf.getLong("raid.har.partfile.size", HAR_PARTFILE_SIZE);
  maxJobsPerPolicy = conf.getInt("raid.distraid.max.jobs", DISTRAID_MAX_JOBS);
  maxFilesPerJob = conf.getInt("raid.distraid.max.files", DISTRAID_MAX_FILES);

  // Without a policy file there is nothing to manage.
  if (configFileName == null) {
    String msg = "No raid.config.file given in conf - " +
                 "the Hadoop Raid utility cannot run. Aborting....";
    LOG.warn(msg);
    throw new IOException(msg);
  }

  // Initial load; any parse failure propagates to the caller.
  reloadConfigs();
  lastSuccessfulReload = RaidNode.now();
  lastReloadAttempt = RaidNode.now();
  running = true;
}
|
||||
|
||||
/**
|
||||
* Reload config file if it hasn't been loaded in a while
|
||||
* Returns true if the file was reloaded.
|
||||
*/
|
||||
public synchronized boolean reloadConfigsIfNecessary() {
|
||||
long time = RaidNode.now();
|
||||
if (time > lastReloadAttempt + reloadInterval) {
|
||||
lastReloadAttempt = time;
|
||||
try {
|
||||
File file = new File(configFileName);
|
||||
long lastModified = file.lastModified();
|
||||
if (lastModified > lastSuccessfulReload &&
|
||||
time > lastModified + RELOAD_WAIT) {
|
||||
reloadConfigs();
|
||||
lastSuccessfulReload = time;
|
||||
lastReloadAttemptFailed = false;
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (!lastReloadAttemptFailed) {
|
||||
LOG.error("Failed to reload config file - " +
|
||||
"will use existing configuration.", e);
|
||||
}
|
||||
lastReloadAttemptFailed = true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the in-memory data structures from the config file. This file is
|
||||
* expected to be in the following whitespace-separated format:
|
||||
*
|
||||
<configuration>
|
||||
<srcPath prefix="hdfs://hadoop.myhost.com:9000/user/warehouse/u_full/*">
|
||||
<policy name = RaidScanWeekly>
|
||||
<destPath> hdfs://dfsname.myhost.com:9000/archive/</destPath>
|
||||
<parentPolicy> RaidScanMonthly</parentPolicy>
|
||||
<property>
|
||||
<name>targetReplication</name>
|
||||
<value>2</value>
|
||||
<description> after RAIDing, decrease the replication factor of the file to
|
||||
this value.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>metaReplication</name>
|
||||
<value>2</value>
|
||||
<description> the replication factor of the RAID meta file
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>stripeLength</name>
|
||||
<value>10</value>
|
||||
<description> the number of blocks to RAID together
|
||||
</description>
|
||||
</property>
|
||||
</policy>
|
||||
</srcPath>
|
||||
</configuration>
|
||||
*
|
||||
* Blank lines and lines starting with # are ignored.
|
||||
*
|
||||
* @throws IOException if the config file cannot be read.
|
||||
* @throws RaidConfigurationException if configuration entries are invalid.
|
||||
* @throws ClassNotFoundException if user-defined policy classes cannot be loaded
|
||||
* @throws ParserConfigurationException if XML parser is misconfigured.
|
||||
* @throws SAXException if config file is malformed.
|
||||
* @returns A new set of policy categories.
|
||||
*/
|
||||
void reloadConfigs() throws IOException, ParserConfigurationException,
|
||||
SAXException, ClassNotFoundException, RaidConfigurationException {
|
||||
|
||||
if (configFileName == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
File file = new File(configFileName);
|
||||
if (!file.exists()) {
|
||||
throw new RaidConfigurationException("Configuration file " + configFileName +
|
||||
" does not exist.");
|
||||
}
|
||||
|
||||
// Create some temporary hashmaps to hold the new allocs, and we only save
|
||||
// them in our fields if we have parsed the entire allocs file successfully.
|
||||
List<PolicyList> all = new ArrayList<PolicyList>();
|
||||
long periodicityValue = periodicity;
|
||||
|
||||
|
||||
// Read and parse the configuration file.
|
||||
// allow include files in configuration file
|
||||
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
|
||||
docBuilderFactory.setIgnoringComments(true);
|
||||
docBuilderFactory.setNamespaceAware(true);
|
||||
try {
|
||||
docBuilderFactory.setXIncludeAware(true);
|
||||
} catch (UnsupportedOperationException e) {
|
||||
LOG.error("Failed to set setXIncludeAware(true) for raid parser "
|
||||
+ docBuilderFactory + ":" + e, e);
|
||||
}
|
||||
LOG.error("Reloading config file " + file);
|
||||
|
||||
DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
|
||||
Document doc = builder.parse(file);
|
||||
Element root = doc.getDocumentElement();
|
||||
if (!"configuration".equalsIgnoreCase(root.getTagName()))
|
||||
throw new RaidConfigurationException("Bad configuration file: " +
|
||||
"top-level element not <configuration>");
|
||||
NodeList elements = root.getChildNodes();
|
||||
|
||||
Map<String, PolicyInfo> existingPolicies =
|
||||
new HashMap<String, PolicyInfo>();
|
||||
// loop through all the configured source paths.
|
||||
for (int i = 0; i < elements.getLength(); i++) {
|
||||
Node node = elements.item(i);
|
||||
if (!(node instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
Element element = (Element)node;
|
||||
String elementTagName = element.getTagName();
|
||||
if ("srcPath".equalsIgnoreCase(elementTagName)) {
|
||||
String srcPathPrefix = element.getAttribute("prefix");
|
||||
|
||||
PolicyList policyList = null;
|
||||
if (srcPathPrefix != null && srcPathPrefix.length() != 0) {
|
||||
// Empty srcPath will have no effect but policies will be processed
|
||||
// This allow us to define some "abstract" policies
|
||||
policyList = new PolicyList();
|
||||
all.add(policyList);
|
||||
policyList.setSrcPath(conf, srcPathPrefix);
|
||||
}
|
||||
|
||||
// loop through all the policies for this source path
|
||||
NodeList policies = element.getChildNodes();
|
||||
for (int j = 0; j < policies.getLength(); j++) {
|
||||
Node node1 = policies.item(j);
|
||||
if (!(node1 instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
Element policy = (Element)node1;
|
||||
if (!"policy".equalsIgnoreCase(policy.getTagName())) {
|
||||
throw new RaidConfigurationException("Bad configuration file: " +
|
||||
"Expecting <policy> for srcPath " + srcPathPrefix);
|
||||
}
|
||||
String policyName = policy.getAttribute("name");
|
||||
PolicyInfo curr = new PolicyInfo(policyName, conf);
|
||||
if (srcPathPrefix != null && srcPathPrefix.length() > 0) {
|
||||
curr.setSrcPath(srcPathPrefix);
|
||||
}
|
||||
// loop through all the properties of this policy
|
||||
NodeList properties = policy.getChildNodes();
|
||||
PolicyInfo parent = null;
|
||||
for (int k = 0; k < properties.getLength(); k++) {
|
||||
Node node2 = properties.item(k);
|
||||
if (!(node2 instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
Element property = (Element)node2;
|
||||
String propertyName = property.getTagName();
|
||||
if ("erasureCode".equalsIgnoreCase(propertyName)) {
|
||||
String text = ((Text)property.getFirstChild()).getData().trim();
|
||||
LOG.info(policyName + ".erasureCode = " + text);
|
||||
curr.setErasureCode(text);
|
||||
} else if ("description".equalsIgnoreCase(propertyName)) {
|
||||
String text = ((Text)property.getFirstChild()).getData().trim();
|
||||
curr.setDescription(text);
|
||||
} else if ("parentPolicy".equalsIgnoreCase(propertyName)) {
|
||||
String text = ((Text)property.getFirstChild()).getData().trim();
|
||||
parent = existingPolicies.get(text);
|
||||
} else if ("property".equalsIgnoreCase(propertyName)) {
|
||||
NodeList nl = property.getChildNodes();
|
||||
String pname=null,pvalue=null;
|
||||
for (int l = 0; l < nl.getLength(); l++){
|
||||
Node node3 = nl.item(l);
|
||||
if (!(node3 instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
Element item = (Element) node3;
|
||||
String itemName = item.getTagName();
|
||||
if ("name".equalsIgnoreCase(itemName)){
|
||||
pname = ((Text)item.getFirstChild()).getData().trim();
|
||||
} else if ("value".equalsIgnoreCase(itemName)){
|
||||
pvalue = ((Text)item.getFirstChild()).getData().trim();
|
||||
}
|
||||
}
|
||||
if (pname != null && pvalue != null) {
|
||||
LOG.info(policyName + "." + pname + " = " + pvalue);
|
||||
curr.setProperty(pname,pvalue);
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Found bad property " + propertyName +
|
||||
" for srcPath" + srcPathPrefix +
|
||||
" policy name " + policyName +
|
||||
". Ignoring.");
|
||||
}
|
||||
} // done with all properties of this policy
|
||||
|
||||
PolicyInfo pinfo;
|
||||
if (parent != null) {
|
||||
pinfo = new PolicyInfo(policyName, conf);
|
||||
pinfo.copyFrom(parent);
|
||||
pinfo.copyFrom(curr);
|
||||
} else {
|
||||
pinfo = curr;
|
||||
}
|
||||
if (policyList != null) {
|
||||
policyList.add(pinfo);
|
||||
}
|
||||
existingPolicies.put(policyName, pinfo);
|
||||
|
||||
} // done with all policies for this srcpath
|
||||
}
|
||||
} // done with all srcPaths
|
||||
setAllPolicies(all);
|
||||
periodicity = periodicityValue;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
public synchronized long getPeriodicity() {
|
||||
return periodicity;
|
||||
}
|
||||
|
||||
public synchronized long getHarPartfileSize() {
|
||||
return harPartfileSize;
|
||||
}
|
||||
|
||||
public synchronized int getMaxJobsPerPolicy() {
|
||||
return maxJobsPerPolicy;
|
||||
}
|
||||
|
||||
public synchronized int getMaxFilesPerJob() {
|
||||
return maxFilesPerJob;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a collection of all policies
|
||||
*/
|
||||
public synchronized Collection<PolicyList> getAllPolicies() {
|
||||
return new ArrayList(allPolicies);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a collection of all policies
|
||||
*/
|
||||
protected synchronized void setAllPolicies(Collection<PolicyList> value) {
|
||||
this.allPolicies = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a background thread to reload the config file
|
||||
*/
|
||||
void startReload() {
|
||||
if (doReload) {
|
||||
reloadThread = new UpdateThread();
|
||||
reloadThread.start();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the background thread that reload the config file
|
||||
*/
|
||||
void stopReload() throws InterruptedException {
|
||||
if (reloadThread != null) {
|
||||
running = false;
|
||||
reloadThread.interrupt();
|
||||
reloadThread.join();
|
||||
reloadThread = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A thread which reloads the config file.
|
||||
*/
|
||||
private class UpdateThread extends Thread {
|
||||
private UpdateThread() {
|
||||
super("Raid update thread");
|
||||
}
|
||||
|
||||
public void run() {
|
||||
while (running) {
|
||||
try {
|
||||
Thread.sleep(reloadInterval);
|
||||
reloadConfigsIfNecessary();
|
||||
} catch (InterruptedException e) {
|
||||
// do nothing
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to reload config file ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,213 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.BlockMissingException;
|
||||
import org.apache.hadoop.fs.ChecksumException;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* Represents a generic decoder that can be used to read a file with
|
||||
* corrupt blocks by using the parity file.
|
||||
* This is an abstract class, concrete subclasses need to implement
|
||||
* fixErasedBlock.
|
||||
*/
|
||||
public abstract class Decoder {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.Decoder");
|
||||
protected Configuration conf;
|
||||
protected int stripeSize;
|
||||
protected int paritySize;
|
||||
protected Random rand;
|
||||
protected int bufSize;
|
||||
protected byte[][] readBufs;
|
||||
protected byte[][] writeBufs;
|
||||
|
||||
/**
 * Creates a decoder and allocates its read and write buffers.
 *
 * @param conf configuration; "raid.decoder.bufsize" sets the buffer size
 *        (1 MB when unset).
 * @param stripeSize stripe size; together with paritySize it determines the
 *        number of read buffers.
 * @param paritySize parity size; determines the number of write buffers.
 */
Decoder(Configuration conf, int stripeSize, int paritySize) {
  this.conf = conf;
  this.stripeSize = stripeSize;
  this.paritySize = paritySize;
  rand = new Random();
  bufSize = conf.getInt("raid.decoder.bufsize", 1024 * 1024);
  // Only the outer arrays are created here; allocateBuffers() fills them.
  readBufs = new byte[stripeSize + paritySize][];
  writeBufs = new byte[paritySize][];
  allocateBuffers();
}
|
||||
|
||||
private void allocateBuffers() {
|
||||
for (int i = 0; i < stripeSize + paritySize; i++) {
|
||||
readBufs[i] = new byte[bufSize];
|
||||
}
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
writeBufs[i] = new byte[bufSize];
|
||||
}
|
||||
}
|
||||
|
||||
private void configureBuffers(long blockSize) {
|
||||
if ((long)bufSize > blockSize) {
|
||||
bufSize = (int)blockSize;
|
||||
allocateBuffers();
|
||||
} else if (blockSize % bufSize != 0) {
|
||||
bufSize = (int)(blockSize / 256L); // heuristic.
|
||||
if (bufSize == 0) {
|
||||
bufSize = 1024;
|
||||
}
|
||||
bufSize = Math.min(bufSize, 1024 * 1024);
|
||||
allocateBuffers();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The interface to generate a decoded file using the good portion of the
|
||||
* source file and the parity file.
|
||||
* @param fs The filesystem containing the source file.
|
||||
* @param srcFile The damaged source file.
|
||||
* @param parityFs The filesystem containing the parity file. This could be
|
||||
* different from fs in case the parity file is part of a HAR archive.
|
||||
* @param parityFile The parity file.
|
||||
* @param errorOffset Known location of error in the source file. There could
|
||||
* be additional errors in the source file that are discovered during
|
||||
* the decode process.
|
||||
* @param decodedFile The decoded file. This will have the exact same contents
|
||||
* as the source file on success.
|
||||
*/
|
||||
public void decodeFile(
|
||||
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
|
||||
long errorOffset, Path decodedFile) throws IOException {
|
||||
|
||||
LOG.info("Create " + decodedFile + " for error at " +
|
||||
srcFile + ":" + errorOffset);
|
||||
FileStatus srcStat = fs.getFileStatus(srcFile);
|
||||
long blockSize = srcStat.getBlockSize();
|
||||
configureBuffers(blockSize);
|
||||
// Move the offset to the start of the block.
|
||||
errorOffset = (errorOffset / blockSize) * blockSize;
|
||||
|
||||
// Create the decoded file.
|
||||
FSDataOutputStream out = fs.create(
|
||||
decodedFile, false, conf.getInt("io.file.buffer.size", 64 * 1024),
|
||||
srcStat.getReplication(), srcStat.getBlockSize());
|
||||
|
||||
// Open the source file.
|
||||
FSDataInputStream in = fs.open(
|
||||
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
||||
|
||||
// Start copying data block-by-block.
|
||||
for (long offset = 0; offset < srcStat.getLen(); offset += blockSize) {
|
||||
long limit = Math.min(blockSize, srcStat.getLen() - offset);
|
||||
long bytesAlreadyCopied = 0;
|
||||
if (offset != errorOffset) {
|
||||
try {
|
||||
in = fs.open(
|
||||
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
||||
in.seek(offset);
|
||||
RaidUtils.copyBytes(in, out, readBufs[0], limit);
|
||||
assert(out.getPos() == offset +limit);
|
||||
LOG.info("Copied till " + out.getPos() + " from " + srcFile);
|
||||
continue;
|
||||
} catch (BlockMissingException e) {
|
||||
LOG.warn("Encountered BME at " + srcFile + ":" + offset);
|
||||
bytesAlreadyCopied = out.getPos() - offset;
|
||||
} catch (ChecksumException e) {
|
||||
LOG.warn("Encountered CE at " + srcFile + ":" + offset);
|
||||
bytesAlreadyCopied = out.getPos() - offset;
|
||||
}
|
||||
}
|
||||
// If we are here offset == errorOffset or we got an exception.
|
||||
// Recover the block starting at offset.
|
||||
fixErasedBlock(fs, srcFile, parityFs, parityFile, blockSize, offset,
|
||||
bytesAlreadyCopied, limit, out);
|
||||
}
|
||||
out.close();
|
||||
|
||||
try {
|
||||
fs.setOwner(decodedFile, srcStat.getOwner(), srcStat.getGroup());
|
||||
fs.setPermission(decodedFile, srcStat.getPermission());
|
||||
fs.setTimes(decodedFile, srcStat.getModificationTime(),
|
||||
srcStat.getAccessTime());
|
||||
} catch (Exception exc) {
|
||||
LOG.warn("Didn't manage to copy meta information because of " + exc +
|
||||
" Ignoring...");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Recovers a corrupt block to local file.
|
||||
*
|
||||
* @param srcFs The filesystem containing the source file.
|
||||
* @param srcPath The damaged source file.
|
||||
* @param parityFs The filesystem containing the parity file. This could be
|
||||
* different from fs in case the parity file is part of a HAR archive.
|
||||
* @param parityPath The parity file.
|
||||
* @param blockSize The block size of the file.
|
||||
* @param blockOffset Known location of error in the source file. There could
|
||||
* be additional errors in the source file that are discovered during
|
||||
* the decode process.
|
||||
* @param localBlockFile The file to write the block to.
|
||||
* @param limit The maximum number of bytes to be written out.
|
||||
* This is to prevent writing beyond the end of the file.
|
||||
*/
|
||||
public void recoverBlockToFile(
|
||||
FileSystem srcFs, Path srcPath, FileSystem parityFs, Path parityPath,
|
||||
long blockSize, long blockOffset, File localBlockFile, long limit)
|
||||
throws IOException {
|
||||
OutputStream out = new FileOutputStream(localBlockFile);
|
||||
fixErasedBlock(srcFs, srcPath, parityFs, parityPath,
|
||||
blockSize, blockOffset, 0, limit, out);
|
||||
out.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation-specific mechanism of writing a fixed block.
|
||||
* @param fs The filesystem containing the source file.
|
||||
* @param srcFile The damaged source file.
|
||||
* @param parityFs The filesystem containing the parity file. This could be
|
||||
* different from fs in case the parity file is part of a HAR archive.
|
||||
* @param parityFile The parity file.
|
||||
* @param blockSize The maximum size of a block.
|
||||
* @param errorOffset Known location of error in the source file. There could
|
||||
* be additional errors in the source file that are discovered during
|
||||
* the decode process.
|
||||
* @param bytesToSkip After the block is generated, these many bytes should be
|
||||
* skipped before writing to the output. This is needed because the
|
||||
* output may have a portion of the block written from the source file
|
||||
* before a new corruption is discovered in the block.
|
||||
* @param limit The maximum number of bytes to be written out, including
|
||||
* bytesToSkip. This is to prevent writing beyond the end of the file.
|
||||
* @param out The output.
|
||||
*/
|
||||
protected abstract void fixErasedBlock(
|
||||
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
|
||||
long blockSize, long errorOffset, long bytesToSkip, long limit,
|
||||
OutputStream out) throws IOException;
|
||||
}
|
|
@ -1,323 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.Semaphore;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Implements depth-first traversal using a Stack object. The traversal
|
||||
* can be stopped at any time and the state of traversal is saved.
|
||||
*/
|
||||
public class DirectoryTraversal {
|
||||
public static final Log LOG =
|
||||
LogFactory.getLog("org.apache.hadoop.raid.DirectoryTraversal");
|
||||
|
||||
private FileSystem fs;
|
||||
private List<FileStatus> paths;
|
||||
private int pathIdx = 0; // Next path to process.
|
||||
private Stack<Node> stack = new Stack<Node>();
|
||||
private ExecutorService executor;
|
||||
|
||||
private int numThreads;
|
||||
|
||||
/**
|
||||
* A FileFilter object can be used to choose files during directory traversal.
|
||||
*/
|
||||
public interface FileFilter {
|
||||
/**
|
||||
* @return a boolean value indicating if the file passes the filter.
|
||||
*/
|
||||
boolean check(FileStatus f) throws IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a directory node in directory traversal.
|
||||
*/
|
||||
static class Node {
|
||||
private FileStatus path; // Path that this node represents.
|
||||
private FileStatus[] elements; // Elements in the node.
|
||||
private int idx = 0;
|
||||
|
||||
public Node(FileStatus path, FileStatus[] elements) {
|
||||
this.path = path;
|
||||
this.elements = elements;
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return idx < elements.length;
|
||||
}
|
||||
|
||||
public FileStatus next() {
|
||||
return elements[idx++];
|
||||
}
|
||||
|
||||
public FileStatus path() {
|
||||
return this.path;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param fs The filesystem to use.
|
||||
* @param startPaths A list of paths that need to be traversed
|
||||
*/
|
||||
public DirectoryTraversal(FileSystem fs, List<FileStatus> startPaths) {
|
||||
this(fs, startPaths, 1);
|
||||
}
|
||||
|
||||
public DirectoryTraversal(
|
||||
FileSystem fs, List<FileStatus> startPaths, int numThreads) {
|
||||
this.fs = fs;
|
||||
paths = startPaths;
|
||||
pathIdx = 0;
|
||||
this.numThreads = numThreads;
|
||||
executor = Executors.newFixedThreadPool(numThreads);
|
||||
}
|
||||
|
||||
public List<FileStatus> getFilteredFiles(FileFilter filter, int limit) {
|
||||
List<FileStatus> filtered = new ArrayList<FileStatus>();
|
||||
if (limit == 0)
|
||||
return filtered;
|
||||
|
||||
// We need this semaphore to block when the number of running workitems
|
||||
// is equal to the number of threads. FixedThreadPool limits the number
|
||||
// of threads, but not the queue size. This way we will limit the memory
|
||||
// usage.
|
||||
Semaphore slots = new Semaphore(numThreads);
|
||||
|
||||
while (true) {
|
||||
FilterFileWorkItem work = null;
|
||||
try {
|
||||
slots.acquire();
|
||||
synchronized(filtered) {
|
||||
if (filtered.size() >= limit) {
|
||||
slots.release();
|
||||
break;
|
||||
}
|
||||
}
|
||||
Node next = getNextDirectoryNode();
|
||||
if (next == null) {
|
||||
slots.release();
|
||||
break;
|
||||
}
|
||||
work = new FilterFileWorkItem(filter, next, filtered, slots);
|
||||
} catch (InterruptedException ie) {
|
||||
slots.release();
|
||||
break;
|
||||
} catch (IOException e) {
|
||||
slots.release();
|
||||
break;
|
||||
}
|
||||
executor.execute(work);
|
||||
}
|
||||
|
||||
try {
|
||||
// Wait for all submitted items to finish.
|
||||
slots.acquire(numThreads);
|
||||
// If this traversal is finished, shutdown the executor.
|
||||
if (doneTraversal()) {
|
||||
executor.shutdown();
|
||||
executor.awaitTermination(1, TimeUnit.HOURS);
|
||||
}
|
||||
} catch (InterruptedException ie) {
|
||||
}
|
||||
|
||||
return filtered;
|
||||
}
|
||||
|
||||
class FilterFileWorkItem implements Runnable {
|
||||
FileFilter filter;
|
||||
Node dir;
|
||||
List<FileStatus> filtered;
|
||||
Semaphore slots;
|
||||
|
||||
FilterFileWorkItem(FileFilter filter, Node dir, List<FileStatus> filtered,
|
||||
Semaphore slots) {
|
||||
this.slots = slots;
|
||||
this.filter = filter;
|
||||
this.dir = dir;
|
||||
this.filtered = filtered;
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
public void run() {
|
||||
try {
|
||||
LOG.info("Initiating file filtering for " + dir.path.getPath());
|
||||
for (FileStatus f: dir.elements) {
|
||||
if (!f.isFile()) {
|
||||
continue;
|
||||
}
|
||||
if (filter.check(f)) {
|
||||
synchronized(filtered) {
|
||||
filtered.add(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error in directory traversal: "
|
||||
+ StringUtils.stringifyException(e));
|
||||
} finally {
|
||||
slots.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the next file.
|
||||
* @throws IOException
|
||||
*/
|
||||
public FileStatus getNextFile() throws IOException {
|
||||
// Check if traversal is done.
|
||||
while (!doneTraversal()) {
|
||||
// If traversal is not done, check if the stack is not empty.
|
||||
while (!stack.isEmpty()) {
|
||||
// If the stack is not empty, look at the top node.
|
||||
Node node = stack.peek();
|
||||
// Check if the top node has an element.
|
||||
if (node.hasNext()) {
|
||||
FileStatus element = node.next();
|
||||
// Is the next element a directory.
|
||||
if (!element.isDir()) {
|
||||
// It is a file, return it.
|
||||
return element;
|
||||
}
|
||||
// Next element is a directory, push it on to the stack and
|
||||
// continue
|
||||
try {
|
||||
pushNewNode(element);
|
||||
} catch (FileNotFoundException e) {
|
||||
// Ignore and move to the next element.
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
// Top node has no next element, pop it and continue.
|
||||
stack.pop();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// If the stack is empty, do we have more paths?
|
||||
while (!paths.isEmpty()) {
|
||||
FileStatus next = paths.remove(0);
|
||||
pathIdx++;
|
||||
if (!next.isDir()) {
|
||||
return next;
|
||||
}
|
||||
try {
|
||||
pushNewNode(next);
|
||||
} catch (FileNotFoundException e) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next directory in the tree. The algorithm returns deeper directories
|
||||
* first.
|
||||
* @return A FileStatus representing the directory.
|
||||
* @throws IOException
|
||||
*/
|
||||
public FileStatus getNextDirectory() throws IOException {
|
||||
Node dirNode = getNextDirectoryNode();
|
||||
if (dirNode != null) {
|
||||
return dirNode.path;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Node getNextDirectoryNode() throws IOException {
|
||||
|
||||
// Check if traversal is done.
|
||||
while (!doneTraversal()) {
|
||||
// If traversal is not done, check if the stack is not empty.
|
||||
while (!stack.isEmpty()) {
|
||||
// If the stack is not empty, look at the top node.
|
||||
Node node = stack.peek();
|
||||
// Check if the top node has an element.
|
||||
if (node.hasNext()) {
|
||||
FileStatus element = node.next();
|
||||
// Is the next element a directory.
|
||||
if (element.isDir()) {
|
||||
// Next element is a directory, push it on to the stack and
|
||||
// continue
|
||||
try {
|
||||
pushNewNode(element);
|
||||
} catch (FileNotFoundException e) {
|
||||
// Ignore and move to the next element.
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
stack.pop();
|
||||
return node;
|
||||
}
|
||||
}
|
||||
// If the stack is empty, do we have more paths?
|
||||
while (!paths.isEmpty()) {
|
||||
FileStatus next = paths.remove(0);
|
||||
pathIdx++;
|
||||
if (next.isDir()) {
|
||||
try {
|
||||
pushNewNode(next);
|
||||
} catch (FileNotFoundException e) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void pushNewNode(FileStatus stat) throws IOException {
|
||||
if (!stat.isDir()) {
|
||||
return;
|
||||
}
|
||||
Path p = stat.getPath();
|
||||
FileStatus[] elements = fs.listStatus(p);
|
||||
Node newNode = new Node(stat, (elements == null? new FileStatus[0]: elements));
|
||||
stack.push(newNode);
|
||||
}
|
||||
|
||||
public boolean doneTraversal() {
|
||||
return paths.isEmpty() && stack.isEmpty();
|
||||
}
|
||||
}
|
|
@ -1,660 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.BufferedReader;
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Date;
|
||||
import java.text.SimpleDateFormat;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.InputFormat;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
|
||||
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
||||
|
||||
/**
|
||||
* distributed block fixer, uses map reduce jobs to fix corrupt files
|
||||
*
|
||||
* configuration options
|
||||
* raid.blockfix.filespertask - number of corrupt files to fix in a single
|
||||
* map reduce task (i.e., at one mapper node)
|
||||
*
|
||||
* raid.blockfix.maxpendingfiles - maximum number of files to fix
|
||||
* simultaneously
|
||||
*/
|
||||
public class DistBlockFixer extends BlockFixer {
|
||||
// volatile should be sufficient since only the block fixer thread
|
||||
// updates numJobsRunning (other threads may read)
|
||||
private volatile int numJobsRunning = 0;
|
||||
|
||||
private static final String WORK_DIR_PREFIX = "blockfixer";
|
||||
private static final String IN_FILE_SUFFIX = ".in";
|
||||
private static final String PART_PREFIX = "part-";
|
||||
|
||||
private static final String BLOCKFIX_FILES_PER_TASK =
|
||||
"raid.blockfix.filespertask";
|
||||
private static final String BLOCKFIX_MAX_PENDING_FILES =
|
||||
"raid.blockfix.maxpendingfiles";
|
||||
|
||||
// default number of files to fix in a task
|
||||
private static final long DEFAULT_BLOCKFIX_FILES_PER_TASK = 10L;
|
||||
|
||||
// default number of files to fix simultaneously
|
||||
private static final long DEFAULT_BLOCKFIX_MAX_PENDING_FILES = 1000L;
|
||||
|
||||
protected static final Log LOG = LogFactory.getLog(DistBlockFixer.class);
|
||||
|
||||
// number of files to fix in a task
|
||||
private long filesPerTask;
|
||||
|
||||
// number of files to fix simultaneously
|
||||
final private long maxPendingFiles;
|
||||
|
||||
// number of files being fixed right now
|
||||
private long pendingFiles;
|
||||
|
||||
private long lastCheckTime;
|
||||
|
||||
private final SimpleDateFormat dateFormat =
|
||||
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
private Map<String, CorruptFileInfo> fileIndex =
|
||||
new HashMap<String, CorruptFileInfo>();
|
||||
private Map<Job, List<CorruptFileInfo>> jobIndex =
|
||||
new HashMap<Job, List<CorruptFileInfo>>();
|
||||
|
||||
static enum Counter {
|
||||
FILES_SUCCEEDED, FILES_FAILED, FILES_NOACTION
|
||||
}
|
||||
|
||||
/**
 * Creates a block fixer whose task size and pending-file cap come from the
 * configuration, with no files pending.
 *
 * @param conf The configuration handed to the superclass.
 */
public DistBlockFixer(Configuration conf) {
  super(conf);
  this.filesPerTask = filesPerTask(getConf());
  this.maxPendingFiles = maxPendingFiles(getConf());
  this.pendingFiles = 0L;

  // Back-date the last check by one interval so the first loop iteration
  // is immediately due.
  this.lastCheckTime = Time.now() - blockFixInterval;
}
|
||||
|
||||
/**
|
||||
* determines how many files to fix in a single task
|
||||
*/
|
||||
protected static long filesPerTask(Configuration conf) {
|
||||
return conf.getLong(BLOCKFIX_FILES_PER_TASK,
|
||||
DEFAULT_BLOCKFIX_FILES_PER_TASK);
|
||||
|
||||
}
|
||||
/**
|
||||
* determines how many files to fix simultaneously
|
||||
*/
|
||||
protected static long maxPendingFiles(Configuration conf) {
|
||||
return conf.getLong(BLOCKFIX_MAX_PENDING_FILES,
|
||||
DEFAULT_BLOCKFIX_MAX_PENDING_FILES);
|
||||
}
|
||||
|
||||
/**
|
||||
* runs the block fixer periodically
|
||||
*/
|
||||
public void run() {
|
||||
while (running) {
|
||||
// check if it is time to run the block fixer
|
||||
long now = Time.now();
|
||||
if (now >= lastCheckTime + blockFixInterval) {
|
||||
lastCheckTime = now;
|
||||
try {
|
||||
checkAndFixBlocks(now);
|
||||
} catch (InterruptedException ignore) {
|
||||
LOG.info("interrupted");
|
||||
} catch (Exception e) {
|
||||
// log exceptions and keep running
|
||||
LOG.error(StringUtils.stringifyException(e));
|
||||
} catch (Error e) {
|
||||
LOG.error(StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
// try to sleep for the remainder of the interval
|
||||
long sleepPeriod = (lastCheckTime - Time.now()) +
|
||||
blockFixInterval;
|
||||
|
||||
if ((sleepPeriod > 0L) && running) {
|
||||
try {
|
||||
Thread.sleep(sleepPeriod);
|
||||
} catch (InterruptedException ignore) {
|
||||
LOG.info("interrupted");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks for corrupt blocks and fixes them (if any)
|
||||
*/
|
||||
private void checkAndFixBlocks(long startTime)
|
||||
throws IOException, InterruptedException, ClassNotFoundException {
|
||||
checkJobs();
|
||||
|
||||
if (pendingFiles >= maxPendingFiles) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<Path> corruptFiles = getCorruptFiles();
|
||||
filterUnfixableSourceFiles(corruptFiles.iterator());
|
||||
|
||||
String startTimeStr = dateFormat.format(new Date(startTime));
|
||||
|
||||
LOG.info("found " + corruptFiles.size() + " corrupt files");
|
||||
|
||||
if (corruptFiles.size() > 0) {
|
||||
String jobName = "blockfixer." + startTime;
|
||||
startJob(jobName, corruptFiles);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle a failed job.
|
||||
*/
|
||||
private void failJob(Job job) throws IOException {
|
||||
// assume no files have been fixed
|
||||
LOG.error("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
|
||||
") finished (failed)");
|
||||
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
|
||||
fileInfo.fail();
|
||||
}
|
||||
numJobsRunning--;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle a successful job.
|
||||
*/
|
||||
private void succeedJob(Job job, long filesSucceeded, long filesFailed)
|
||||
throws IOException {
|
||||
LOG.info("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
|
||||
") finished (succeeded)");
|
||||
|
||||
if (filesFailed == 0) {
|
||||
// no files have failed
|
||||
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
|
||||
fileInfo.succeed();
|
||||
}
|
||||
} else {
|
||||
// we have to look at the output to check which files have failed
|
||||
Set<String> failedFiles = getFailedFiles(job);
|
||||
|
||||
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
|
||||
if (failedFiles.contains(fileInfo.getFile().toString())) {
|
||||
fileInfo.fail();
|
||||
} else {
|
||||
// call succeed for files that have succeeded or for which no action
|
||||
// was taken
|
||||
fileInfo.succeed();
|
||||
}
|
||||
}
|
||||
}
|
||||
// report succeeded files to metrics
|
||||
incrFilesFixed(filesSucceeded);
|
||||
numJobsRunning--;
|
||||
}
|
||||
|
||||
/**
|
||||
* checks if jobs have completed and updates job and file index
|
||||
* returns a list of failed files for restarting
|
||||
*/
|
||||
private void checkJobs() throws IOException {
|
||||
Iterator<Job> jobIter = jobIndex.keySet().iterator();
|
||||
while(jobIter.hasNext()) {
|
||||
Job job = jobIter.next();
|
||||
|
||||
try {
|
||||
if (job.isComplete()) {
|
||||
long filesSucceeded =
|
||||
job.getCounters().findCounter(Counter.FILES_SUCCEEDED).getValue();
|
||||
long filesFailed =
|
||||
job.getCounters().findCounter(Counter.FILES_FAILED).getValue();
|
||||
long filesNoAction =
|
||||
job.getCounters().findCounter(Counter.FILES_NOACTION).getValue();
|
||||
int files = jobIndex.get(job).size();
|
||||
if (job.isSuccessful() &&
|
||||
(filesSucceeded + filesFailed + filesNoAction ==
|
||||
((long) files))) {
|
||||
// job has processed all files
|
||||
succeedJob(job, filesSucceeded, filesFailed);
|
||||
} else {
|
||||
failJob(job);
|
||||
}
|
||||
jobIter.remove();
|
||||
} else {
|
||||
LOG.info("job " + job.getJobName() + " still running");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error(StringUtils.stringifyException(e));
|
||||
failJob(job);
|
||||
try {
|
||||
job.killJob();
|
||||
} catch (Exception ee) {
|
||||
LOG.error(StringUtils.stringifyException(ee));
|
||||
}
|
||||
jobIter.remove();
|
||||
}
|
||||
}
|
||||
purgeFileIndex();
|
||||
}
|
||||
|
||||
/**
|
||||
* determines which files have failed for a given job
|
||||
*/
|
||||
private Set<String> getFailedFiles(Job job) throws IOException {
|
||||
Set<String> failedFiles = new HashSet<String>();
|
||||
|
||||
Path outDir = SequenceFileOutputFormat.getOutputPath(job);
|
||||
FileSystem fs = outDir.getFileSystem(getConf());
|
||||
if (!fs.getFileStatus(outDir).isDir()) {
|
||||
throw new IOException(outDir.toString() + " is not a directory");
|
||||
}
|
||||
|
||||
FileStatus[] files = fs.listStatus(outDir);
|
||||
|
||||
for (FileStatus f: files) {
|
||||
Path fPath = f.getPath();
|
||||
if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
|
||||
LOG.info("opening " + fPath.toString());
|
||||
SequenceFile.Reader reader =
|
||||
new SequenceFile.Reader(fs, fPath, getConf());
|
||||
|
||||
Text key = new Text();
|
||||
Text value = new Text();
|
||||
while (reader.next(key, value)) {
|
||||
failedFiles.add(key.toString());
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
return failedFiles;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* purge expired jobs from the file index
|
||||
*/
|
||||
private void purgeFileIndex() {
|
||||
Iterator<String> fileIter = fileIndex.keySet().iterator();
|
||||
while(fileIter.hasNext()) {
|
||||
String file = fileIter.next();
|
||||
if (fileIndex.get(file).isExpired()) {
|
||||
fileIter.remove();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* creates and submits a job, updates file index and job index
|
||||
*/
|
||||
private Job startJob(String jobName, List<Path> corruptFiles)
|
||||
throws IOException, InterruptedException, ClassNotFoundException {
|
||||
Path inDir = new Path(WORK_DIR_PREFIX + "/in/" + jobName);
|
||||
Path outDir = new Path(WORK_DIR_PREFIX + "/out/" + jobName);
|
||||
List<Path> filesInJob = createInputFile(jobName, inDir, corruptFiles);
|
||||
|
||||
Configuration jobConf = new Configuration(getConf());
|
||||
Job job = new Job(jobConf, jobName);
|
||||
job.setJarByClass(getClass());
|
||||
job.setMapperClass(DistBlockFixerMapper.class);
|
||||
job.setNumReduceTasks(0);
|
||||
job.setInputFormatClass(DistBlockFixerInputFormat.class);
|
||||
job.setOutputFormatClass(SequenceFileOutputFormat.class);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(Text.class);
|
||||
|
||||
DistBlockFixerInputFormat.setInputPaths(job, inDir);
|
||||
SequenceFileOutputFormat.setOutputPath(job, outDir);
|
||||
|
||||
job.submit();
|
||||
LOG.info("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
|
||||
") started");
|
||||
|
||||
// submit the job before inserting it into the index
|
||||
// this way, if submit fails, we won't have added anything to the index
|
||||
insertJob(job, filesInJob);
|
||||
return job;
|
||||
}
|
||||
|
||||
/**
|
||||
* inserts new job into file index and job index
|
||||
*/
|
||||
private void insertJob(Job job, List<Path> corruptFiles) {
|
||||
List<CorruptFileInfo> fileInfos = new LinkedList<CorruptFileInfo>();
|
||||
|
||||
for (Path file: corruptFiles) {
|
||||
CorruptFileInfo fileInfo = new CorruptFileInfo(file, job);
|
||||
fileInfos.add(fileInfo);
|
||||
fileIndex.put(file.toString(), fileInfo);
|
||||
}
|
||||
|
||||
jobIndex.put(job, fileInfos);
|
||||
numJobsRunning++;
|
||||
}
|
||||
|
||||
/**
|
||||
* creates the input file (containing the names of the files to be fixed
|
||||
*/
|
||||
private List<Path> createInputFile(String jobName, Path inDir,
|
||||
List<Path> corruptFiles)
|
||||
throws IOException {
|
||||
|
||||
Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
|
||||
FileSystem fs = file.getFileSystem(getConf());
|
||||
SequenceFile.Writer fileOut = SequenceFile.createWriter(fs, getConf(), file,
|
||||
LongWritable.class,
|
||||
Text.class);
|
||||
long index = 0L;
|
||||
|
||||
List<Path> filesAdded = new LinkedList<Path>();
|
||||
|
||||
for (Path corruptFile: corruptFiles) {
|
||||
if (pendingFiles >= maxPendingFiles) {
|
||||
break;
|
||||
}
|
||||
|
||||
String corruptFileName = corruptFile.toString();
|
||||
fileOut.append(new LongWritable(index++), new Text(corruptFileName));
|
||||
filesAdded.add(corruptFile);
|
||||
pendingFiles++;
|
||||
|
||||
if (index % filesPerTask == 0) {
|
||||
fileOut.sync(); // create sync point to make sure we can split here
|
||||
}
|
||||
}
|
||||
|
||||
fileOut.close();
|
||||
return filesAdded;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets a list of corrupt files from the name node
|
||||
* and filters out files that are currently being fixed or
|
||||
* that were recently fixed
|
||||
*/
|
||||
private List<Path> getCorruptFiles() throws IOException {
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)
|
||||
(new Path("/")).getFileSystem(getConf());
|
||||
|
||||
String[] files = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
List<Path> corruptFiles = new LinkedList<Path>();
|
||||
|
||||
for (String f: files) {
|
||||
Path p = new Path(f);
|
||||
// filter out files that are being fixed or that were recently fixed
|
||||
if (!fileIndex.containsKey(p.toString())) {
|
||||
corruptFiles.add(p);
|
||||
}
|
||||
}
|
||||
RaidUtils.filterTrash(getConf(), corruptFiles);
|
||||
|
||||
return corruptFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the number of map reduce jobs running
|
||||
*/
|
||||
public int jobsRunning() {
|
||||
return numJobsRunning;
|
||||
}
|
||||
|
||||
/**
|
||||
* hold information about a corrupt file that is being fixed
|
||||
*/
|
||||
class CorruptFileInfo {
|
||||
|
||||
private Path file;
|
||||
private Job job;
|
||||
private boolean done;
|
||||
private long time;
|
||||
|
||||
public CorruptFileInfo(Path file, Job job) {
|
||||
this.file = file;
|
||||
this.job = job;
|
||||
this.done = false;
|
||||
this.time = 0;
|
||||
}
|
||||
|
||||
public boolean isDone() {
|
||||
return done;
|
||||
}
|
||||
|
||||
public boolean isExpired() {
|
||||
return done && ((Time.now() - time) > historyInterval);
|
||||
}
|
||||
|
||||
public Path getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
/**
|
||||
* updates file index to record a failed attempt at fixing a file,
|
||||
* immediately removes the entry from the file index
|
||||
* (instead of letting it expire)
|
||||
* so that we can retry right away
|
||||
*/
|
||||
public void fail() {
|
||||
// remove this file from the index
|
||||
CorruptFileInfo removed = fileIndex.remove(file.toString());
|
||||
if (removed == null) {
|
||||
LOG.error("trying to remove file not in file index: " +
|
||||
file.toString());
|
||||
} else {
|
||||
LOG.error("fixing " + file.toString() + " failed");
|
||||
}
|
||||
pendingFiles--;
|
||||
}
|
||||
|
||||
/**
|
||||
* marks a file as fixed successfully
|
||||
* and sets time stamp for expiry after specified interval
|
||||
*/
|
||||
public void succeed() {
|
||||
// leave the file in the index,
|
||||
// will be pruged later
|
||||
job = null;
|
||||
done = true;
|
||||
time = Time.now();
|
||||
LOG.info("fixing " + file.toString() + " succeeded");
|
||||
pendingFiles--;
|
||||
}
|
||||
}
|
||||
|
||||
static class DistBlockFixerInputFormat
|
||||
extends SequenceFileInputFormat<LongWritable, Text> {
|
||||
|
||||
protected static final Log LOG =
|
||||
LogFactory.getLog(DistBlockFixerMapper.class);
|
||||
|
||||
/**
|
||||
* splits the input files into tasks handled by a single node
|
||||
* we have to read the input files to do this based on a number of
|
||||
* items in a sequence
|
||||
*/
|
||||
@Override
|
||||
public List <InputSplit> getSplits(JobContext job)
|
||||
throws IOException {
|
||||
long filesPerTask = DistBlockFixer.filesPerTask(job.getConfiguration());
|
||||
|
||||
Path[] inPaths = getInputPaths(job);
|
||||
|
||||
List<InputSplit> splits = new LinkedList<InputSplit>();
|
||||
|
||||
long fileCounter = 0;
|
||||
|
||||
for (Path inPath: inPaths) {
|
||||
|
||||
FileSystem fs = inPath.getFileSystem(job.getConfiguration());
|
||||
|
||||
if (!fs.getFileStatus(inPath).isDir()) {
|
||||
throw new IOException(inPath.toString() + " is not a directory");
|
||||
}
|
||||
|
||||
FileStatus[] inFiles = fs.listStatus(inPath);
|
||||
|
||||
for (FileStatus inFileStatus: inFiles) {
|
||||
Path inFile = inFileStatus.getPath();
|
||||
|
||||
if (!inFileStatus.isDir() &&
|
||||
(inFile.getName().equals(job.getJobName() + IN_FILE_SUFFIX))) {
|
||||
|
||||
fileCounter++;
|
||||
SequenceFile.Reader inFileReader =
|
||||
new SequenceFile.Reader(fs, inFile, job.getConfiguration());
|
||||
|
||||
long startPos = inFileReader.getPosition();
|
||||
long counter = 0;
|
||||
|
||||
// create an input split every filesPerTask items in the sequence
|
||||
LongWritable key = new LongWritable();
|
||||
Text value = new Text();
|
||||
try {
|
||||
while (inFileReader.next(key, value)) {
|
||||
if (counter % filesPerTask == filesPerTask - 1L) {
|
||||
splits.add(new FileSplit(inFile, startPos,
|
||||
inFileReader.getPosition() -
|
||||
startPos,
|
||||
null));
|
||||
startPos = inFileReader.getPosition();
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
|
||||
// create input split for remaining items if necessary
|
||||
// this includes the case where no splits were created by the loop
|
||||
if (startPos != inFileReader.getPosition()) {
|
||||
splits.add(new FileSplit(inFile, startPos,
|
||||
inFileReader.getPosition() - startPos,
|
||||
null));
|
||||
}
|
||||
} finally {
|
||||
inFileReader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info("created " + splits.size() + " input splits from " +
|
||||
fileCounter + " files");
|
||||
|
||||
return splits;
|
||||
}
|
||||
|
||||
/**
|
||||
* indicates that input file can be split
|
||||
*/
|
||||
@Override
|
||||
public boolean isSplitable (JobContext job, Path file) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* mapper for fixing stripes with corrupt blocks
|
||||
*/
|
||||
static class DistBlockFixerMapper
|
||||
extends Mapper<LongWritable, Text, Text, Text> {
|
||||
|
||||
protected static final Log LOG =
|
||||
LogFactory.getLog(DistBlockFixerMapper.class);
|
||||
|
||||
/**
|
||||
* fix a stripe
|
||||
*/
|
||||
@Override
|
||||
public void map(LongWritable key, Text fileText, Context context)
|
||||
throws IOException, InterruptedException {
|
||||
|
||||
BlockFixerHelper helper =
|
||||
new BlockFixerHelper(context.getConfiguration());
|
||||
|
||||
String fileStr = fileText.toString();
|
||||
LOG.info("fixing " + fileStr);
|
||||
|
||||
Path file = new Path(fileStr);
|
||||
boolean success = false;
|
||||
|
||||
try {
|
||||
boolean fixed = helper.fixFile(file, context);
|
||||
|
||||
if (fixed) {
|
||||
context.getCounter(Counter.FILES_SUCCEEDED).increment(1L);
|
||||
} else {
|
||||
context.getCounter(Counter.FILES_NOACTION).increment(1L);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error(StringUtils.stringifyException(e));
|
||||
|
||||
// report file as failed
|
||||
context.getCounter(Counter.FILES_FAILED).increment(1L);
|
||||
String outkey = fileStr;
|
||||
String outval = "failed";
|
||||
context.write(new Text(outkey), new Text(outval));
|
||||
}
|
||||
|
||||
context.progress();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,374 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Date;
|
||||
import java.text.SimpleDateFormat;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.SequenceFile.Writer;
|
||||
import org.apache.hadoop.io.SequenceFile.Reader;
|
||||
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.hadoop.mapreduce.JobID;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.raid.RaidNode.Statistics;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
public class DistRaid extends Configured {
|
||||
|
||||
protected static final Log LOG = LogFactory.getLog(DistRaid.class);
|
||||
|
||||
static final String NAME = "distRaid";
|
||||
static final String JOB_DIR_LABEL = NAME + ".job.dir";
|
||||
static final int OP_LIST_BLOCK_SIZE = 32 * 1024 * 1024; // block size of control file
|
||||
static final short OP_LIST_REPLICATION = 10; // replication factor of control file
|
||||
|
||||
public static final String OPS_PER_TASK = "raid.distraid.opspertask";
|
||||
private static final int DEFAULT_OPS_PER_TASK = 100;
|
||||
private static final int SYNC_FILE_MAX = 10;
|
||||
private static final SimpleDateFormat dateForm = new SimpleDateFormat("yyyy-MM-dd HH:mm");
|
||||
|
||||
/** Job counters; DistRaidMapper increments them as it raids files. */
static enum Counter {
|
||||
FILES_SUCCEEDED, FILES_FAILED, PROCESSED_BLOCKS, PROCESSED_SIZE, META_BLOCKS, META_SIZE
|
||||
}
|
||||
|
||||
/**
 * Creates a DistRaid; the configuration is passed to the Configured
 * superclass and later read back through getConf().
 *
 * @param conf configuration handed to the superclass
 */
public DistRaid(Configuration conf) {
|
||||
super(conf);
|
||||
}
|
||||
|
||||
private static final Random RANDOM = new Random();
|
||||
|
||||
protected static String getRandomId() {
|
||||
return Integer.toString(RANDOM.nextInt(Integer.MAX_VALUE), 36);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* helper class which holds the policy and paths
|
||||
*
|
||||
*/
|
||||
public static class RaidPolicyPathPair {
|
||||
public PolicyInfo policy;
|
||||
public List<FileStatus> srcPaths;
|
||||
|
||||
RaidPolicyPathPair(PolicyInfo policy, List<FileStatus> srcPaths) {
|
||||
this.policy = policy;
|
||||
this.srcPaths = srcPaths;
|
||||
}
|
||||
}
|
||||
|
||||
List<RaidPolicyPathPair> raidPolicyPathPairList = new ArrayList<RaidPolicyPathPair>();
|
||||
|
||||
private Job runningJob;
|
||||
private String lastReport = null;
|
||||
|
||||
/** Responsible for generating splits of the src file list. */
|
||||
static class DistRaidInputFormat extends
|
||||
SequenceFileInputFormat<Text, PolicyInfo> {
|
||||
/**
|
||||
* Produce splits such that each is no greater than the quotient of the
|
||||
* total size and the number of splits requested.
|
||||
*
|
||||
* @param job
|
||||
* The handle to the Configuration object
|
||||
* @param numSplits
|
||||
* Number of splits requested
|
||||
*/
|
||||
public List<InputSplit> getSplits(JobContext job) throws IOException {
|
||||
Configuration conf = job.getConfiguration();
|
||||
|
||||
// We create only one input file. So just get the first file in the first
|
||||
// input directory.
|
||||
Path inDir = getInputPaths(job)[0];
|
||||
FileSystem fs = inDir.getFileSystem(conf);
|
||||
FileStatus[] inputFiles = fs.listStatus(inDir);
|
||||
Path inputFile = inputFiles[0].getPath();
|
||||
|
||||
List<InputSplit> splits = new ArrayList<InputSplit>();
|
||||
SequenceFile.Reader in =
|
||||
new SequenceFile.Reader(conf, Reader.file(inputFile));
|
||||
long prev = 0L;
|
||||
final int opsPerTask = conf.getInt(OPS_PER_TASK, DEFAULT_OPS_PER_TASK);
|
||||
try {
|
||||
Text key = new Text();
|
||||
PolicyInfo value = new PolicyInfo();
|
||||
int count = 0; // count src
|
||||
while (in.next(key, value)) {
|
||||
long curr = in.getPosition();
|
||||
long delta = curr - prev;
|
||||
if (++count > opsPerTask) {
|
||||
count = 0;
|
||||
splits.add(new FileSplit(inputFile, prev, delta, (String[]) null));
|
||||
prev = curr;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
long remaining = fs.getFileStatus(inputFile).getLen() - prev;
|
||||
if (remaining != 0) {
|
||||
splits.add(new FileSplit(inputFile, prev, remaining, (String[]) null));
|
||||
}
|
||||
return splits;
|
||||
}
|
||||
}
|
||||
|
||||
/** The mapper for raiding files. */
|
||||
static class DistRaidMapper extends Mapper<Text, PolicyInfo, Text, Text> {
|
||||
private boolean ignoreFailures = false;
|
||||
|
||||
private int failcount = 0;
|
||||
private int succeedcount = 0;
|
||||
private Statistics st = new Statistics();
|
||||
|
||||
private String getCountString() {
|
||||
return "Succeeded: " + succeedcount + " Failed: " + failcount;
|
||||
}
|
||||
|
||||
/** Run a FileOperation
|
||||
* @throws IOException
|
||||
* @throws InterruptedException */
|
||||
public void map(Text key, PolicyInfo policy, Context context)
|
||||
throws IOException, InterruptedException {
|
||||
try {
|
||||
Configuration jobConf = context.getConfiguration();
|
||||
LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
|
||||
Path p = new Path(key.toString());
|
||||
FileStatus fs = p.getFileSystem(jobConf).getFileStatus(p);
|
||||
st.clear();
|
||||
RaidNode.doRaid(jobConf, policy, fs, st, context);
|
||||
|
||||
++succeedcount;
|
||||
|
||||
context.getCounter(Counter.PROCESSED_BLOCKS).increment(st.numProcessedBlocks);
|
||||
context.getCounter(Counter.PROCESSED_SIZE).increment(st.processedSize);
|
||||
context.getCounter(Counter.META_BLOCKS).increment(st.numMetaBlocks);
|
||||
context.getCounter(Counter.META_SIZE).increment(st.metaSize);
|
||||
context.getCounter(Counter.FILES_SUCCEEDED).increment(1);
|
||||
} catch (IOException e) {
|
||||
++failcount;
|
||||
context.getCounter(Counter.FILES_FAILED).increment(1);
|
||||
|
||||
String s = "FAIL: " + policy + ", " + key + " "
|
||||
+ StringUtils.stringifyException(e);
|
||||
context.write(new Text(key), new Text(s));
|
||||
LOG.error(s);
|
||||
} finally {
|
||||
context.setStatus(getCountString());
|
||||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void close() throws IOException {
|
||||
if (failcount == 0 || ignoreFailures) {
|
||||
return;
|
||||
}
|
||||
throw new IOException(getCountString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set options specified in raid.scheduleroption.
|
||||
* The string should be formatted as key:value[,key:value]*
|
||||
*/
|
||||
static void setSchedulerOption(Configuration conf) {
|
||||
String schedulerOption = conf.get("raid.scheduleroption");
|
||||
if (schedulerOption != null) {
|
||||
// Parse the scheduler option to get key:value pairs.
|
||||
String[] keyValues = schedulerOption.trim().split(",");
|
||||
for (String keyValue: keyValues) {
|
||||
String[] fields = keyValue.trim().split(":");
|
||||
String key = fields[0].trim();
|
||||
String value = fields[1].trim();
|
||||
conf.set(key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Job object.
|
||||
* @param conf
|
||||
* @return a Job object
|
||||
* @throws IOException
|
||||
*/
|
||||
static Job createJob(Configuration jobConf) throws IOException {
|
||||
String jobName = NAME + " " + dateForm.format(new Date(RaidNode.now()));
|
||||
|
||||
setSchedulerOption(jobConf);
|
||||
|
||||
Job job = Job.getInstance(jobConf, jobName);
|
||||
job.setSpeculativeExecution(false);
|
||||
job.setJarByClass(DistRaid.class);
|
||||
job.setInputFormatClass(DistRaidInputFormat.class);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(Text.class);
|
||||
|
||||
job.setMapperClass(DistRaidMapper.class);
|
||||
job.setNumReduceTasks(0);
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
/** Add paths to be raided */
|
||||
public void addRaidPaths(PolicyInfo info, List<FileStatus> paths) {
|
||||
raidPolicyPathPairList.add(new RaidPolicyPathPair(info, paths));
|
||||
}
|
||||
|
||||
/** Invokes a map-reduce job do parallel raiding.
|
||||
* @return true if the job was started, false otherwise
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public boolean startDistRaid() throws IOException {
|
||||
assert(raidPolicyPathPairList.size() > 0);
|
||||
Job job = createJob(getConf());
|
||||
createInputFile(job);
|
||||
try {
|
||||
job.submit();
|
||||
this.runningJob = job;
|
||||
LOG.info("Job Started: " + runningJob.getJobID());
|
||||
return true;
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new IOException(e);
|
||||
} catch (InterruptedException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Checks if the map-reduce job has completed.
|
||||
*
|
||||
* @return true if the job completed, false otherwise.
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean checkComplete() throws IOException {
|
||||
JobID jobID = runningJob.getJobID();
|
||||
LOG.info("Checking job " + jobID);
|
||||
try {
|
||||
if (runningJob.isComplete()) {
|
||||
// delete job directory
|
||||
Configuration jobConf = runningJob.getConfiguration();
|
||||
final String jobdir = jobConf.get(JOB_DIR_LABEL);
|
||||
if (jobdir != null) {
|
||||
final Path jobpath = new Path(jobdir);
|
||||
jobpath.getFileSystem(jobConf).delete(jobpath, true);
|
||||
}
|
||||
if (runningJob.isSuccessful()) {
|
||||
LOG.info("Job Complete(Succeeded): " + jobID);
|
||||
} else {
|
||||
LOG.error("Job Complete(Failed): " + jobID);
|
||||
}
|
||||
raidPolicyPathPairList.clear();
|
||||
return true;
|
||||
} else {
|
||||
String report = (" job " + jobID +
|
||||
" map " + StringUtils.formatPercent(runningJob.mapProgress(), 0)+
|
||||
" reduce " + StringUtils.formatPercent(runningJob.reduceProgress(), 0));
|
||||
if (!report.equals(lastReport)) {
|
||||
LOG.info(report);
|
||||
lastReport = report;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean successful() throws IOException {
|
||||
try {
|
||||
return runningJob.isSuccessful();
|
||||
} catch (InterruptedException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* set up input file which has the list of input files.
|
||||
*
|
||||
* @return boolean
|
||||
* @throws IOException
|
||||
*/
|
||||
private void createInputFile(Job job) throws IOException {
|
||||
Configuration jobConf = job.getConfiguration();
|
||||
Path jobDir = new Path(JOB_DIR_LABEL + getRandomId());
|
||||
Path inDir = new Path(jobDir, "in");
|
||||
Path outDir = new Path(jobDir, "out");
|
||||
FileInputFormat.setInputPaths(job, inDir);
|
||||
FileOutputFormat.setOutputPath(job, outDir);
|
||||
Path opList = new Path(inDir, NAME);
|
||||
|
||||
Configuration tmp = new Configuration(jobConf);
|
||||
// The control file should have small size blocks. This helps
|
||||
// in spreading out the load from mappers that will be spawned.
|
||||
tmp.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);
|
||||
FileSystem fs = opList.getFileSystem(tmp);
|
||||
|
||||
int opCount = 0, synCount = 0;
|
||||
SequenceFile.Writer opWriter = null;
|
||||
try {
|
||||
opWriter = SequenceFile.createWriter(
|
||||
jobConf, Writer.file(opList), Writer.keyClass(Text.class),
|
||||
Writer.valueClass(PolicyInfo.class),
|
||||
Writer.compression(SequenceFile.CompressionType.NONE));
|
||||
for (RaidPolicyPathPair p : raidPolicyPathPairList) {
|
||||
// If a large set of files are Raided for the first time, files
|
||||
// in the same directory that tend to have the same size will end up
|
||||
// with the same map. This shuffle mixes things up, allowing a better
|
||||
// mix of files.
|
||||
java.util.Collections.shuffle(p.srcPaths);
|
||||
for (FileStatus st : p.srcPaths) {
|
||||
opWriter.append(new Text(st.getPath().toString()), p.policy);
|
||||
opCount++;
|
||||
if (++synCount > SYNC_FILE_MAX) {
|
||||
opWriter.sync();
|
||||
synCount = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} finally {
|
||||
if (opWriter != null) {
|
||||
opWriter.close();
|
||||
}
|
||||
// increase replication for control file
|
||||
fs.setReplication(opList, OP_LIST_REPLICATION);
|
||||
}
|
||||
raidPolicyPathPairList.clear();
|
||||
LOG.info("Number of files=" + opCount);
|
||||
}
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
import org.apache.hadoop.util.Daemon;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
|
||||
/**
|
||||
* Implementation of {@link RaidNode} that uses map reduce jobs to raid files.
|
||||
*/
|
||||
public class DistRaidNode extends RaidNode {
|
||||
|
||||
public static final Log LOG = LogFactory.getLog(DistRaidNode.class);
|
||||
|
||||
/** Daemon thread to monitor raid job progress */
|
||||
JobMonitor jobMonitor = null;
|
||||
Daemon jobMonitorThread = null;
|
||||
|
||||
public DistRaidNode(Configuration conf) throws IOException {
|
||||
super(conf);
|
||||
this.jobMonitor = new JobMonitor(conf);
|
||||
this.jobMonitorThread = new Daemon(this.jobMonitor);
|
||||
this.jobMonitorThread.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void join() {
|
||||
super.join();
|
||||
try {
|
||||
if (jobMonitorThread != null) jobMonitorThread.join();
|
||||
} catch (InterruptedException ie) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void stop() {
|
||||
if (stopRequested) {
|
||||
return;
|
||||
}
|
||||
super.stop();
|
||||
if (jobMonitor != null) jobMonitor.running = false;
|
||||
if (jobMonitorThread != null) jobMonitorThread.interrupt();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
void raidFiles(PolicyInfo info, List<FileStatus> paths) throws IOException {
|
||||
// We already checked that no job for this policy is running
|
||||
// So we can start a new job.
|
||||
DistRaid dr = new DistRaid(conf);
|
||||
//add paths for distributed raiding
|
||||
dr.addRaidPaths(info, paths);
|
||||
boolean started = dr.startDistRaid();
|
||||
if (started) {
|
||||
jobMonitor.monitorJob(info.getName(), dr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
int getRunningJobsForPolicy(String policyName) {
|
||||
return jobMonitor.runningJobsCount(policyName);
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -1,350 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* Represents a generic encoder that can generate a parity file for a source
|
||||
* file.
|
||||
* This is an abstract class, concrete subclasses need to implement
|
||||
* encodeFileImpl.
|
||||
*/
|
||||
public abstract class Encoder {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.Encoder");
|
||||
protected Configuration conf;
|
||||
protected int stripeSize;
|
||||
protected int paritySize;
|
||||
protected Random rand;
|
||||
protected int bufSize;
|
||||
protected byte[][] readBufs;
|
||||
protected byte[][] writeBufs;
|
||||
|
||||
/**
 * An output stream that discards everything written to it, similar to
 * /dev/null.
 */
static class NullOutputStream extends OutputStream {
  @Override
  public void write(int b) throws IOException {
    // discard
  }

  @Override
  public void write(byte[] b) throws IOException {
    // discard
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    // discard
  }
}
|
||||
|
||||
Encoder(
|
||||
Configuration conf, int stripeSize, int paritySize) {
|
||||
this.conf = conf;
|
||||
this.stripeSize = stripeSize;
|
||||
this.paritySize = paritySize;
|
||||
this.rand = new Random();
|
||||
this.bufSize = conf.getInt("raid.encoder.bufsize", 1024 * 1024);
|
||||
this.readBufs = new byte[stripeSize][];
|
||||
this.writeBufs = new byte[paritySize][];
|
||||
allocateBuffers();
|
||||
}
|
||||
|
||||
private void allocateBuffers() {
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
readBufs[i] = new byte[bufSize];
|
||||
}
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
writeBufs[i] = new byte[bufSize];
|
||||
}
|
||||
}
|
||||
|
||||
private void configureBuffers(long blockSize) {
|
||||
if ((long)bufSize > blockSize) {
|
||||
bufSize = (int)blockSize;
|
||||
allocateBuffers();
|
||||
} else if (blockSize % bufSize != 0) {
|
||||
bufSize = (int)(blockSize / 256L); // heuristic.
|
||||
if (bufSize == 0) {
|
||||
bufSize = 1024;
|
||||
}
|
||||
bufSize = Math.min(bufSize, 1024 * 1024);
|
||||
allocateBuffers();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The interface to use to generate a parity file.
|
||||
* This method can be called multiple times with the same Encoder object,
|
||||
* thus allowing reuse of the buffers allocated by the Encoder object.
|
||||
*
|
||||
* @param fs The filesystem containing the source file.
|
||||
* @param srcFile The source file.
|
||||
* @param parityFile The parity file to be generated.
|
||||
*/
|
||||
public void encodeFile(
|
||||
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
|
||||
short parityRepl, Progressable reporter) throws IOException {
|
||||
FileStatus srcStat = fs.getFileStatus(srcFile);
|
||||
long srcSize = srcStat.getLen();
|
||||
long blockSize = srcStat.getBlockSize();
|
||||
|
||||
configureBuffers(blockSize);
|
||||
|
||||
// Create a tmp file to which we will write first.
|
||||
Path tmpDir = getParityTempPath();
|
||||
if (!parityFs.mkdirs(tmpDir)) {
|
||||
throw new IOException("Could not create tmp dir " + tmpDir);
|
||||
}
|
||||
Path parityTmp = new Path(tmpDir,
|
||||
parityFile.getName() + rand.nextLong());
|
||||
FSDataOutputStream out = parityFs.create(
|
||||
parityTmp,
|
||||
true,
|
||||
conf.getInt("io.file.buffer.size", 64 * 1024),
|
||||
parityRepl,
|
||||
blockSize);
|
||||
|
||||
try {
|
||||
encodeFileToStream(fs, srcFile, srcSize, blockSize, out, reporter);
|
||||
out.close();
|
||||
out = null;
|
||||
LOG.info("Wrote temp parity file " + parityTmp);
|
||||
|
||||
// delete destination if exists
|
||||
if (parityFs.exists(parityFile)){
|
||||
parityFs.delete(parityFile, false);
|
||||
}
|
||||
parityFs.mkdirs(parityFile.getParent());
|
||||
if (!parityFs.rename(parityTmp, parityFile)) {
|
||||
String msg = "Unable to rename file " + parityTmp + " to " + parityFile;
|
||||
throw new IOException (msg);
|
||||
}
|
||||
LOG.info("Wrote parity file " + parityFile);
|
||||
} finally {
|
||||
if (out != null) {
|
||||
out.close();
|
||||
}
|
||||
parityFs.delete(parityTmp, false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recovers a corrupt block in a parity file to a local file.
|
||||
*
|
||||
* The encoder generates paritySize parity blocks for a source file stripe.
|
||||
* Since we want only one of the parity blocks, this function creates
|
||||
* null outputs for the blocks to be discarded.
|
||||
*
|
||||
* @param fs The filesystem in which both srcFile and parityFile reside.
|
||||
* @param srcFile The source file.
|
||||
* @param srcSize The size of the source file.
|
||||
* @param blockSize The block size for the source/parity files.
|
||||
* @param corruptOffset The location of corruption in the parity file.
|
||||
* @param localBlockFile The destination for the reovered block.
|
||||
*/
|
||||
public void recoverParityBlockToFile(
|
||||
FileSystem fs,
|
||||
Path srcFile, long srcSize, long blockSize,
|
||||
Path parityFile, long corruptOffset,
|
||||
File localBlockFile) throws IOException {
|
||||
OutputStream out = new FileOutputStream(localBlockFile);
|
||||
try {
|
||||
recoverParityBlockToStream(fs, srcFile, srcSize, blockSize, parityFile,
|
||||
corruptOffset, out);
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recovers a corrupt block in a parity file to a local file.
|
||||
*
|
||||
* The encoder generates paritySize parity blocks for a source file stripe.
|
||||
* Since we want only one of the parity blocks, this function creates
|
||||
* null outputs for the blocks to be discarded.
|
||||
*
|
||||
* @param fs The filesystem in which both srcFile and parityFile reside.
|
||||
* @param srcFile The source file.
|
||||
* @param srcSize The size of the source file.
|
||||
* @param blockSize The block size for the source/parity files.
|
||||
* @param corruptOffset The location of corruption in the parity file.
|
||||
* @param out The destination for the reovered block.
|
||||
*/
|
||||
public void recoverParityBlockToStream(
|
||||
FileSystem fs,
|
||||
Path srcFile, long srcSize, long blockSize,
|
||||
Path parityFile, long corruptOffset,
|
||||
OutputStream out) throws IOException {
|
||||
LOG.info("Recovering parity block" + parityFile + ":" + corruptOffset);
|
||||
// Get the start offset of the corrupt block.
|
||||
corruptOffset = (corruptOffset / blockSize) * blockSize;
|
||||
// Output streams to each block in the parity file stripe.
|
||||
OutputStream[] outs = new OutputStream[paritySize];
|
||||
long indexOfCorruptBlockInParityStripe =
|
||||
(corruptOffset / blockSize) % paritySize;
|
||||
LOG.info("Index of corrupt block in parity stripe: " +
|
||||
indexOfCorruptBlockInParityStripe);
|
||||
// Create a real output stream for the block we want to recover,
|
||||
// and create null streams for the rest.
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
if (indexOfCorruptBlockInParityStripe == i) {
|
||||
outs[i] = out;
|
||||
} else {
|
||||
outs[i] = new NullOutputStream();
|
||||
}
|
||||
}
|
||||
// Get the stripe index and start offset of stripe.
|
||||
long stripeIdx = corruptOffset / (paritySize * blockSize);
|
||||
long stripeStart = stripeIdx * blockSize * stripeSize;
|
||||
|
||||
// Get input streams to each block in the source file stripe.
|
||||
InputStream[] blocks = stripeInputs(fs, srcFile, stripeStart,
|
||||
srcSize, blockSize);
|
||||
LOG.info("Starting recovery by using source stripe " +
|
||||
srcFile + ":" + stripeStart);
|
||||
// Read the data from the blocks and write to the parity file.
|
||||
encodeStripe(blocks, stripeStart, blockSize, outs,
|
||||
new RaidUtils.DummyProgressable());
|
||||
}
|
||||
|
||||
/**
|
||||
* Recovers a corrupt block in a parity file to an output stream.
|
||||
*
|
||||
* The encoder generates paritySize parity blocks for a source file stripe.
|
||||
* Since there is only one output provided, some blocks are written out to
|
||||
* files before being written out to the output.
|
||||
*
|
||||
* @param fs The filesystem in which both srcFile and parityFile reside.
|
||||
* @param srcFile The source file.
|
||||
* @param srcSize The size of the source file.
|
||||
* @param blockSize The block size for the source/parity files.
|
||||
* @param out The destination for the reovered block.
|
||||
*/
|
||||
private void encodeFileToStream(FileSystem fs, Path srcFile, long srcSize,
|
||||
long blockSize, OutputStream out, Progressable reporter) throws IOException {
|
||||
OutputStream[] tmpOuts = new OutputStream[paritySize];
|
||||
// One parity block can be written directly to out, rest to local files.
|
||||
tmpOuts[0] = out;
|
||||
File[] tmpFiles = new File[paritySize - 1];
|
||||
for (int i = 0; i < paritySize - 1; i++) {
|
||||
tmpFiles[i] = File.createTempFile("parity", "_" + i);
|
||||
LOG.info("Created tmp file " + tmpFiles[i]);
|
||||
tmpFiles[i].deleteOnExit();
|
||||
}
|
||||
try {
|
||||
// Loop over stripes in the file.
|
||||
for (long stripeStart = 0; stripeStart < srcSize;
|
||||
stripeStart += blockSize * stripeSize) {
|
||||
reporter.progress();
|
||||
LOG.info("Starting encoding of stripe " + srcFile + ":" + stripeStart);
|
||||
// Create input streams for blocks in the stripe.
|
||||
InputStream[] blocks = stripeInputs(fs, srcFile, stripeStart,
|
||||
srcSize, blockSize);
|
||||
// Create output streams to the temp files.
|
||||
for (int i = 0; i < paritySize - 1; i++) {
|
||||
tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
|
||||
}
|
||||
// Call the implementation of encoding.
|
||||
encodeStripe(blocks, stripeStart, blockSize, tmpOuts, reporter);
|
||||
// Close output streams to the temp files and write the temp files
|
||||
// to the output provided.
|
||||
for (int i = 0; i < paritySize - 1; i++) {
|
||||
tmpOuts[i + 1].close();
|
||||
tmpOuts[i + 1] = null;
|
||||
InputStream in = new FileInputStream(tmpFiles[i]);
|
||||
RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
|
||||
reporter.progress();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
for (int i = 0; i < paritySize - 1; i++) {
|
||||
if (tmpOuts[i + 1] != null) {
|
||||
tmpOuts[i + 1].close();
|
||||
}
|
||||
tmpFiles[i].delete();
|
||||
LOG.info("Deleted tmp file " + tmpFiles[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return input streams for each block in a source file's stripe.
|
||||
* @param fs The filesystem where the file resides.
|
||||
* @param srcFile The source file.
|
||||
* @param stripeStartOffset The start offset of the stripe.
|
||||
* @param srcSize The size of the source file.
|
||||
* @param blockSize The block size for the source file.
|
||||
*/
|
||||
protected InputStream[] stripeInputs(
|
||||
FileSystem fs,
|
||||
Path srcFile,
|
||||
long stripeStartOffset,
|
||||
long srcSize,
|
||||
long blockSize
|
||||
) throws IOException {
|
||||
InputStream[] blocks = new InputStream[stripeSize];
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
long seekOffset = stripeStartOffset + i * blockSize;
|
||||
if (seekOffset < srcSize) {
|
||||
FSDataInputStream in = fs.open(
|
||||
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
||||
in.seek(seekOffset);
|
||||
LOG.info("Opening stream at " + srcFile + ":" + seekOffset);
|
||||
blocks[i] = in;
|
||||
} else {
|
||||
LOG.info("Using zeros at offset " + seekOffset);
|
||||
// We have no src data at this offset.
|
||||
blocks[i] = new RaidUtils.ZeroInputStream(
|
||||
seekOffset + blockSize);
|
||||
}
|
||||
}
|
||||
return blocks;
|
||||
}
|
||||
|
||||
/**
|
||||
* The implementation of generating parity data for a stripe.
|
||||
*
|
||||
* @param blocks The streams to blocks in the stripe.
|
||||
* @param stripeStartOffset The start offset of the stripe
|
||||
* @param blockSize The maximum size of a block.
|
||||
* @param outs output streams to the parity blocks.
|
||||
* @param reporter progress indicator.
|
||||
*/
|
||||
protected abstract void encodeStripe(
|
||||
InputStream[] blocks,
|
||||
long stripeStartOffset,
|
||||
long blockSize,
|
||||
OutputStream[] outs,
|
||||
Progressable reporter) throws IOException;
|
||||
|
||||
/**
|
||||
* Return the temp path for the parity file
|
||||
*/
|
||||
protected abstract Path getParityTempPath();
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
/**
 * Contract for an erasure code operating on symbols stored in ints.
 * Every symbol occupies the symbolSize() least significant bits of its int.
 */
public interface ErasureCode {

  /**
   * Computes the parity symbols for a message.
   *
   * @param message input symbols, one per int (least significant bits);
   *                expected to hold stripeSize() elements.
   * @param parity  (out) receives the parity symbols, one per int (least
   *                significant bits); expected to hold paritySize()
   *                elements.
   */
  void encode(int[] message, int[] parity);

  /**
   * Reconstructs symbols that are missing from a codeword.
   *
   * @param data            parity followed by message: the parity symbols
   *                        occupy the first part of the array. Holds
   *                        stripeSize() + paritySize() elements, each in
   *                        the least significant bits of its int.
   * @param erasedLocations indexes into data that are not available.
   * @param erasedValues    (out) the decoded values, in the same order as
   *                        erasedLocations.
   */
  void decode(int[] data, int[] erasedLocations, int[] erasedValues);

  /**
   * @return the number of elements in the message.
   */
  int stripeSize();

  /**
   * @return the number of elements in the code.
   */
  int paritySize();

  /**
   * @return the number of bits in each symbol.
   */
  int symbolSize();
}
|
|
@ -1,350 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Implementation of Galois field arithmetic with 2^p elements.
 * The input must be unsigned integers.
 *
 * All products and quotients are precomputed into lookup tables when an
 * instance is created, so the arithmetic methods are plain table lookups.
 * Instances are cached per (field size, primitive polynomial) pair and
 * obtained through {@link #getInstance(int, int)}.
 */
public class GaloisField {

  // logTable[v] is the exponent e with 2^e == v; powTable[e] is 2^e.
  private final int[] logTable;
  private final int[] powTable;
  // mulTable[a][b] == a * b and divTable[a][b] == a / b in the field.
  // Column 0 of divTable is never populated (division by zero).
  private final int[][] mulTable;
  private final int[][] divTable;
  private final int fieldSize;
  // Multiplicative order used to wrap exponents: fieldSize - 1.
  private final int primitivePeriod;
  private final int primitivePolynomial;

  // Field size 256 is good for byte based system
  private static final int DEFAULT_FIELD_SIZE = 256;
  // primitive polynomial 1 + X^2 + X^3 + X^4 + X^8
  private static final int DEFAULT_PRIMITIVE_POLYNOMIAL = 285;

  // Cache of created fields; guarded by synchronizing on the map itself.
  static private final Map<Integer, GaloisField> instances =
    new HashMap<Integer, GaloisField>();

  /**
   * Get the object that performs Galois field arithmetic for the given
   * parameters. The same instance is returned for repeated requests.
   *
   * @param fieldSize size of the field
   * @param primitivePolynomial a primitive polynomial corresponding to the
   *                            size
   * @return the cached (or newly created) field
   */
  public static GaloisField getInstance(int fieldSize,
                                        int primitivePolynomial) {
    // Pack both parameters into one cache key: size in the high 16 bits,
    // polynomial in the low 16 bits.
    int key = ((fieldSize << 16) & 0xFFFF0000) + (primitivePolynomial & 0x0000FFFF);
    GaloisField gf;
    synchronized (instances) {
      gf = instances.get(key);
      if (gf == null) {
        gf = new GaloisField(fieldSize, primitivePolynomial);
        instances.put(key, gf);
      }
    }
    return gf;
  }

  /**
   * Get the object that performs Galois field arithmetic with the default
   * setting (field size 256, primitive polynomial 285).
   *
   * @return the default field
   */
  public static GaloisField getInstance() {
    return getInstance(DEFAULT_FIELD_SIZE, DEFAULT_PRIMITIVE_POLYNOMIAL);
  }

  /**
   * Builds the log/power tables by repeatedly multiplying by 2 and reducing
   * with the primitive polynomial, then derives the multiplication and
   * division tables from them.
   */
  private GaloisField(int fieldSize, int primitivePolynomial) {
    assert fieldSize > 0;
    assert primitivePolynomial > 0;

    this.fieldSize = fieldSize;
    this.primitivePeriod = fieldSize - 1;
    this.primitivePolynomial = primitivePolynomial;
    logTable = new int[fieldSize];
    powTable = new int[fieldSize];
    mulTable = new int[fieldSize][fieldSize];
    divTable = new int[fieldSize][fieldSize];
    int value = 1;
    for (int pow = 0; pow < fieldSize - 1; pow++) {
      powTable[pow] = value;
      logTable[value] = pow;
      value = value * 2;
      if (value >= fieldSize) {
        // Reduce back into the field.
        value = value ^ primitivePolynomial;
      }
    }
    // building multiplication table
    for (int i = 0; i < fieldSize; i++) {
      for (int j = 0; j < fieldSize; j++) {
        if (i == 0 || j == 0) {
          mulTable[i][j] = 0;
          continue;
        }
        // a * b == 2^(log a + log b), exponent wrapped by the period.
        int z = logTable[i] + logTable[j];
        z = z >= primitivePeriod ? z - primitivePeriod : z;
        z = powTable[z];
        mulTable[i][j] = z;
      }
    }
    // building division table
    for (int i = 0; i < fieldSize; i++) {
      for (int j = 1; j < fieldSize; j++) {
        if (i == 0) {
          divTable[i][j] = 0;
          continue;
        }
        // a / b == 2^(log a - log b), exponent wrapped by the period.
        int z = logTable[i] - logTable[j];
        z = z < 0 ? z + primitivePeriod : z;
        z = powTable[z];
        divTable[i][j] = z;
      }
    }
  }

  /**
   * Return number of elements in the field
   * @return number of elements in the field
   */
  public int getFieldSize() {
    return fieldSize;
  }

  /**
   * Return the primitive polynomial in GF(2)
   * @return primitive polynomial as an integer
   */
  public int getPrimitivePolynomial() {
    return primitivePolynomial;
  }

  /**
   * Compute the sum of two field elements
   * @param x input field element
   * @param y input field element
   * @return result of addition
   */
  public int add(int x, int y) {
    assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
    return x ^ y;
  }

  /**
   * Compute the product of two field elements
   * @param x input field element
   * @param y input field element
   * @return result of multiplication
   */
  public int multiply(int x, int y) {
    assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
    return mulTable[x][y];
  }

  /**
   * Compute the quotient of two field elements
   * @param x input field element
   * @param y input field element, must not be zero
   * @return x/y
   */
  public int divide(int x, int y) {
    assert(x >= 0 && x < getFieldSize() && y > 0 && y < getFieldSize());
    return divTable[x][y];
  }

  /**
   * Compute power n of a field element.
   *
   * The exponent is reduced modulo the multiplicative period, so any int
   * value of n is accepted: large exponents cannot overflow and a negative
   * n yields the corresponding power of the inverse of x. As before,
   * x^0 is 1 for every x and 0^n is 0 for every non-zero n.
   *
   * @param x input field element
   * @param n power
   * @return x^n
   */
  public int power(int x, int n) {
    assert(x >= 0 && x < getFieldSize());
    if (n == 0) {
      return 1;
    }
    if (x == 0) {
      return 0;
    }
    // Use long arithmetic: logTable[x] * n can exceed the int range.
    long exponent = ((long) logTable[x] * n) % primitivePeriod;
    if (exponent < 0) {
      // Java's % keeps the sign of the dividend; shift into [0, period).
      exponent += primitivePeriod;
    }
    return powTable[(int) exponent];
  }

  /**
   * Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
   * that Vz=y. The output z will be placed in y.
   * @param x the vector which describes the Vandermonde matrix
   * @param y right-hand side of the Vandermonde system equation;
   *          it is overwritten with the solution
   */
  public void solveVandermondeSystem(int[] x, int[] y) {
    solveVandermondeSystem(x, y, x.length);
  }

  /**
   * Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
   * that Vz=y. The output z will be placed in y.
   * @param x the vector which describes the Vandermonde matrix
   * @param y right-hand side of the Vandermonde system equation;
   *          it is overwritten with the solution
   * @param len consider x and y only from 0...len-1; both arrays must hold
   *            at least len elements
   */
  public void solveVandermondeSystem(int[] x, int[] y, int len) {
    // Indexes up to len - 1 are accessed in both arrays, so each must be
    // at least len long (the previous check had the comparison inverted).
    assert(len <= x.length && len <= y.length);
    for (int i = 0; i < len - 1; i++) {
      for (int j = len - 1; j > i; j--) {
        y[j] = y[j] ^ mulTable[x[i]][y[j - 1]];
      }
    }
    for (int i = len - 1; i >= 0; i--) {
      for (int j = i + 1; j < len; j++) {
        y[j] = divTable[y[j]][x[j] ^ x[j - i - 1]];
      }
      for (int j = i; j < len - 1; j++) {
        y[j] = y[j] ^ y[j + 1];
      }
    }
  }

  /**
   * Compute the product of two polynomials. The index in the
   * array corresponds to the power of the entry. For example p[0] is the
   * constant term of the polynomial p.
   * @param p input polynomial
   * @param q input polynomial
   * @return polynomial representing p*q
   */
  public int[] multiply(int[] p, int[] q) {
    int len = p.length + q.length - 1;
    // A freshly allocated int[] is already zero-filled.
    int[] result = new int[len];
    for (int i = 0; i < p.length; i++) {
      for (int j = 0; j < q.length; j++) {
        result[i + j] = add(result[i + j], multiply(p[i], q[j]));
      }
    }
    return result;
  }

  /**
   * Compute the remainder of a dividend and divisor pair. The index in the
   * array corresponds to the power of the entry. For example p[0] is the
   * constant term of the polynomial p.
   * @param dividend dividend polynomial, the remainder will be placed here
   *                 on return
   * @param divisor divisor polynomial; its leading coefficient (last
   *                element) must not be zero
   */
  public void remainder(int[] dividend, int[] divisor) {
    for (int i = dividend.length - divisor.length; i >= 0; i--) {
      // Factor that cancels the current leading term of the dividend.
      int ratio =
        divTable[dividend[i + divisor.length - 1]][divisor[divisor.length - 1]];
      for (int j = 0; j < divisor.length; j++) {
        int k = j + i;
        dividend[k] = dividend[k] ^ mulTable[ratio][divisor[j]];
      }
    }
  }

  /**
   * Compute the sum of two polynomials. The index in the
   * array corresponds to the power of the entry. For example p[0] is the
   * constant term of the polynomial p.
   * @param p input polynomial
   * @param q input polynomial
   * @return polynomial representing p+q
   */
  public int[] add(int[] p, int[] q) {
    int len = Math.max(p.length, q.length);
    int[] result = new int[len];
    for (int i = 0; i < len; i++) {
      if (i < p.length && i < q.length) {
        result[i] = add(p[i], q[i]);
      } else if (i < p.length) {
        result[i] = p[i];
      } else {
        result[i] = q[i];
      }
    }
    return result;
  }

  /**
   * Substitute x into polynomial p(x).
   * @param p input polynomial
   * @param x input field element
   * @return p(x)
   */
  public int substitute(int[] p, int x) {
    int result = 0;
    // y runs through x^0, x^1, x^2, ...
    int y = 1;
    for (int i = 0; i < p.length; i++) {
      result = result ^ mulTable[p[i]][y];
      y = mulTable[x][y];
    }
    return result;
  }

  /**
   * Perform Gaussian elimination on the given matrix, in place. This matrix
   * has to be a fat matrix (number of rows &lt; number of columns).
   *
   * For every column i a row with a non-zero entry in that column is moved
   * to the diagonal, scaled so the pivot becomes 1, and used to clear the
   * column in all rows below; a second pass clears the entries above each
   * pivot. A column without any usable pivot is skipped. Rows of the
   * supplied array may be reordered.
   *
   * @param matrix the matrix to reduce; modified in place
   */
  public void gaussianElimination(int[][] matrix) {
    assert(matrix != null && matrix.length > 0 && matrix[0].length > 0
        && matrix.length < matrix[0].length);
    int height = matrix.length;
    int width = matrix[0].length;
    for (int i = 0; i < height; i++) {
      boolean pivotFound = false;
      // scan the column for a nonzero pivot and swap it to the diagonal
      for (int j = i; j < height; j++) {
        // Look down column i (row j, column i). Inspecting row i instead
        // could swap in a row whose column-i entry is zero and lead to a
        // division by zero below.
        if (matrix[j][i] != 0) {
          int[] tmp = matrix[i];
          matrix[i] = matrix[j];
          matrix[j] = tmp;
          pivotFound = true;
          break;
        }
      }
      if (!pivotFound) {
        continue;
      }
      int pivot = matrix[i][i];
      for (int j = i; j < width; j++) {
        matrix[i][j] = divide(matrix[i][j], pivot);
      }
      for (int j = i + 1; j < height; j++) {
        int lead = matrix[j][i];
        for (int k = i; k < width; k++) {
          matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k]));
        }
      }
    }
    // Back substitution: clear the entries above every pivot.
    for (int i = height - 1; i >=0; i--) {
      for (int j = 0; j < i; j++) {
        int lead = matrix[j][i];
        for (int k = i; k < width; k++) {
          matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k]));
        }
      }
    }
  }
}
|
|
@ -1,144 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.apache.hadoop.util.LineReader;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
||||
/**
|
||||
* Represents the contents of a HAR Index file. The HAR is assumed to be
|
||||
* comprising of RAID parity files only and no directories.
|
||||
*/
|
||||
public class HarIndex {
|
||||
public static final String indexFileName = "_index";
|
||||
private List<IndexEntry> entries = new LinkedList<IndexEntry>();
|
||||
|
||||
/**
|
||||
* Represents information in a single line of the HAR index file.
|
||||
*/
|
||||
public static class IndexEntry {
|
||||
String fileName; // Name of the file in the part file.
|
||||
long startOffset; // Start offset within the part file.
|
||||
long length; // Length of this file within the part file.
|
||||
long mtime; // Modification time of the file.
|
||||
String partFileName; // Name of the part file.
|
||||
|
||||
IndexEntry(String fileName, long startOffset, long length,
|
||||
long mtime, String partFileName) {
|
||||
this.fileName = fileName;
|
||||
this.startOffset = startOffset;
|
||||
this.length = length;
|
||||
this.mtime = mtime;
|
||||
this.partFileName = partFileName;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "fileName=" + fileName +
|
||||
", startOffset=" + startOffset +
|
||||
", length=" + length +
|
||||
", mtime=" + mtime +
|
||||
", partFileName=" + partFileName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that reads the contents of the index file.
|
||||
* @param in An input stream to the index file.
|
||||
* @param max The size of the index file.
|
||||
* @throws IOException
|
||||
*/
|
||||
public HarIndex(InputStream in, long max) throws IOException {
|
||||
LineReader lineReader = new LineReader(in);
|
||||
Text text = new Text();
|
||||
long nread = 0;
|
||||
while (nread < max) {
|
||||
int n = lineReader.readLine(text);
|
||||
nread += n;
|
||||
String line = text.toString();
|
||||
try {
|
||||
parseLine(line);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new IOException("UnsupportedEncodingException after reading " +
|
||||
nread + "bytes");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses each line and extracts relevant information.
|
||||
* @param line
|
||||
* @throws UnsupportedEncodingException
|
||||
*/
|
||||
void parseLine(String line) throws UnsupportedEncodingException {
|
||||
String[] splits = line.split(" ");
|
||||
|
||||
boolean isDir = "dir".equals(splits[1]) ? true: false;
|
||||
if (!isDir && splits.length >= 6) {
|
||||
String name = URLDecoder.decode(splits[0], "UTF-8");
|
||||
String partName = URLDecoder.decode(splits[2], "UTF-8");
|
||||
long startIndex = Long.parseLong(splits[3]);
|
||||
long length = Long.parseLong(splits[4]);
|
||||
String[] newsplits = URLDecoder.decode(splits[5],"UTF-8").split(" ");
|
||||
if (newsplits != null && newsplits.length >= 4) {
|
||||
long mtime = Long.parseLong(newsplits[0]);
|
||||
IndexEntry entry = new IndexEntry(
|
||||
name, startIndex, length, mtime, partName);
|
||||
entries.add(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the index entry corresponding to a HAR partFile at an offset.
|
||||
* @param partName The name of the part file (part-*).
|
||||
* @param partFileOffset The offset into the part file.
|
||||
* @return The entry corresponding to partName:partFileOffset.
|
||||
*/
|
||||
public IndexEntry findEntry(String partName, long partFileOffset) {
|
||||
for (IndexEntry e: entries) {
|
||||
boolean nameMatch = partName.equals(e.partFileName);
|
||||
boolean inRange = (partFileOffset >= e.startOffset) &&
|
||||
(partFileOffset < e.startOffset + e.length);
|
||||
if (nameMatch && inRange) {
|
||||
return e;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the index entry corresponding to a file in the archive
|
||||
*/
|
||||
public IndexEntry findEntryByFileName(String fileName) {
|
||||
for (IndexEntry e: entries) {
|
||||
if (fileName.equals(e.fileName)) {
|
||||
return e;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,211 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Periodically monitors the status of jobs registered with it.
|
||||
*
|
||||
* Jobs that are submitted for the same policy name are kept in the same list,
|
||||
* and the list itself is kept in a map that has the policy name as the key and
|
||||
* the list as value.
|
||||
*/
|
||||
class JobMonitor implements Runnable {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.JobMonitor");
|
||||
|
||||
volatile boolean running = true;
|
||||
|
||||
private Map<String, List<DistRaid>> jobs;
|
||||
public static final String JOBMONITOR_INTERVAL_KEY = "raid.jobmonitor.interval";
|
||||
private long jobMonitorInterval;
|
||||
private volatile long jobsMonitored = 0;
|
||||
private volatile long jobsSucceeded = 0;
|
||||
|
||||
public JobMonitor(Configuration conf) {
|
||||
jobMonitorInterval = conf.getLong(JOBMONITOR_INTERVAL_KEY, 60000);
|
||||
jobs = new java.util.HashMap<String, List<DistRaid>>();
|
||||
}
|
||||
|
||||
public void run() {
|
||||
while (running) {
|
||||
try {
|
||||
LOG.info("JobMonitor thread continuing to run...");
|
||||
doMonitor();
|
||||
} catch (Throwable e) {
|
||||
LOG.error("JobMonitor encountered exception " +
|
||||
StringUtils.stringifyException(e));
|
||||
// All expected exceptions are caught by doMonitor(). It is better
|
||||
// to exit now, this will prevent RaidNode from submitting more jobs
|
||||
// since the number of running jobs will never decrease.
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Periodically checks status of running map-reduce jobs.
|
||||
*/
|
||||
public void doMonitor() {
|
||||
while (running) {
|
||||
String[] keys = null;
|
||||
// Make a copy of the names of the current jobs.
|
||||
synchronized(jobs) {
|
||||
keys = jobs.keySet().toArray(new String[0]);
|
||||
}
|
||||
|
||||
// Check all the jobs. We do not want to block access to `jobs`
|
||||
// because that will prevent new jobs from being added.
|
||||
// This is safe because JobMonitor.run is the only code that can
|
||||
// remove a job from `jobs`. Thus all elements in `keys` will have
|
||||
// valid values.
|
||||
Map<String, List<DistRaid>> finishedJobs =
|
||||
new HashMap<String, List<DistRaid>>();
|
||||
|
||||
for (String key: keys) {
|
||||
// For each policy being monitored, get the list of jobs running.
|
||||
DistRaid[] jobListCopy = null;
|
||||
synchronized(jobs) {
|
||||
List<DistRaid> jobList = jobs.get(key);
|
||||
synchronized(jobList) {
|
||||
jobListCopy = jobList.toArray(new DistRaid[jobList.size()]);
|
||||
}
|
||||
}
|
||||
// The code that actually contacts the JobTracker is not synchronized,
|
||||
// it uses copies of the list of jobs.
|
||||
for (DistRaid job: jobListCopy) {
|
||||
// Check each running job.
|
||||
try {
|
||||
boolean complete = job.checkComplete();
|
||||
if (complete) {
|
||||
addJob(finishedJobs, key, job);
|
||||
if (job.successful()) {
|
||||
jobsSucceeded++;
|
||||
}
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
// If there was an error, consider the job finished.
|
||||
addJob(finishedJobs, key, job);
|
||||
LOG.error("JobMonitor exception", ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (finishedJobs.size() > 0) {
|
||||
for (String key: finishedJobs.keySet()) {
|
||||
List<DistRaid> finishedJobList = finishedJobs.get(key);
|
||||
// Iterate through finished jobs and remove from jobs.
|
||||
// removeJob takes care of locking.
|
||||
for (DistRaid job: finishedJobList) {
|
||||
removeJob(jobs, key, job);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
Thread.sleep(jobMonitorInterval);
|
||||
} catch (InterruptedException ie) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int runningJobsCount(String key) {
|
||||
int count = 0;
|
||||
synchronized(jobs) {
|
||||
if (jobs.containsKey(key)) {
|
||||
List<DistRaid> jobList = jobs.get(key);
|
||||
synchronized(jobList) {
|
||||
count = jobList.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
public void monitorJob(String key, DistRaid job) {
|
||||
addJob(jobs, key, job);
|
||||
jobsMonitored++;
|
||||
}
|
||||
|
||||
public long jobsMonitored() {
|
||||
return this.jobsMonitored;
|
||||
}
|
||||
|
||||
public long jobsSucceeded() {
|
||||
return this.jobsSucceeded;
|
||||
}
|
||||
|
||||
// For test code
|
||||
int runningJobsCount() {
|
||||
int total = 0;
|
||||
synchronized(jobs) {
|
||||
for (String key: jobs.keySet()) {
|
||||
total += jobs.get(key).size();
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
private static void addJob(Map<String, List<DistRaid>> jobsMap,
|
||||
String jobName, DistRaid job) {
|
||||
synchronized(jobsMap) {
|
||||
List<DistRaid> list = null;
|
||||
if (jobsMap.containsKey(jobName)) {
|
||||
list = jobsMap.get(jobName);
|
||||
} else {
|
||||
list = new LinkedList<DistRaid>();
|
||||
jobsMap.put(jobName, list);
|
||||
}
|
||||
synchronized(list) {
|
||||
list.add(job);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void removeJob(Map<String, List<DistRaid>> jobsMap,
|
||||
String jobName, DistRaid job) {
|
||||
synchronized(jobsMap) {
|
||||
if (jobsMap.containsKey(jobName)) {
|
||||
List<DistRaid> list = jobsMap.get(jobName);
|
||||
synchronized(list) {
|
||||
for (Iterator<DistRaid> it = list.iterator(); it.hasNext(); ) {
|
||||
DistRaid val = it.next();
|
||||
if (val == job) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
if (list.size() == 0) {
|
||||
jobsMap.remove(jobName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,171 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
||||
import org.apache.hadoop.net.NetUtils;
|
||||
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
|
||||
import org.apache.hadoop.raid.RaidNode;
|
||||
import org.apache.hadoop.raid.RaidUtils;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
|
||||
|
||||
/**
|
||||
* This class fixes source file blocks using the parity file,
|
||||
* and parity file blocks using the source file.
|
||||
* It periodically fetches the list of corrupt files from the namenode,
|
||||
* and figures out the location of the bad block by reading through
|
||||
* the corrupt file.
|
||||
*/
|
||||
public class LocalBlockFixer extends BlockFixer {
|
||||
public static final Log LOG = LogFactory.getLog(LocalBlockFixer.class);
|
||||
|
||||
private java.util.HashMap<String, java.util.Date> history;
|
||||
|
||||
private BlockFixerHelper helper;
|
||||
|
||||
public LocalBlockFixer(Configuration conf) throws IOException {
|
||||
super(conf);
|
||||
history = new java.util.HashMap<String, java.util.Date>();
|
||||
helper = new BlockFixerHelper(getConf());
|
||||
}
|
||||
|
||||
public void run() {
|
||||
while (running) {
|
||||
try {
|
||||
LOG.info("LocalBlockFixer continuing to run...");
|
||||
doFix();
|
||||
} catch (Exception e) {
|
||||
LOG.error(StringUtils.stringifyException(e));
|
||||
} catch (Error err) {
|
||||
LOG.error("Exiting after encountering " +
|
||||
StringUtils.stringifyException(err));
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void doFix() throws InterruptedException, IOException {
|
||||
while (running) {
|
||||
// Sleep before proceeding to fix files.
|
||||
Thread.sleep(blockFixInterval);
|
||||
|
||||
// Purge history older than the history interval.
|
||||
purgeHistory();
|
||||
|
||||
List<Path> corruptFiles = getCorruptFiles();
|
||||
|
||||
filterUnfixableSourceFiles(corruptFiles.iterator());
|
||||
|
||||
if (corruptFiles.isEmpty()) {
|
||||
// If there are no corrupt files, retry after some time.
|
||||
continue;
|
||||
}
|
||||
LOG.info("Found " + corruptFiles.size() + " corrupt files.");
|
||||
|
||||
helper.sortCorruptFiles(corruptFiles);
|
||||
|
||||
for (Path srcPath: corruptFiles) {
|
||||
if (!running) break;
|
||||
try {
|
||||
boolean fixed = helper.fixFile(srcPath);
|
||||
LOG.info("Adding " + srcPath + " to history");
|
||||
history.put(srcPath.toString(), new java.util.Date());
|
||||
if (fixed) {
|
||||
incrFilesFixed();
|
||||
}
|
||||
} catch (IOException ie) {
|
||||
LOG.error("Hit error while processing " + srcPath +
|
||||
": " + StringUtils.stringifyException(ie));
|
||||
// Do nothing, move on to the next file.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* We maintain history of fixed files because a fixed file may appear in
|
||||
* the list of corrupt files if we loop around too quickly.
|
||||
* This function removes the old items in the history so that we can
|
||||
* recognize files that have actually become corrupt since being fixed.
|
||||
*/
|
||||
void purgeHistory() {
|
||||
java.util.Date cutOff = new java.util.Date(Time.now() -
|
||||
historyInterval);
|
||||
List<String> toRemove = new java.util.ArrayList<String>();
|
||||
|
||||
for (String key: history.keySet()) {
|
||||
java.util.Date item = history.get(key);
|
||||
if (item.before(cutOff)) {
|
||||
toRemove.add(key);
|
||||
}
|
||||
}
|
||||
for (String key: toRemove) {
|
||||
LOG.info("Removing " + key + " from history");
|
||||
history.remove(key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A list of corrupt files as obtained from the namenode
|
||||
*/
|
||||
List<Path> getCorruptFiles() throws IOException {
|
||||
DistributedFileSystem dfs = helper.getDFS(new Path("/"));
|
||||
|
||||
String[] files = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
List<Path> corruptFiles = new LinkedList<Path>();
|
||||
for (String f: files) {
|
||||
Path p = new Path(f);
|
||||
if (!history.containsKey(p.toString())) {
|
||||
corruptFiles.add(p);
|
||||
}
|
||||
}
|
||||
RaidUtils.filterTrash(getConf(), corruptFiles);
|
||||
return corruptFiles;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
|
||||
/**
|
||||
* Implementation of {@link RaidNode} that performs raiding locally.
|
||||
*/
|
||||
public class LocalRaidNode extends RaidNode {
|
||||
|
||||
public static final Log LOG = LogFactory.getLog(LocalRaidNode.class);
|
||||
|
||||
public LocalRaidNode(Configuration conf) throws IOException {
|
||||
super(conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDocs}
|
||||
*/
|
||||
@Override
|
||||
void raidFiles(PolicyInfo info, List<FileStatus> paths) throws IOException {
|
||||
doRaid(conf, info, paths);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDocs}
|
||||
*/
|
||||
@Override
|
||||
int getRunningJobsForPolicy(String policyName) {
|
||||
return 0;
|
||||
}
|
||||
}
|
|
@ -1,151 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* Wraps over multiple input streams and provides an input stream that is
|
||||
* an XOR of the streams.
|
||||
*/
|
||||
class ParityInputStream extends InputStream {
|
||||
private static final int DEFAULT_BUFSIZE = 5*1024*1024;
|
||||
private InputStream[] streams;
|
||||
private byte[] xor;
|
||||
private byte[] buf;
|
||||
private int bufSize;
|
||||
private long remaining;
|
||||
private int available = 0;
|
||||
private int readPos = 0;
|
||||
|
||||
public ParityInputStream(
|
||||
InputStream[] streams, long parityBlockSize, byte[] buf, byte[] xor) {
|
||||
assert buf.length == xor.length;
|
||||
bufSize = buf.length;
|
||||
this.streams = streams;
|
||||
remaining = parityBlockSize;
|
||||
this.buf = buf;
|
||||
this.xor = xor;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
makeAvailable();
|
||||
if (available == 0) {
|
||||
return -1;
|
||||
}
|
||||
int ret = xor[readPos];
|
||||
readPos++;
|
||||
available--;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(byte b[], int off, int len) throws IOException {
|
||||
makeAvailable();
|
||||
if (available == 0) {
|
||||
return -1;
|
||||
}
|
||||
int ret = Math.min(len, available);
|
||||
for (int i = 0; i < ret; ++i) {
|
||||
b[off+i] = xor[readPos+i];
|
||||
}
|
||||
readPos += ret;
|
||||
available -= ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
for (InputStream i: streams) {
|
||||
i.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send the contents of the stream to the sink.
|
||||
* @param sink
|
||||
* @param reporter
|
||||
* @throws IOException
|
||||
*/
|
||||
public void drain(OutputStream sink, Progressable reporter)
|
||||
throws IOException {
|
||||
|
||||
while (true) {
|
||||
makeAvailable();
|
||||
if (available == 0) {
|
||||
break;
|
||||
}
|
||||
sink.write(xor, readPos, available);
|
||||
available = 0;
|
||||
if (reporter != null) {
|
||||
reporter.progress();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Make some bytes available for reading in the internal buffer.
|
||||
* @throws IOException
|
||||
*/
|
||||
private void makeAvailable() throws IOException {
|
||||
if (available > 0 || remaining <= 0) {
|
||||
return;
|
||||
}
|
||||
// Read some bytes from the first stream.
|
||||
int xorlen = (int)Math.min(remaining, bufSize);
|
||||
readExact(streams[0], xor, xorlen);
|
||||
|
||||
// Read bytes from all the other streams and xor them.
|
||||
for (int i = 1; i < streams.length; i++) {
|
||||
readExact(streams[i], buf, xorlen);
|
||||
|
||||
for (int j = 0; j < xorlen; j++) {
|
||||
xor[j] ^= buf[j];
|
||||
}
|
||||
}
|
||||
|
||||
remaining -= xorlen;
|
||||
available = xorlen;
|
||||
readPos = 0;
|
||||
readPos = 0;
|
||||
}
|
||||
|
||||
private static void readExact(InputStream in, byte[] bufs, int toRead)
|
||||
throws IOException {
|
||||
int tread = 0;
|
||||
while (tread < toRead) {
|
||||
int read = in.read(bufs, tread, toRead - tread);
|
||||
if (read == -1) {
|
||||
// If the stream ends, fill in zeros.
|
||||
Arrays.fill(bufs, tread, toRead, (byte)0);
|
||||
tread = toRead;
|
||||
} else {
|
||||
tread += read;
|
||||
}
|
||||
}
|
||||
assert tread == toRead;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
/**
 * Signals that the configuration file read by {@link RaidNode} is malformed.
 */
public class RaidConfigurationException extends Exception {
  private static final long serialVersionUID = 4046516718965587999L;

  /**
   * @param msg description of what is wrong with the configuration
   */
  public RaidConfigurationException(String msg) {
    super(msg);
  }
}
|
|
@ -1,259 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
|
||||
public class RaidFilter {
|
||||
static class Statistics {
|
||||
long numRaided = 0;
|
||||
long numTooNew = 0;
|
||||
long sizeTooNew = 0;
|
||||
long numTooSmall = 0;
|
||||
long sizeTooSmall = 0;
|
||||
|
||||
public void aggregate(Statistics other) {
|
||||
this.numRaided += other.numRaided;
|
||||
this.numTooNew += other.numTooNew;
|
||||
this.sizeTooNew += other.sizeTooNew;
|
||||
this.numTooSmall += other.numTooSmall;
|
||||
this.sizeTooSmall += other.sizeTooSmall;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "numRaided = " + numRaided +
|
||||
", numTooNew = " + numTooNew +
|
||||
", sizeTooNew = " + sizeTooNew +
|
||||
", numTooSmall = " + numTooSmall +
|
||||
", sizeTooSmall = " + sizeTooSmall;
|
||||
}
|
||||
}
|
||||
|
||||
static class TimeBasedFilter extends Configured
|
||||
implements DirectoryTraversal.FileFilter {
|
||||
int targetRepl;
|
||||
Path raidDestPrefix;
|
||||
long modTimePeriod;
|
||||
long startTime;
|
||||
Statistics stats = new Statistics();
|
||||
String currentSrcPath = null;
|
||||
long[] modTimePeriods = new long[0];
|
||||
String[] otherSrcPaths = new String[0];
|
||||
|
||||
TimeBasedFilter(Configuration conf, Path destPrefix, int targetRepl,
|
||||
long startTime, long modTimePeriod) {
|
||||
super(conf);
|
||||
this.raidDestPrefix = destPrefix;
|
||||
this.targetRepl = targetRepl;
|
||||
this.startTime = startTime;
|
||||
this.modTimePeriod = modTimePeriod;
|
||||
}
|
||||
|
||||
TimeBasedFilter(Configuration conf,
|
||||
Path destPrefix, PolicyInfo info,
|
||||
List<PolicyInfo> allPolicies, long startTime, Statistics stats) {
|
||||
super(conf);
|
||||
this.raidDestPrefix = destPrefix;
|
||||
this.targetRepl = Integer.parseInt(info.getProperty("targetReplication"));
|
||||
this.modTimePeriod = Long.parseLong(info.getProperty("modTimePeriod"));
|
||||
this.startTime = startTime;
|
||||
this.stats = stats;
|
||||
this.currentSrcPath = info.getSrcPath().toUri().getPath();
|
||||
initializeOtherPaths(allPolicies);
|
||||
}
|
||||
|
||||
private void initializeOtherPaths(List<PolicyInfo> allPolicies) {
|
||||
ArrayList<PolicyInfo> tmp = new ArrayList<PolicyInfo>(allPolicies);
|
||||
// Remove all policies where srcPath <= currentSrcPath or
|
||||
// matchingPrefixLength is < length(currentSrcPath)
|
||||
// The policies remaining are the only ones that could better
|
||||
// select a file chosen by the current policy.
|
||||
for (Iterator<PolicyInfo> it = tmp.iterator(); it.hasNext(); ) {
|
||||
String src = it.next().getSrcPath().toUri().getPath();
|
||||
if (src.compareTo(currentSrcPath) <= 0) {
|
||||
it.remove();
|
||||
continue;
|
||||
}
|
||||
int matchLen = matchingPrefixLength(src, currentSrcPath);
|
||||
if (matchLen < currentSrcPath.length()) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
// Sort in reverse lexicographic order.
|
||||
Collections.sort(tmp, new Comparator() {
|
||||
public int compare(Object o1, Object o2) {
|
||||
return 0 -
|
||||
((PolicyInfo)o1).getSrcPath().toUri().getPath().compareTo(
|
||||
((PolicyInfo)o1).getSrcPath().toUri().getPath());
|
||||
}
|
||||
});
|
||||
otherSrcPaths = new String[tmp.size()];
|
||||
modTimePeriods = new long[otherSrcPaths.length];
|
||||
for (int i = 0; i < otherSrcPaths.length; i++) {
|
||||
otherSrcPaths[i] = tmp.get(i).getSrcPath().toUri().getPath();
|
||||
modTimePeriods[i] = Long.parseLong(
|
||||
tmp.get(i).getProperty("modTimePeriod"));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean check(FileStatus f) throws IOException {
|
||||
if (!canChooseForCurrentPolicy(f)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the source file has fewer than or equal to 2 blocks, then skip it.
|
||||
long blockSize = f.getBlockSize();
|
||||
if (2 * blockSize >= f.getLen()) {
|
||||
stats.numTooSmall++;
|
||||
stats.sizeTooSmall += f.getLen();
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean select = false;
|
||||
try {
|
||||
Object ppair = RaidNode.getParityFile(
|
||||
raidDestPrefix, f.getPath(), getConf());
|
||||
// Is there is a valid parity file?
|
||||
if (ppair != null) {
|
||||
// Is the source at the target replication?
|
||||
if (f.getReplication() != targetRepl) {
|
||||
// Select the file so that its replication can be set.
|
||||
select = true;
|
||||
} else {
|
||||
stats.numRaided++;
|
||||
// Nothing to do, don't select the file.
|
||||
select = false;
|
||||
}
|
||||
} else {
|
||||
// No parity file.
|
||||
if (f.getModificationTime() + modTimePeriod < startTime) {
|
||||
// If the file is not too new, choose it for raiding.
|
||||
select = true;
|
||||
} else {
|
||||
select = false;
|
||||
stats.numTooNew++;
|
||||
stats.sizeTooNew += f.getLen();
|
||||
}
|
||||
}
|
||||
} catch (java.io.FileNotFoundException e) {
|
||||
select = true; // destination file does not exist
|
||||
} catch (java.io.IOException e) {
|
||||
// If there is a problem with the har path, this will let us continue.
|
||||
DirectoryTraversal.LOG.error(
|
||||
"Error while selecting " + StringUtils.stringifyException(e));
|
||||
}
|
||||
return select;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a file can be chosen for the current policy.
|
||||
*/
|
||||
boolean canChooseForCurrentPolicy(FileStatus stat) {
|
||||
boolean choose = true;
|
||||
if (otherSrcPaths.length > 0) {
|
||||
String fileStr = stat.getPath().toUri().getPath();
|
||||
|
||||
// For a given string, find the best matching srcPath.
|
||||
int matchWithCurrent = matchingPrefixLength(fileStr, currentSrcPath);
|
||||
for (int i = 0; i < otherSrcPaths.length; i++) {
|
||||
// If the file is too new, move to the next.
|
||||
if (stat.getModificationTime() > startTime - modTimePeriods[i]) {
|
||||
continue;
|
||||
}
|
||||
int matchLen = matchingPrefixLength(fileStr, otherSrcPaths[i]);
|
||||
if (matchLen > 0 &&
|
||||
fileStr.charAt(matchLen - 1) == Path.SEPARATOR_CHAR) {
|
||||
matchLen--;
|
||||
}
|
||||
if (matchLen > matchWithCurrent) {
|
||||
choose = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return choose;
|
||||
}
|
||||
|
||||
int matchingPrefixLength(final String s1, final String s2) {
|
||||
int len = 0;
|
||||
for (int j = 0; j < s1.length() && j < s2.length(); j++) {
|
||||
if (s1.charAt(j) == s2.charAt(j)) {
|
||||
len++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
static class PreferenceFilter extends Configured
|
||||
implements DirectoryTraversal.FileFilter {
|
||||
Path firstChoicePrefix;
|
||||
DirectoryTraversal.FileFilter secondChoiceFilter;
|
||||
|
||||
PreferenceFilter(Configuration conf,
|
||||
Path firstChoicePrefix, Path secondChoicePrefix,
|
||||
int targetRepl, long startTime, long modTimePeriod) {
|
||||
super(conf);
|
||||
this.firstChoicePrefix = firstChoicePrefix;
|
||||
this.secondChoiceFilter = new TimeBasedFilter(conf,
|
||||
secondChoicePrefix, targetRepl, startTime, modTimePeriod);
|
||||
}
|
||||
|
||||
PreferenceFilter(Configuration conf,
|
||||
Path firstChoicePrefix, Path secondChoicePrefix,
|
||||
PolicyInfo info, List<PolicyInfo> allPolicies, long startTime,
|
||||
Statistics stats) {
|
||||
super(conf);
|
||||
this.firstChoicePrefix = firstChoicePrefix;
|
||||
this.secondChoiceFilter = new TimeBasedFilter(
|
||||
conf, secondChoicePrefix, info, allPolicies, startTime, stats);
|
||||
}
|
||||
|
||||
public boolean check(FileStatus f) throws IOException {
|
||||
Object firstChoicePPair =
|
||||
RaidNode.getParityFile(firstChoicePrefix, f.getPath(), getConf());
|
||||
if (firstChoicePPair == null) {
|
||||
// The decision is upto the the second choice filter.
|
||||
return secondChoiceFilter.check(f);
|
||||
} else {
|
||||
// There is already a parity file under the first choice path.
|
||||
// We dont want to choose this file.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,682 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.net.InetSocketAddress;
|
||||
import javax.security.auth.login.LoginException;
|
||||
|
||||
import org.apache.hadoop.ipc.*;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.net.NetUtils;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||
import org.apache.hadoop.io.retry.RetryPolicies;
|
||||
import org.apache.hadoop.io.retry.RetryProxy;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.BlockLocation;
|
||||
import org.apache.hadoop.fs.HarFileSystem;
|
||||
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.DistributedRaidFileSystem;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.raid.protocol.PolicyList;
|
||||
import org.apache.hadoop.raid.protocol.RaidProtocol;
|
||||
|
||||
/**
|
||||
* A {@link RaidShell} that allows browsing configured raid policies.
|
||||
*/
|
||||
public class RaidShell extends Configured implements Tool {
|
||||
static {
|
||||
Configuration.addDefaultResource("hdfs-default.xml");
|
||||
Configuration.addDefaultResource("hdfs-site.xml");
|
||||
}
|
||||
public static final Log LOG = LogFactory.getLog( "org.apache.hadoop.RaidShell");
|
||||
public RaidProtocol raidnode;
|
||||
RaidProtocol rpcRaidnode;
|
||||
private UserGroupInformation ugi;
|
||||
volatile boolean clientRunning = true;
|
||||
private Configuration conf;
|
||||
|
||||
/**
 * Creates a RaidShell that works with the given configuration.
 * <p>
 * No connection is opened here; the constructor only stores the
 * configuration for later use.
 * @param conf configuration kept by this shell and handed to the superclass
 * @throws IOException declared for callers; not thrown by this constructor
 */
public RaidShell(Configuration conf) throws IOException {
  super(conf);
  // Keep our own reference alongside the one held by the superclass.
  this.conf = conf;
}
|
||||
|
||||
void initializeRpc(Configuration conf, InetSocketAddress address) throws IOException {
|
||||
this.ugi = UserGroupInformation.getCurrentUser();
|
||||
this.rpcRaidnode = createRPCRaidnode(address, conf, ugi);
|
||||
this.raidnode = createRaidnode(rpcRaidnode);
|
||||
}
|
||||
|
||||
void initializeLocal(Configuration conf) throws IOException {
|
||||
this.ugi = UserGroupInformation.getCurrentUser();
|
||||
}
|
||||
|
||||
public static RaidProtocol createRaidnode(Configuration conf) throws IOException {
|
||||
return createRaidnode(RaidNode.getAddress(conf), conf);
|
||||
}
|
||||
|
||||
public static RaidProtocol createRaidnode(InetSocketAddress raidNodeAddr,
|
||||
Configuration conf) throws IOException {
|
||||
return createRaidnode(createRPCRaidnode(raidNodeAddr, conf,
|
||||
UserGroupInformation.getCurrentUser()));
|
||||
}
|
||||
|
||||
private static RaidProtocol createRPCRaidnode(InetSocketAddress raidNodeAddr,
|
||||
Configuration conf, UserGroupInformation ugi)
|
||||
throws IOException {
|
||||
LOG.debug("RaidShell connecting to " + raidNodeAddr);
|
||||
return (RaidProtocol)RPC.getProxy(RaidProtocol.class,
|
||||
RaidProtocol.versionID, raidNodeAddr, ugi, conf,
|
||||
NetUtils.getSocketFactory(conf, RaidProtocol.class));
|
||||
}
|
||||
|
||||
private static RaidProtocol createRaidnode(RaidProtocol rpcRaidnode)
|
||||
throws IOException {
|
||||
RetryPolicy createPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
|
||||
5, 5000, TimeUnit.MILLISECONDS);
|
||||
|
||||
Map<Class<? extends Exception>,RetryPolicy> remoteExceptionToPolicyMap =
|
||||
new HashMap<Class<? extends Exception>, RetryPolicy>();
|
||||
|
||||
Map<Class<? extends Exception>,RetryPolicy> exceptionToPolicyMap =
|
||||
new HashMap<Class<? extends Exception>, RetryPolicy>();
|
||||
exceptionToPolicyMap.put(RemoteException.class,
|
||||
RetryPolicies.retryByRemoteException(
|
||||
RetryPolicies.TRY_ONCE_THEN_FAIL, remoteExceptionToPolicyMap));
|
||||
RetryPolicy methodPolicy = RetryPolicies.retryByException(
|
||||
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
|
||||
Map<String,RetryPolicy> methodNameToPolicyMap = new HashMap<String,RetryPolicy>();
|
||||
|
||||
methodNameToPolicyMap.put("create", methodPolicy);
|
||||
|
||||
return (RaidProtocol) RetryProxy.create(RaidProtocol.class,
|
||||
rpcRaidnode, methodNameToPolicyMap);
|
||||
}
|
||||
|
||||
private void checkOpen() throws IOException {
|
||||
if (!clientRunning) {
|
||||
IOException result = new IOException("RaidNode closed");
|
||||
throw result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the connection to the raidNode.
|
||||
*/
|
||||
public synchronized void close() throws IOException {
|
||||
if(clientRunning) {
|
||||
clientRunning = false;
|
||||
RPC.stopProxy(rpcRaidnode);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Displays format of commands.
|
||||
*/
|
||||
private static void printUsage(String cmd) {
|
||||
String prefix = "Usage: java " + RaidShell.class.getSimpleName();
|
||||
if ("-showConfig".equals(cmd)) {
|
||||
System.err.println("Usage: java RaidShell" +
|
||||
" [-showConfig]");
|
||||
} else if ("-recover".equals(cmd)) {
|
||||
System.err.println("Usage: java RaidShell" +
|
||||
" [-recover srcPath1 corruptOffset]");
|
||||
} else if ("-recoverBlocks".equals(cmd)) {
|
||||
System.err.println("Usage: java RaidShell" +
|
||||
" [-recoverBlocks path1 path2...]");
|
||||
} else {
|
||||
System.err.println("Usage: java RaidShell");
|
||||
System.err.println(" [-showConfig ]");
|
||||
System.err.println(" [-help [cmd]]");
|
||||
System.err.println(" [-recover srcPath1 corruptOffset]");
|
||||
System.err.println(" [-recoverBlocks path1 path2...]");
|
||||
System.err.println(" [-fsck [path]]");
|
||||
System.err.println();
|
||||
ToolRunner.printGenericCommandUsage(System.err);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* run
|
||||
*/
|
||||
public int run(String argv[]) throws Exception {
|
||||
|
||||
if (argv.length < 1) {
|
||||
printUsage("");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int exitCode = -1;
|
||||
int i = 0;
|
||||
String cmd = argv[i++];
|
||||
//
|
||||
// verify that we have enough command line parameters
|
||||
//
|
||||
if ("-showConfig".equals(cmd)) {
|
||||
if (argv.length < 1) {
|
||||
printUsage(cmd);
|
||||
return exitCode;
|
||||
}
|
||||
} else if ("-recover".equals(cmd)) {
|
||||
if (argv.length < 3) {
|
||||
printUsage(cmd);
|
||||
return exitCode;
|
||||
}
|
||||
} else if ("-fsck".equals(cmd)) {
|
||||
if ((argv.length < 1) || (argv.length > 2)) {
|
||||
printUsage(cmd);
|
||||
return exitCode;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
if ("-showConfig".equals(cmd)) {
|
||||
initializeRpc(conf, RaidNode.getAddress(conf));
|
||||
exitCode = showConfig(cmd, argv, i);
|
||||
} else if ("-recover".equals(cmd)) {
|
||||
initializeRpc(conf, RaidNode.getAddress(conf));
|
||||
exitCode = recoverAndPrint(cmd, argv, i);
|
||||
} else if ("-recoverBlocks".equals(cmd)) {
|
||||
initializeLocal(conf);
|
||||
recoverBlocks(argv, i);
|
||||
exitCode = 0;
|
||||
} else if ("-fsck".equals(cmd)) {
|
||||
if (argv.length == 1) {
|
||||
// if there are no args, check the whole file system
|
||||
exitCode = fsck("/");
|
||||
} else {
|
||||
// argv.length == 2
|
||||
// otherwise, check the path passed
|
||||
exitCode = fsck(argv[1]);
|
||||
}
|
||||
} else {
|
||||
exitCode = -1;
|
||||
System.err.println(cmd.substring(1) + ": Unknown command");
|
||||
printUsage("");
|
||||
}
|
||||
} catch (IllegalArgumentException arge) {
|
||||
exitCode = -1;
|
||||
System.err.println(cmd.substring(1) + ": " + arge.getLocalizedMessage());
|
||||
printUsage(cmd);
|
||||
} catch (RemoteException e) {
|
||||
//
|
||||
// This is a error returned by raidnode server. Print
|
||||
// out the first line of the error mesage, ignore the stack trace.
|
||||
exitCode = -1;
|
||||
try {
|
||||
String[] content;
|
||||
content = e.getLocalizedMessage().split("\n");
|
||||
System.err.println(cmd.substring(1) + ": " +
|
||||
content[0]);
|
||||
} catch (Exception ex) {
|
||||
System.err.println(cmd.substring(1) + ": " +
|
||||
ex.getLocalizedMessage());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
//
|
||||
// IO exception encountered locally.
|
||||
//
|
||||
exitCode = -1;
|
||||
System.err.println(cmd.substring(1) + ": " +
|
||||
e.getLocalizedMessage());
|
||||
} catch (Exception re) {
|
||||
exitCode = -1;
|
||||
System.err.println(cmd.substring(1) + ": " + re.getLocalizedMessage());
|
||||
} finally {
|
||||
}
|
||||
return exitCode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply operation specified by 'cmd' on all parameters
|
||||
* starting from argv[startindex].
|
||||
*/
|
||||
private int showConfig(String cmd, String argv[], int startindex) throws IOException {
|
||||
int exitCode = 0;
|
||||
int i = startindex;
|
||||
PolicyList[] all = raidnode.getAllPolicies();
|
||||
for (PolicyList list: all) {
|
||||
for (PolicyInfo p : list.getAll()) {
|
||||
System.out.println(p);
|
||||
}
|
||||
}
|
||||
return exitCode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recovers the specified path from the parity file
|
||||
*/
|
||||
public Path[] recover(String cmd, String argv[], int startindex)
|
||||
throws IOException {
|
||||
Path[] paths = new Path[(argv.length - startindex) / 2];
|
||||
int j = 0;
|
||||
for (int i = startindex; i < argv.length; i = i + 2) {
|
||||
String path = argv[i];
|
||||
long corruptOffset = Long.parseLong(argv[i+1]);
|
||||
LOG.info("RaidShell recoverFile for " + path + " corruptOffset " + corruptOffset);
|
||||
Path recovered = new Path("/tmp/recovered." + Time.now());
|
||||
FileSystem fs = recovered.getFileSystem(conf);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fs;
|
||||
Configuration raidConf = new Configuration(conf);
|
||||
raidConf.set("fs.hdfs.impl",
|
||||
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
|
||||
raidConf.set("fs.raid.underlyingfs.impl",
|
||||
"org.apache.hadoop.hdfs.DistributedFileSystem");
|
||||
raidConf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
||||
java.net.URI dfsUri = dfs.getUri();
|
||||
FileSystem raidFs = FileSystem.get(dfsUri, raidConf);
|
||||
FileUtil.copy(raidFs, new Path(path), fs, recovered, false, conf);
|
||||
|
||||
paths[j] = recovered;
|
||||
LOG.info("Raidshell created recovery file " + paths[j]);
|
||||
j++;
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
|
||||
public int recoverAndPrint(String cmd, String argv[], int startindex)
|
||||
throws IOException {
|
||||
int exitCode = 0;
|
||||
for (Path p : recover(cmd,argv,startindex)) {
|
||||
System.out.println(p);
|
||||
}
|
||||
return exitCode;
|
||||
}
|
||||
|
||||
public void recoverBlocks(String[] args, int startIndex)
|
||||
throws IOException {
|
||||
LOG.debug("Recovering blocks for " + (args.length - startIndex) + " files");
|
||||
BlockFixer.BlockFixerHelper fixer = new BlockFixer.BlockFixerHelper(conf);
|
||||
for (int i = startIndex; i < args.length; i++) {
|
||||
String path = args[i];
|
||||
fixer.fixFile(new Path(path));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks whether a file has more than the allowable number of
|
||||
* corrupt blocks and must therefore be considered corrupt
|
||||
*/
|
||||
private boolean isFileCorrupt(final DistributedFileSystem dfs,
|
||||
final Path filePath)
|
||||
throws IOException {
|
||||
// corruptBlocksPerStripe:
|
||||
// map stripe # -> # of corrupt blocks in that stripe (data + parity)
|
||||
HashMap<Integer, Integer> corruptBlocksPerStripe =
|
||||
new LinkedHashMap<Integer, Integer>();
|
||||
|
||||
// read conf
|
||||
final int stripeBlocks = RaidNode.getStripeLength(conf);
|
||||
|
||||
// figure out which blocks are missing/corrupted
|
||||
final FileStatus fileStatus = dfs.getFileStatus(filePath);
|
||||
final long blockSize = fileStatus.getBlockSize();
|
||||
final long fileLength = fileStatus.getLen();
|
||||
final long fileLengthInBlocks = (fileLength / blockSize) +
|
||||
(((fileLength % blockSize) == 0) ? 0L : 1L);
|
||||
final long fileStripes = (fileLengthInBlocks / stripeBlocks) +
|
||||
(((fileLengthInBlocks % stripeBlocks) == 0) ? 0L : 1L);
|
||||
final BlockLocation[] fileBlocks =
|
||||
dfs.getFileBlockLocations(fileStatus, 0, fileLength);
|
||||
|
||||
// figure out which stripes these corrupted blocks belong to
|
||||
for (BlockLocation fileBlock: fileBlocks) {
|
||||
int blockNo = (int) (fileBlock.getOffset() / blockSize);
|
||||
final int stripe = (int) (blockNo / stripeBlocks);
|
||||
if (fileBlock.isCorrupt() ||
|
||||
(fileBlock.getNames().length == 0 && fileBlock.getLength() > 0)) {
|
||||
if (corruptBlocksPerStripe.get(stripe) == null) {
|
||||
corruptBlocksPerStripe.put(stripe, 1);
|
||||
} else {
|
||||
corruptBlocksPerStripe.put(stripe, corruptBlocksPerStripe.
|
||||
get(stripe) + 1);
|
||||
}
|
||||
LOG.debug("file " + filePath.toString() + " corrupt in block " +
|
||||
blockNo + "/" + fileLengthInBlocks + ", stripe " + stripe +
|
||||
"/" + fileStripes);
|
||||
} else {
|
||||
LOG.debug("file " + filePath.toString() + " OK in block " + blockNo +
|
||||
"/" + fileLengthInBlocks + ", stripe " + stripe + "/" +
|
||||
fileStripes);
|
||||
}
|
||||
}
|
||||
|
||||
RaidInfo raidInfo = getFileRaidInfo(dfs, filePath);
|
||||
|
||||
// now check parity blocks
|
||||
if (raidInfo.raidType != RaidType.NONE) {
|
||||
checkParityBlocks(filePath, corruptBlocksPerStripe, blockSize,
|
||||
fileStripes, raidInfo);
|
||||
}
|
||||
|
||||
final int maxCorruptBlocksPerStripe = raidInfo.parityBlocksPerStripe;
|
||||
|
||||
for (int corruptBlocksInStripe: corruptBlocksPerStripe.values()) {
|
||||
if (corruptBlocksInStripe > maxCorruptBlocksPerStripe) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* holds the type of raid used for a particular file
|
||||
*/
|
||||
private enum RaidType {
|
||||
XOR,
|
||||
RS,
|
||||
NONE
|
||||
}
|
||||
|
||||
/**
|
||||
* holds raid type and parity file pair
|
||||
*/
|
||||
private class RaidInfo {
|
||||
public RaidInfo(final RaidType raidType,
|
||||
final RaidNode.ParityFilePair parityPair,
|
||||
final int parityBlocksPerStripe) {
|
||||
this.raidType = raidType;
|
||||
this.parityPair = parityPair;
|
||||
this.parityBlocksPerStripe = parityBlocksPerStripe;
|
||||
}
|
||||
public final RaidType raidType;
|
||||
public final RaidNode.ParityFilePair parityPair;
|
||||
public final int parityBlocksPerStripe;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the raid for a given file
|
||||
*/
|
||||
private RaidInfo getFileRaidInfo(final DistributedFileSystem dfs,
|
||||
final Path filePath)
|
||||
throws IOException {
|
||||
// now look for the parity file
|
||||
Path destPath = null;
|
||||
RaidNode.ParityFilePair ppair = null;
|
||||
try {
|
||||
// look for xor parity file first
|
||||
destPath = RaidNode.xorDestinationPath(conf);
|
||||
ppair = RaidNode.getParityFile(destPath, filePath, conf);
|
||||
} catch (FileNotFoundException ignore) {
|
||||
}
|
||||
if (ppair != null) {
|
||||
return new RaidInfo(RaidType.XOR, ppair, 1);
|
||||
} else {
|
||||
// failing that, look for rs parity file
|
||||
try {
|
||||
destPath = RaidNode.rsDestinationPath(conf);
|
||||
ppair = RaidNode.getParityFile(destPath, filePath, conf);
|
||||
} catch (FileNotFoundException ignore) {
|
||||
}
|
||||
if (ppair != null) {
|
||||
return new RaidInfo(RaidType.RS, ppair, RaidNode.rsParityLength(conf));
|
||||
} else {
|
||||
return new RaidInfo(RaidType.NONE, null, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the parity blocks corresponding to file
|
||||
* returns the parity blocks in case of DFS
|
||||
* and the part blocks containing parity blocks
|
||||
* in case of HAR FS
|
||||
*/
|
||||
private BlockLocation[] getParityBlocks(final Path filePath,
|
||||
final long blockSize,
|
||||
final long fileStripes,
|
||||
final RaidInfo raidInfo)
|
||||
throws IOException {
|
||||
|
||||
|
||||
final String parityPathStr = raidInfo.parityPair.getPath().toUri().
|
||||
getPath();
|
||||
FileSystem parityFS = raidInfo.parityPair.getFileSystem();
|
||||
|
||||
// get parity file metadata
|
||||
FileStatus parityFileStatus = parityFS.
|
||||
getFileStatus(new Path(parityPathStr));
|
||||
long parityFileLength = parityFileStatus.getLen();
|
||||
|
||||
if (parityFileLength != fileStripes * raidInfo.parityBlocksPerStripe *
|
||||
blockSize) {
|
||||
throw new IOException("expected parity file of length" +
|
||||
(fileStripes * raidInfo.parityBlocksPerStripe *
|
||||
blockSize) +
|
||||
" but got parity file of length " +
|
||||
parityFileLength);
|
||||
}
|
||||
|
||||
BlockLocation[] parityBlocks =
|
||||
parityFS.getFileBlockLocations(parityFileStatus, 0L, parityFileLength);
|
||||
|
||||
if (parityFS instanceof DistributedFileSystem ||
|
||||
parityFS instanceof DistributedRaidFileSystem) {
|
||||
long parityBlockSize = parityFileStatus.getBlockSize();
|
||||
if (parityBlockSize != blockSize) {
|
||||
throw new IOException("file block size is " + blockSize +
|
||||
" but parity file block size is " +
|
||||
parityBlockSize);
|
||||
}
|
||||
} else if (parityFS instanceof HarFileSystem) {
|
||||
LOG.debug("HAR FS found");
|
||||
} else {
|
||||
LOG.warn("parity file system is not of a supported type");
|
||||
}
|
||||
|
||||
return parityBlocks;
|
||||
}
|
||||
|
||||
/**
|
||||
* checks the parity blocks for a given file and modifies
|
||||
* corruptBlocksPerStripe accordingly
|
||||
*/
|
||||
private void checkParityBlocks(final Path filePath,
|
||||
final HashMap<Integer, Integer>
|
||||
corruptBlocksPerStripe,
|
||||
final long blockSize,
|
||||
final long fileStripes,
|
||||
final RaidInfo raidInfo)
|
||||
throws IOException {
|
||||
|
||||
// get the blocks of the parity file
|
||||
// because of har, multiple blocks may be returned as one container block
|
||||
BlockLocation[] containerBlocks = getParityBlocks(filePath, blockSize,
|
||||
fileStripes, raidInfo);
|
||||
|
||||
long parityStripeLength = blockSize *
|
||||
((long) raidInfo.parityBlocksPerStripe);
|
||||
|
||||
long parityFileLength = parityStripeLength * fileStripes;
|
||||
|
||||
long parityBlocksFound = 0L;
|
||||
|
||||
for (BlockLocation cb: containerBlocks) {
|
||||
if (cb.getLength() % blockSize != 0) {
|
||||
throw new IOException("container block size is not " +
|
||||
"multiple of parity block size");
|
||||
}
|
||||
int blocksInContainer = (int) (cb.getLength() / blockSize);
|
||||
LOG.debug("found container with offset " + cb.getOffset() +
|
||||
", length " + cb.getLength());
|
||||
|
||||
for (long offset = cb.getOffset();
|
||||
offset < cb.getOffset() + cb.getLength();
|
||||
offset += blockSize) {
|
||||
long block = offset / blockSize;
|
||||
|
||||
int stripe = (int) (offset / parityStripeLength);
|
||||
|
||||
if (stripe < 0) {
|
||||
// before the beginning of the parity file
|
||||
continue;
|
||||
}
|
||||
if (stripe >= fileStripes) {
|
||||
// past the end of the parity file
|
||||
break;
|
||||
}
|
||||
|
||||
parityBlocksFound++;
|
||||
|
||||
if (cb.isCorrupt() ||
|
||||
(cb.getNames().length == 0 && cb.getLength() > 0)) {
|
||||
LOG.debug("parity file for " + filePath.toString() +
|
||||
" corrupt in block " + block +
|
||||
", stripe " + stripe + "/" + fileStripes);
|
||||
|
||||
if (corruptBlocksPerStripe.get(stripe) == null) {
|
||||
corruptBlocksPerStripe.put(stripe, 1);
|
||||
} else {
|
||||
corruptBlocksPerStripe.put(stripe,
|
||||
corruptBlocksPerStripe.get(stripe) +
|
||||
1);
|
||||
}
|
||||
} else {
|
||||
LOG.debug("parity file for " + filePath.toString() +
|
||||
" OK in block " + block +
|
||||
", stripe " + stripe + "/" + fileStripes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
long parityBlocksExpected = raidInfo.parityBlocksPerStripe * fileStripes;
|
||||
if (parityBlocksFound != parityBlocksExpected ) {
|
||||
throw new IOException("expected " + parityBlocksExpected +
|
||||
" parity blocks but got " + parityBlocksFound);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* checks the raided file system, prints a list of corrupt files to
|
||||
* System.out and returns the number of corrupt files
|
||||
*/
|
||||
public int fsck(final String path) throws IOException {
|
||||
|
||||
FileSystem fs = (new Path(path)).getFileSystem(conf);
|
||||
|
||||
// if we got a raid fs, get the underlying fs
|
||||
if (fs instanceof DistributedRaidFileSystem) {
|
||||
fs = ((DistributedRaidFileSystem) fs).getFileSystem();
|
||||
}
|
||||
|
||||
// check that we have a distributed fs
|
||||
if (!(fs instanceof DistributedFileSystem)) {
|
||||
throw new IOException("expected DistributedFileSystem but got " +
|
||||
fs.getClass().getName());
|
||||
}
|
||||
final DistributedFileSystem dfs = (DistributedFileSystem) fs;
|
||||
|
||||
// get conf settings
|
||||
String xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
|
||||
String rsPrefix = RaidNode.rsDestinationPath(conf).toUri().getPath();
|
||||
if (!xorPrefix.endsWith("/")) {
|
||||
xorPrefix = xorPrefix + "/";
|
||||
}
|
||||
if (!rsPrefix.endsWith("/")) {
|
||||
rsPrefix = rsPrefix + "/";
|
||||
}
|
||||
LOG.debug("prefixes: " + xorPrefix + ", " + rsPrefix);
|
||||
|
||||
// get a list of corrupted files (not considering parity blocks just yet)
|
||||
// from the name node
|
||||
// these are the only files we need to consider:
|
||||
// if a file has no corrupted data blocks, it is OK even if some
|
||||
// of its parity blocks are corrupted, so no further checking is
|
||||
// necessary
|
||||
final String[] files = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
final List<Path> corruptFileCandidates = new LinkedList<Path>();
|
||||
for (final String f: files) {
|
||||
final Path p = new Path(f);
|
||||
// if this file is a parity file
|
||||
// or if it does not start with the specified path,
|
||||
// ignore it
|
||||
if (!p.toString().startsWith(xorPrefix) &&
|
||||
!p.toString().startsWith(rsPrefix) &&
|
||||
p.toString().startsWith(path)) {
|
||||
corruptFileCandidates.add(p);
|
||||
}
|
||||
}
|
||||
// filter files marked for deletion
|
||||
RaidUtils.filterTrash(conf, corruptFileCandidates);
|
||||
|
||||
int numberOfCorruptFiles = 0;
|
||||
|
||||
for (final Path corruptFileCandidate: corruptFileCandidates) {
|
||||
if (isFileCorrupt(dfs, corruptFileCandidate)) {
|
||||
System.out.println(corruptFileCandidate.toString());
|
||||
numberOfCorruptFiles++;
|
||||
}
|
||||
}
|
||||
|
||||
return numberOfCorruptFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* main() has some simple utility methods
|
||||
*/
|
||||
public static void main(String argv[]) throws Exception {
|
||||
RaidShell shell = null;
|
||||
try {
|
||||
shell = new RaidShell(new Configuration());
|
||||
int res = ToolRunner.run(shell, argv);
|
||||
System.exit(res);
|
||||
} catch (RPC.VersionMismatch v) {
|
||||
System.err.println("Version Mismatch between client and server" +
|
||||
"... command aborted.");
|
||||
System.exit(-1);
|
||||
} catch (IOException e) {
|
||||
System.err.
|
||||
println("Bad connection to RaidNode or NameNode. command aborted.");
|
||||
System.err.println(e.getMessage());
|
||||
System.exit(-1);
|
||||
} finally {
|
||||
shell.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,171 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.PositionedReadable;
|
||||
import org.apache.hadoop.fs.Seekable;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
public class RaidUtils {
|
||||
/**
|
||||
* A {@link Progressable} that does nothing.
|
||||
*
|
||||
* We could have used Reporter.NULL here but that would introduce
|
||||
* a dependency on mapreduce.
|
||||
*/
|
||||
public static class DummyProgressable implements Progressable {
|
||||
/**
|
||||
* Do nothing.
|
||||
*/
|
||||
@Override
|
||||
public void progress() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes files matching the trash file pattern.
|
||||
*/
|
||||
public static void filterTrash(Configuration conf, List<Path> files) {
|
||||
// Remove files under Trash.
|
||||
String trashPattern = conf.get("raid.blockfixer.trash.pattern",
|
||||
"^/user/.*/\\.Trash.*");
|
||||
for (Iterator<Path> it = files.iterator(); it.hasNext(); ) {
|
||||
String pathStr = it.next().toString();
|
||||
if (Pattern.matches(trashPattern, pathStr)) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void readTillEnd(InputStream in, byte[] buf, boolean eofOK)
|
||||
throws IOException {
|
||||
int toRead = buf.length;
|
||||
int numRead = 0;
|
||||
while (numRead < toRead) {
|
||||
int nread = in.read(buf, numRead, toRead - numRead);
|
||||
if (nread < 0) {
|
||||
if (eofOK) {
|
||||
// EOF hit, fill with zeros
|
||||
Arrays.fill(buf, numRead, toRead, (byte)0);
|
||||
numRead = toRead;
|
||||
} else {
|
||||
// EOF hit, throw.
|
||||
throw new IOException("Premature EOF");
|
||||
}
|
||||
} else {
|
||||
numRead += nread;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void copyBytes(
|
||||
InputStream in, OutputStream out, byte[] buf, long count)
|
||||
throws IOException {
|
||||
for (long bytesRead = 0; bytesRead < count; ) {
|
||||
int toRead = Math.min(buf.length, (int)(count - bytesRead));
|
||||
IOUtils.readFully(in, buf, 0, toRead);
|
||||
bytesRead += toRead;
|
||||
out.write(buf, 0, toRead);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ZeroInputStream extends InputStream
|
||||
implements Seekable, PositionedReadable {
|
||||
private long endOffset;
|
||||
private long pos;
|
||||
|
||||
public ZeroInputStream(long endOffset) {
|
||||
this.endOffset = endOffset;
|
||||
this.pos = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
if (pos < endOffset) {
|
||||
pos++;
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int available() throws IOException {
|
||||
return (int)(endOffset - pos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPos() throws IOException {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(long seekOffset) throws IOException {
|
||||
if (seekOffset < endOffset) {
|
||||
pos = seekOffset;
|
||||
} else {
|
||||
throw new IOException("Illegal Offset" + pos);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekToNewSource(long targetPos) throws IOException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(long position, byte[] buffer, int offset, int length)
|
||||
throws IOException {
|
||||
int count = 0;
|
||||
for (; position < endOffset && count < length; position++) {
|
||||
buffer[offset + count] = 0;
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFully(long position, byte[] buffer, int offset, int length)
|
||||
throws IOException {
|
||||
int count = 0;
|
||||
for (; position < endOffset && count < length; position++) {
|
||||
buffer[offset + count] = 0;
|
||||
count++;
|
||||
}
|
||||
if (count < length) {
|
||||
throw new IOException("Premature EOF");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFully(long position, byte[] buffer) throws IOException {
|
||||
readFully(position, buffer, 0, buffer.length);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,183 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
public class ReedSolomonCode implements ErasureCode {
|
||||
|
||||
private final int stripeSize;
|
||||
private final int paritySize;
|
||||
private final int[] generatingPolynomial;
|
||||
private final int PRIMITIVE_ROOT = 2;
|
||||
private final int[] primitivePower;
|
||||
private final GaloisField GF = GaloisField.getInstance();
|
||||
private int[] errSignature;
|
||||
private final int[] paritySymbolLocations;
|
||||
private final int[] dataBuff;
|
||||
|
||||
public ReedSolomonCode(int stripeSize, int paritySize) {
|
||||
assert(stripeSize + paritySize < GF.getFieldSize());
|
||||
this.stripeSize = stripeSize;
|
||||
this.paritySize = paritySize;
|
||||
this.errSignature = new int[paritySize];
|
||||
this.paritySymbolLocations = new int[paritySize];
|
||||
this.dataBuff = new int[paritySize + stripeSize];
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
paritySymbolLocations[i] = i;
|
||||
}
|
||||
|
||||
this.primitivePower = new int[stripeSize + paritySize];
|
||||
// compute powers of the primitive root
|
||||
for (int i = 0; i < stripeSize + paritySize; i++) {
|
||||
primitivePower[i] = GF.power(PRIMITIVE_ROOT, i);
|
||||
}
|
||||
// compute generating polynomial
|
||||
int[] gen = {1};
|
||||
int[] poly = new int[2];
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
poly[0] = primitivePower[i];
|
||||
poly[1] = 1;
|
||||
gen = GF.multiply(gen, poly);
|
||||
}
|
||||
// generating polynomial has all generating roots
|
||||
generatingPolynomial = gen;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encode(int[] message, int[] parity) {
|
||||
assert(message.length == stripeSize && parity.length == paritySize);
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
dataBuff[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
dataBuff[i + paritySize] = message[i];
|
||||
}
|
||||
GF.remainder(dataBuff, generatingPolynomial);
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
parity[i] = dataBuff[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void decode(int[] data, int[] erasedLocation, int[] erasedValue) {
|
||||
if (erasedLocation.length == 0) {
|
||||
return;
|
||||
}
|
||||
assert(erasedLocation.length == erasedValue.length);
|
||||
for (int i = 0; i < erasedLocation.length; i++) {
|
||||
data[erasedLocation[i]] = 0;
|
||||
}
|
||||
for (int i = 0; i < erasedLocation.length; i++) {
|
||||
errSignature[i] = primitivePower[erasedLocation[i]];
|
||||
erasedValue[i] = GF.substitute(data, primitivePower[i]);
|
||||
}
|
||||
GF.solveVandermondeSystem(errSignature, erasedValue, erasedLocation.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int stripeSize() {
|
||||
return this.stripeSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int paritySize() {
|
||||
return this.paritySize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int symbolSize() {
|
||||
return (int) Math.round(Math.log(GF.getFieldSize()) / Math.log(2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Given parity symbols followed by message symbols, return the locations of
|
||||
* symbols that are corrupted. Can resolve up to (parity length / 2) error
|
||||
* locations.
|
||||
* @param data The message and parity. The parity should be placed in the
|
||||
* first part of the array. In each integer, the relevant portion
|
||||
* is present in the least significant bits of each int.
|
||||
* The number of elements in data is stripeSize() + paritySize().
|
||||
* <b>Note that data may be changed after calling this method.</b>
|
||||
* @param errorLocations The set to put the error location results
|
||||
* @return true If the locations can be resolved, return true.
|
||||
*/
|
||||
public boolean computeErrorLocations(int[] data,
|
||||
Set<Integer> errorLocations) {
|
||||
assert(data.length == paritySize + stripeSize && errorLocations != null);
|
||||
errorLocations.clear();
|
||||
int maxError = paritySize / 2;
|
||||
int[][] syndromeMatrix = new int[maxError][];
|
||||
for (int i = 0; i < syndromeMatrix.length; ++i) {
|
||||
syndromeMatrix[i] = new int[maxError + 1];
|
||||
}
|
||||
int[] syndrome = new int[paritySize];
|
||||
|
||||
if (computeSyndrome(data, syndrome)) {
|
||||
// Parity check OK. No error location added.
|
||||
return true;
|
||||
}
|
||||
for (int i = 0; i < maxError; ++i) {
|
||||
for (int j = 0; j < maxError + 1; ++j) {
|
||||
syndromeMatrix[i][j] = syndrome[i + j];
|
||||
}
|
||||
}
|
||||
GF.gaussianElimination(syndromeMatrix);
|
||||
int[] polynomial = new int[maxError + 1];
|
||||
polynomial[0] = 1;
|
||||
for (int i = 0; i < maxError; ++i) {
|
||||
polynomial[i + 1] = syndromeMatrix[maxError - 1 - i][maxError];
|
||||
}
|
||||
for (int i = 0; i < paritySize + stripeSize; ++i) {
|
||||
int possibleRoot = GF.divide(1, primitivePower[i]);
|
||||
if (GF.substitute(polynomial, possibleRoot) == 0) {
|
||||
errorLocations.add(i);
|
||||
}
|
||||
}
|
||||
// Now recover with error locations and check the syndrome again
|
||||
int[] locations = new int[errorLocations.size()];
|
||||
int k = 0;
|
||||
for (int loc : errorLocations) {
|
||||
locations[k++] = loc;
|
||||
}
|
||||
int [] erasedValue = new int[locations.length];
|
||||
decode(data, locations, erasedValue);
|
||||
for (int i = 0; i < locations.length; ++i) {
|
||||
data[locations[i]] = erasedValue[i];
|
||||
}
|
||||
return computeSyndrome(data, syndrome);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the syndrome of the input [parity, message]
|
||||
* @param data [parity, message]
|
||||
* @param syndrome The syndromes (checksums) of the data
|
||||
* @return true If syndromes are all zeros
|
||||
*/
|
||||
private boolean computeSyndrome(int[] data, int [] syndrome) {
|
||||
boolean corruptionFound = false;
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
syndrome[i] = GF.substitute(data, primitivePower[i]);
|
||||
if (syndrome[i] != 0) {
|
||||
corruptionFound = true;
|
||||
}
|
||||
}
|
||||
return !corruptionFound;
|
||||
}
|
||||
}
|
|
@ -1,226 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.ChecksumException;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.BlockMissingException;
|
||||
|
||||
public class ReedSolomonDecoder extends Decoder {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.ReedSolomonDecoder");
|
||||
private ErasureCode reedSolomonCode;
|
||||
|
||||
public ReedSolomonDecoder(
|
||||
Configuration conf, int stripeSize, int paritySize) {
|
||||
super(conf, stripeSize, paritySize);
|
||||
this.reedSolomonCode = new ReedSolomonCode(stripeSize, paritySize);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void fixErasedBlock(
|
||||
FileSystem fs, Path srcFile,
|
||||
FileSystem parityFs, Path parityFile,
|
||||
long blockSize, long errorOffset, long bytesToSkip, long limit,
|
||||
OutputStream out) throws IOException {
|
||||
FSDataInputStream[] inputs = new FSDataInputStream[stripeSize + paritySize];
|
||||
int[] erasedLocations = buildInputs(fs, srcFile, parityFs, parityFile,
|
||||
errorOffset, inputs);
|
||||
int blockIdxInStripe = ((int)(errorOffset/blockSize)) % stripeSize;
|
||||
int erasedLocationToFix = paritySize + blockIdxInStripe;
|
||||
writeFixedBlock(inputs, erasedLocations, erasedLocationToFix,
|
||||
bytesToSkip, limit, out);
|
||||
}
|
||||
|
||||
protected int[] buildInputs(FileSystem fs, Path srcFile,
|
||||
FileSystem parityFs, Path parityFile,
|
||||
long errorOffset, FSDataInputStream[] inputs)
|
||||
throws IOException {
|
||||
LOG.info("Building inputs to recover block starting at " + errorOffset);
|
||||
FileStatus srcStat = fs.getFileStatus(srcFile);
|
||||
long blockSize = srcStat.getBlockSize();
|
||||
long blockIdx = (int)(errorOffset / blockSize);
|
||||
long stripeIdx = blockIdx / stripeSize;
|
||||
LOG.info("FileSize = " + srcStat.getLen() + ", blockSize = " + blockSize +
|
||||
", blockIdx = " + blockIdx + ", stripeIdx = " + stripeIdx);
|
||||
ArrayList<Integer> erasedLocations = new ArrayList<Integer>();
|
||||
// First open streams to the parity blocks.
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
long offset = blockSize * (stripeIdx * paritySize + i);
|
||||
FSDataInputStream in = parityFs.open(
|
||||
parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
||||
in.seek(offset);
|
||||
LOG.info("Adding " + parityFile + ":" + offset + " as input " + i);
|
||||
inputs[i] = in;
|
||||
}
|
||||
// Now open streams to the data blocks.
|
||||
for (int i = paritySize; i < paritySize + stripeSize; i++) {
|
||||
long offset = blockSize * (stripeIdx * stripeSize + i - paritySize);
|
||||
if (offset == errorOffset) {
|
||||
LOG.info(srcFile + ":" + offset +
|
||||
" is known to have error, adding zeros as input " + i);
|
||||
inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(
|
||||
offset + blockSize));
|
||||
erasedLocations.add(i);
|
||||
} else if (offset > srcStat.getLen()) {
|
||||
LOG.info(srcFile + ":" + offset +
|
||||
" is past file size, adding zeros as input " + i);
|
||||
inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(
|
||||
offset + blockSize));
|
||||
} else {
|
||||
FSDataInputStream in = fs.open(
|
||||
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
||||
in.seek(offset);
|
||||
LOG.info("Adding " + srcFile + ":" + offset + " as input " + i);
|
||||
inputs[i] = in;
|
||||
}
|
||||
}
|
||||
if (erasedLocations.size() > paritySize) {
|
||||
String msg = "Too many erased locations: " + erasedLocations.size();
|
||||
LOG.error(msg);
|
||||
throw new IOException(msg);
|
||||
}
|
||||
int[] locs = new int[erasedLocations.size()];
|
||||
for (int i = 0; i < locs.length; i++) {
|
||||
locs[i] = erasedLocations.get(i);
|
||||
}
|
||||
return locs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the inputs provided and write to the output.
|
||||
* @param inputs array of inputs.
|
||||
* @param erasedLocations indexes in the inputs which are known to be erased.
|
||||
* @param erasedLocationToFix index in the inputs which needs to be fixed.
|
||||
* @param skipBytes number of bytes to skip before writing to output.
|
||||
* @param limit maximum number of bytes to be written/skipped.
|
||||
* @param out the output.
|
||||
* @throws IOException
|
||||
*/
|
||||
void writeFixedBlock(
|
||||
FSDataInputStream[] inputs,
|
||||
int[] erasedLocations,
|
||||
int erasedLocationToFix,
|
||||
long skipBytes,
|
||||
long limit,
|
||||
OutputStream out) throws IOException {
|
||||
|
||||
LOG.info("Need to write " + (limit - skipBytes) +
|
||||
" bytes for erased location index " + erasedLocationToFix);
|
||||
int[] tmp = new int[inputs.length];
|
||||
int[] decoded = new int[erasedLocations.length];
|
||||
long toDiscard = skipBytes;
|
||||
// Loop while the number of skipped + written bytes is less than the max.
|
||||
for (long written = 0; skipBytes + written < limit; ) {
|
||||
erasedLocations = readFromInputs(inputs, erasedLocations, limit);
|
||||
if (decoded.length != erasedLocations.length) {
|
||||
decoded = new int[erasedLocations.length];
|
||||
}
|
||||
|
||||
int toWrite = (int)Math.min((long)bufSize, limit - (skipBytes + written));
|
||||
if (toDiscard >= toWrite) {
|
||||
toDiscard -= toWrite;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decoded bufSize amount of data.
|
||||
for (int i = 0; i < bufSize; i++) {
|
||||
performDecode(readBufs, writeBufs, i, tmp, erasedLocations, decoded);
|
||||
}
|
||||
|
||||
for (int i = 0; i < erasedLocations.length; i++) {
|
||||
if (erasedLocations[i] == erasedLocationToFix) {
|
||||
toWrite -= toDiscard;
|
||||
out.write(writeBufs[i], (int)toDiscard, toWrite);
|
||||
toDiscard = 0;
|
||||
written += toWrite;
|
||||
LOG.debug("Wrote " + toWrite + " bytes for erased location index " +
|
||||
erasedLocationToFix);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int[] readFromInputs(
|
||||
FSDataInputStream[] inputs,
|
||||
int[] erasedLocations,
|
||||
long limit) throws IOException {
|
||||
// For every input, read some data = bufSize.
|
||||
for (int i = 0; i < inputs.length; i++) {
|
||||
long curPos = inputs[i].getPos();
|
||||
try {
|
||||
RaidUtils.readTillEnd(inputs[i], readBufs[i], true);
|
||||
continue;
|
||||
} catch (BlockMissingException e) {
|
||||
LOG.error("Encountered BlockMissingException in stream " + i);
|
||||
} catch (ChecksumException e) {
|
||||
LOG.error("Encountered ChecksumException in stream " + i);
|
||||
}
|
||||
|
||||
// Found a new erased location.
|
||||
if (erasedLocations.length == paritySize) {
|
||||
String msg = "Too many read errors";
|
||||
LOG.error(msg);
|
||||
throw new IOException(msg);
|
||||
}
|
||||
|
||||
// Add this stream to the set of erased locations.
|
||||
int[] newErasedLocations = new int[erasedLocations.length + 1];
|
||||
for (int j = 0; j < erasedLocations.length; j++) {
|
||||
newErasedLocations[j] = erasedLocations[j];
|
||||
}
|
||||
newErasedLocations[newErasedLocations.length - 1] = i;
|
||||
erasedLocations = newErasedLocations;
|
||||
|
||||
LOG.info("Using zeros for stream " + i);
|
||||
inputs[i] = new FSDataInputStream(
|
||||
new RaidUtils.ZeroInputStream(curPos + limit));
|
||||
inputs[i].seek(curPos);
|
||||
RaidUtils.readTillEnd(inputs[i], readBufs[i], true);
|
||||
}
|
||||
return erasedLocations;
|
||||
}
|
||||
|
||||
void performDecode(byte[][] readBufs, byte[][] writeBufs,
|
||||
int idx, int[] inputs,
|
||||
int[] erasedLocations, int[] decoded) {
|
||||
for (int i = 0; i < decoded.length; i++) {
|
||||
decoded[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < inputs.length; i++) {
|
||||
inputs[i] = readBufs[i][idx] & 0x000000FF;
|
||||
}
|
||||
reedSolomonCode.decode(inputs, erasedLocations, decoded);
|
||||
for (int i = 0; i < decoded.length; i++) {
|
||||
writeBufs[i][idx] = (byte)decoded[i];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,96 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
public class ReedSolomonEncoder extends Encoder {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.ReedSolomonEncoder");
|
||||
private ErasureCode reedSolomonCode;
|
||||
|
||||
public ReedSolomonEncoder(
|
||||
Configuration conf, int stripeSize, int paritySize) {
|
||||
super(conf, stripeSize, paritySize);
|
||||
this.reedSolomonCode = new ReedSolomonCode(stripeSize, paritySize);
|
||||
}
|
||||
|
||||
protected void encodeStripe(
|
||||
InputStream[] blocks,
|
||||
long stripeStartOffset,
|
||||
long blockSize,
|
||||
OutputStream[] outs,
|
||||
Progressable reporter) throws IOException {
|
||||
|
||||
int[] data = new int[stripeSize];
|
||||
int[] code = new int[paritySize];
|
||||
|
||||
for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
|
||||
// Read some data from each block = bufSize.
|
||||
for (int i = 0; i < blocks.length; i++) {
|
||||
RaidUtils.readTillEnd(blocks[i], readBufs[i], true);
|
||||
}
|
||||
|
||||
// Encode the data read.
|
||||
for (int j = 0; j < bufSize; j++) {
|
||||
performEncode(readBufs, writeBufs, j, data, code);
|
||||
}
|
||||
|
||||
// Now that we have some data to write, send it to the temp files.
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
outs[i].write(writeBufs[i], 0, bufSize);
|
||||
}
|
||||
|
||||
if (reporter != null) {
|
||||
reporter.progress();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void performEncode(byte[][] readBufs, byte[][] writeBufs, int idx,
|
||||
int[] data, int[] code) {
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
code[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
data[i] = readBufs[i][idx] & 0x000000FF;
|
||||
}
|
||||
reedSolomonCode.encode(data, code);
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
writeBufs[i][idx] = (byte)code[i];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getParityTempPath() {
|
||||
return new Path(RaidNode.rsTempPrefix(conf));
|
||||
}
|
||||
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
public class XORDecoder extends Decoder {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.XORDecoder");
|
||||
|
||||
public XORDecoder(
|
||||
Configuration conf, int stripeSize) {
|
||||
super(conf, stripeSize, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void fixErasedBlock(
|
||||
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
|
||||
long blockSize, long errorOffset, long bytesToSkip, long limit,
|
||||
OutputStream out) throws IOException {
|
||||
LOG.info("Fixing block at " + srcFile + ":" + errorOffset +
|
||||
", skipping " + bytesToSkip + ", limit " + limit);
|
||||
FileStatus srcStat = fs.getFileStatus(srcFile);
|
||||
ArrayList<FSDataInputStream> xorinputs = new ArrayList<FSDataInputStream>();
|
||||
|
||||
FSDataInputStream parityFileIn = parityFs.open(parityFile);
|
||||
parityFileIn.seek(parityOffset(errorOffset, blockSize));
|
||||
xorinputs.add(parityFileIn);
|
||||
|
||||
long errorBlockOffset = (errorOffset / blockSize) * blockSize;
|
||||
long[] srcOffsets = stripeOffsets(errorOffset, blockSize);
|
||||
for (int i = 0; i < srcOffsets.length; i++) {
|
||||
if (srcOffsets[i] == errorBlockOffset) {
|
||||
LOG.info("Skipping block at " + srcFile + ":" + errorBlockOffset);
|
||||
continue;
|
||||
}
|
||||
if (srcOffsets[i] < srcStat.getLen()) {
|
||||
FSDataInputStream in = fs.open(srcFile);
|
||||
in.seek(srcOffsets[i]);
|
||||
xorinputs.add(in);
|
||||
}
|
||||
}
|
||||
FSDataInputStream[] inputs = xorinputs.toArray(
|
||||
new FSDataInputStream[]{null});
|
||||
ParityInputStream recovered =
|
||||
new ParityInputStream(inputs, limit, readBufs[0], writeBufs[0]);
|
||||
recovered.skip(bytesToSkip);
|
||||
recovered.drain(out, null);
|
||||
}
|
||||
|
||||
protected long[] stripeOffsets(long errorOffset, long blockSize) {
|
||||
long[] offsets = new long[stripeSize];
|
||||
long stripeIdx = errorOffset / (blockSize * stripeSize);
|
||||
long startOffsetOfStripe = stripeIdx * stripeSize * blockSize;
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
offsets[i] = startOffsetOfStripe + i * blockSize;
|
||||
}
|
||||
return offsets;
|
||||
}
|
||||
|
||||
protected long parityOffset(long errorOffset, long blockSize) {
|
||||
long stripeIdx = errorOffset / (blockSize * stripeSize);
|
||||
return stripeIdx * blockSize;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,63 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
public class XOREncoder extends Encoder {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.XOREncoder");
|
||||
public XOREncoder(
|
||||
Configuration conf, int stripeSize) {
|
||||
super(conf, stripeSize, 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void encodeStripe(
|
||||
InputStream[] blocks,
|
||||
long stripeStartOffset,
|
||||
long blockSize,
|
||||
OutputStream[] outs,
|
||||
Progressable reporter) throws IOException {
|
||||
LOG.info("Peforming XOR ");
|
||||
ParityInputStream parityIn =
|
||||
new ParityInputStream(blocks, blockSize, readBufs[0], writeBufs[0]);
|
||||
try {
|
||||
parityIn.drain(outs[0], reporter);
|
||||
} finally {
|
||||
parityIn.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Path getParityTempPath() {
|
||||
return new Path(RaidNode.unraidTmpDirectory(conf));
|
||||
}
|
||||
}
|
|
@ -1,256 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid.protocol;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.util.Properties;
|
||||
import java.util.Enumeration;
|
||||
import java.lang.Math;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableFactories;
|
||||
import org.apache.hadoop.io.WritableFactory;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
/**
|
||||
* Maintains information about one policy
|
||||
*/
|
||||
public class PolicyInfo implements Writable {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.protocol.PolicyInfo");
|
||||
protected static final SimpleDateFormat dateFormat =
|
||||
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
|
||||
private Path srcPath; // the specified src path
|
||||
private String policyName; // name of policy
|
||||
private ErasureCodeType codeType;// the erasure code used
|
||||
private String description; // A verbose description of this policy
|
||||
private Configuration conf; // Hadoop configuration
|
||||
|
||||
private Properties properties; // Policy-dependent properties
|
||||
|
||||
private ReentrantReadWriteLock plock; // protects policy operations.
|
||||
public static enum ErasureCodeType {
|
||||
XOR, RS;
|
||||
public static ErasureCodeType fromString(String s) {
|
||||
if (XOR.toString().equalsIgnoreCase(s)) {
|
||||
return XOR;
|
||||
}
|
||||
if (RS.toString().equalsIgnoreCase(s)) {
|
||||
return RS;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the empty object
|
||||
*/
|
||||
public PolicyInfo() {
|
||||
this.conf = null;
|
||||
this.policyName = "";
|
||||
this.description = "";
|
||||
this.srcPath = null;
|
||||
this.properties = new Properties();
|
||||
this.plock = new ReentrantReadWriteLock();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the metadata that describes a policy
|
||||
*/
|
||||
public PolicyInfo(String policyName, Configuration conf) {
|
||||
this.conf = conf;
|
||||
this.policyName = policyName;
|
||||
this.description = "";
|
||||
this.srcPath = null;
|
||||
this.properties = new Properties();
|
||||
this.plock = new ReentrantReadWriteLock();
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy fields from another PolicyInfo
|
||||
*/
|
||||
public void copyFrom(PolicyInfo other) {
|
||||
if (other.conf != null) {
|
||||
this.conf = other.conf;
|
||||
}
|
||||
if (other.policyName != null && other.policyName.length() > 0) {
|
||||
this.policyName = other.policyName;
|
||||
}
|
||||
if (other.description != null && other.description.length() > 0) {
|
||||
this.description = other.description;
|
||||
}
|
||||
if (other.codeType != null) {
|
||||
this.codeType = other.codeType;
|
||||
}
|
||||
if (other.srcPath != null) {
|
||||
this.srcPath = other.srcPath;
|
||||
}
|
||||
for (Object key : other.properties.keySet()) {
|
||||
String skey = (String) key;
|
||||
this.properties.setProperty(skey, other.properties.getProperty(skey));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the input path on which this policy has to be applied
|
||||
*/
|
||||
public void setSrcPath(String in) throws IOException {
|
||||
srcPath = new Path(in);
|
||||
srcPath = srcPath.makeQualified(srcPath.getFileSystem(conf));
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the erasure code type used in this policy
|
||||
*/
|
||||
public void setErasureCode(String code) {
|
||||
this.codeType = ErasureCodeType.fromString(code);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the description of this policy.
|
||||
*/
|
||||
public void setDescription(String des) {
|
||||
this.description = des;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets an internal property.
|
||||
* @param name property name.
|
||||
* @param value property value.
|
||||
*/
|
||||
public void setProperty(String name, String value) {
|
||||
properties.setProperty(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value of an internal property.
|
||||
* @param name property name.
|
||||
*/
|
||||
public String getProperty(String name) {
|
||||
return properties.getProperty(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the name of this policy.
|
||||
*/
|
||||
public String getName() {
|
||||
return this.policyName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the destination path of this policy.
|
||||
*/
|
||||
public ErasureCodeType getErasureCode() {
|
||||
return this.codeType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the srcPath
|
||||
*/
|
||||
public Path getSrcPath() {
|
||||
return srcPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the expanded (unglobbed) forms of the srcPaths
|
||||
*/
|
||||
public Path[] getSrcPathExpanded() throws IOException {
|
||||
FileSystem fs = srcPath.getFileSystem(conf);
|
||||
|
||||
// globbing on srcPath
|
||||
FileStatus[] gpaths = fs.globStatus(srcPath);
|
||||
if (gpaths == null) {
|
||||
return null;
|
||||
}
|
||||
Path[] values = new Path[gpaths.length];
|
||||
for (int i = 0; i < gpaths.length; i++) {
|
||||
Path p = gpaths[i].getPath();
|
||||
values[i] = p.makeQualified(fs);
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert this policy into a printable form
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuffer buff = new StringBuffer();
|
||||
buff.append("Policy Name:\t" + policyName + " --------------------\n");
|
||||
buff.append("Source Path:\t" + srcPath + "\n");
|
||||
buff.append("Erasure Code:\t" + codeType + "\n");
|
||||
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements();) {
|
||||
String name = (String) e.nextElement();
|
||||
buff.append( name + ":\t" + properties.getProperty(name) + "\n");
|
||||
}
|
||||
if (description.length() > 0) {
|
||||
int len = Math.min(description.length(), 80);
|
||||
String sub = description.substring(0, len).trim();
|
||||
sub = sub.replaceAll("\n", " ");
|
||||
buff.append("Description:\t" + sub + "...\n");
|
||||
}
|
||||
return buff.toString();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Writable
|
||||
//////////////////////////////////////////////////
|
||||
static { // register a ctor
|
||||
WritableFactories.setFactory
|
||||
(PolicyInfo.class,
|
||||
new WritableFactory() {
|
||||
public Writable newInstance() { return new PolicyInfo(); }
|
||||
});
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
Text.writeString(out, srcPath.toString());
|
||||
Text.writeString(out, policyName);
|
||||
Text.writeString(out, codeType.toString());
|
||||
Text.writeString(out, description);
|
||||
out.writeInt(properties.size());
|
||||
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements();) {
|
||||
String name = (String) e.nextElement();
|
||||
Text.writeString(out, name);
|
||||
Text.writeString(out, properties.getProperty(name));
|
||||
}
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.srcPath = new Path(Text.readString(in));
|
||||
this.policyName = Text.readString(in);
|
||||
this.codeType = ErasureCodeType.fromString(Text.readString(in));
|
||||
this.description = Text.readString(in);
|
||||
for (int n = in.readInt(); n>0; n--) {
|
||||
String name = Text.readString(in);
|
||||
String value = Text.readString(in);
|
||||
properties.setProperty(name,value);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid.protocol;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableFactories;
|
||||
import org.apache.hadoop.io.WritableFactory;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
||||
/**
|
||||
* Maintains information about all policies that belong to a category.
|
||||
* These policies have to be applied one-at-a-time and cannot be run
|
||||
* simultaneously.
|
||||
*/
|
||||
public class PolicyList implements Writable {
|
||||
public static final Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.protocol.PolicyList");
|
||||
|
||||
private List<PolicyInfo> category; // list of policies
|
||||
private Path srcPath;
|
||||
|
||||
/**
|
||||
* Create a new category of policies.
|
||||
*/
|
||||
public PolicyList() {
|
||||
this.category = new LinkedList<PolicyInfo>();
|
||||
this.srcPath = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new policy to this category.
|
||||
*/
|
||||
public void add(PolicyInfo info) {
|
||||
category.add(info);
|
||||
}
|
||||
|
||||
public void setSrcPath(Configuration conf, String src) throws IOException {
|
||||
srcPath = new Path(src);
|
||||
srcPath = srcPath.makeQualified(srcPath.getFileSystem(conf));
|
||||
}
|
||||
|
||||
public Path getSrcPath() {
|
||||
return srcPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the policies in this category
|
||||
*/
|
||||
public Collection<PolicyInfo> getAll() {
|
||||
return category;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// Writable
|
||||
//////////////////////////////////////////////////
|
||||
static { // register a ctor
|
||||
WritableFactories.setFactory
|
||||
(PolicyList.class,
|
||||
new WritableFactory() {
|
||||
public Writable newInstance() { return new PolicyList(); }
|
||||
});
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeInt(category.size());
|
||||
for (PolicyInfo p : category) {
|
||||
p.write(out);
|
||||
}
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
int count = in.readInt();
|
||||
for (int i = 0; i < count; i++) {
|
||||
PolicyInfo p = new PolicyInfo();
|
||||
p.readFields(in);
|
||||
add(p);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid.protocol;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.ipc.VersionedProtocol;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**********************************************************************
|
||||
* RaidProtocol is used by user code
|
||||
* {@link org.apache.hadoop.raid.RaidShell} class to communicate
|
||||
* with the RaidNode. User code can manipulate the configured policies.
|
||||
*
|
||||
**********************************************************************/
|
||||
public interface RaidProtocol extends VersionedProtocol {
|
||||
|
||||
/**
|
||||
* Compared to the previous version the following changes have been introduced:
|
||||
* Only the latest change is reflected.
|
||||
* 1: new protocol introduced
|
||||
*/
|
||||
public static final long versionID = 1L;
|
||||
|
||||
/**
|
||||
* Get a listing of all configured policies
|
||||
* @throws IOException
|
||||
* return all categories of configured policies
|
||||
*/
|
||||
public PolicyList[] getAllPolicies() throws IOException;
|
||||
|
||||
/**
|
||||
* Unraid the specified input path. This is called when the specified file
|
||||
* is corrupted. This call will move the specified file to file.old
|
||||
* and then recover it from the RAID subsystem.
|
||||
*
|
||||
* @param inputPath The absolute pathname of the file to be recovered.
|
||||
* @param corruptOffset The offset that has the corruption
|
||||
*/
|
||||
public String recoverFile(String inputPath, long corruptOffset) throws IOException;
|
||||
|
||||
}
|
|
@ -1,45 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Start hadoop RaidNode process on machine specified on file conf/raidnode
|
||||
|
||||
usage="Usage: start-raidnode-remote.sh"

params=$#
# Resolve the absolute directory that holds this script.
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# hadoop-config.sh is looked up next to this script unless
# HADOOP_LIBEXEC_DIR is already set in the environment.
DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# get arguments: this script takes none, so any argument is a usage error
if [ $# -ge 1 ]; then
  echo "$usage"
  exit 1
fi

if [ -f "${HADOOP_CONF_DIR}/raidnode" ]; then
  # Point slaves.sh at the raidnode host list.
  export HADOOP_SLAVES="${HADOOP_CONF_DIR}/raidnode"
  # The substitution is left unquoted on purpose: host names listed on
  # separate lines are printed on one line, separated by spaces.
  echo "Starting raidnode at "$(cat "${HADOOP_SLAVES}")
  "$bin"/slaves.sh --config "$HADOOP_CONF_DIR" cd "$HADOOP_PREFIX" \; "$bin/start-raidnode.sh"
else
  echo "No raidnode file in ${HADOOP_CONF_DIR}/raidnode"
fi
|
||||
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Start hadoop RaidNode process
|
||||
# Run this on RaidNode machine
|
||||
|
||||
usage="Usage: start-raidnode.sh"

params=$#
# Resolve the absolute directory that holds this script.
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

# hadoop-config.sh is looked up next to this script unless
# HADOOP_LIBEXEC_DIR is already set in the environment.
DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# get arguments
# NOTE(review): the usage line is printed but the script keeps going when
# arguments are given - confirm whether it should exit here.
if [ $# -ge 1 ]; then
  echo "$usage"
fi

if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
# Append the RaidNode-specific JVM options to the general ones.
export HADOOP_OPTS="$HADOOP_OPTS $HADOOP_RAIDNODE_OPTS"

"$bin"/hadoop-daemon.sh --config "$HADOOP_CONF_DIR" start org.apache.hadoop.raid.RaidNode
|
|
@ -1,42 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Stop the hadoop RaidNode process on the machine(s) listed in the file
# ${HADOOP_CONF_DIR}/raidnode. The script takes no arguments of its own.

usage="Usage: stop-raidnode-remote.sh"

# NOTE(review): params is not read anywhere in this script; it is kept in case
# a sourced script relies on it - confirm before removing.
params=$#
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# Any argument still present at this point is unexpected: print the usage and
# stop instead of silently carrying on.
if [ $# -ge 1 ]; then
  echo "$usage"
  exit 1
fi

if [ -f "${HADOOP_CONF_DIR}/raidnode" ]; then
  # slaves.sh is pointed at the raidnode host list through HADOOP_SLAVES.
  export HADOOP_SLAVES="${HADOOP_CONF_DIR}/raidnode"
  # The command substitution is intentionally left unquoted so the host list
  # is printed on a single line, exactly as before; only the path is quoted.
  echo "Stopping raidnode at "$(cat "${HADOOP_SLAVES}")
  "$bin"/slaves.sh --config "$HADOOP_CONF_DIR" cd "$HADOOP_PREFIX" \; "$bin/stop-raidnode.sh"
else
  echo "No raidnode file in ${HADOOP_CONF_DIR}/raidnode"
fi
|
|
@ -1,39 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Stop the hadoop RaidNode daemon.
# Run this on the RaidNode machine. The script takes no arguments of its own.

usage="Usage: stop-raidnode.sh"

# NOTE(review): params is not read anywhere in this script; it is kept in case
# a sourced script relies on it - confirm before removing.
params=$#
bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# Any argument still present at this point is unexpected: print the usage and
# stop instead of silently carrying on.
if [ $# -ge 1 ]; then
  echo "$usage"
  exit 1
fi

# Append the RaidNode-specific options to the options exported to the daemon.
export HADOOP_OPTS="$HADOOP_OPTS $HADOOP_RAIDNODE_OPTS"

"$bin"/hadoop-daemon.sh --config "$HADOOP_CONF_DIR" stop org.apache.hadoop.raid.RaidNode
|
|
@ -1,501 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.net.URI;
|
||||
import java.util.Random;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.zip.CRC32;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.raid.RaidNode;
|
||||
import org.apache.hadoop.raid.RaidUtils;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestRaidDfs {
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
final static String LOG_DIR = "target/raidlog";
|
||||
final static long RELOAD_INTERVAL = 1000;
|
||||
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidDfs");
|
||||
final static int NUM_DATANODES = 3;
|
||||
|
||||
Configuration conf;
|
||||
String namenode = null;
|
||||
String hftp = null;
|
||||
MiniDFSCluster dfs = null;
|
||||
FileSystem fileSys = null;
|
||||
String jobTrackerName = null;
|
||||
ErasureCodeType code;
|
||||
int stripeLength;
|
||||
|
||||
private void mySetup(
|
||||
String erasureCode, int rsParityLength) throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
|
||||
conf.set("fs.raid.recoverylogdir", LOG_DIR);
|
||||
conf.setInt(RaidNode.RS_PARITY_LENGTH_KEY, rsParityLength);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
// do not use map-reduce cluster for Raiding
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
conf.setInt("hdfs.raid.stripeLength", stripeLength);
|
||||
conf.set("xor".equals(erasureCode) ? RaidNode.RAID_LOCATION_KEY :
|
||||
RaidNode.RAIDRS_LOCATION_KEY, "/destraid");
|
||||
|
||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
namenode = fileSys.getUri().toString();
|
||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
}
|
||||
|
||||
private void myTearDown() throws Exception {
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
private LocatedBlocks getBlockLocations(Path file, long length)
|
||||
throws IOException {
|
||||
DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
|
||||
return RaidDFSUtil.getBlockLocations(
|
||||
dfs, file.toUri().getPath(), 0, length);
|
||||
}
|
||||
|
||||
private LocatedBlocks getBlockLocations(Path file)
|
||||
throws IOException {
|
||||
FileStatus stat = fileSys.getFileStatus(file);
|
||||
return getBlockLocations(file, stat.getLen());
|
||||
}
|
||||
|
||||
private DistributedRaidFileSystem getRaidFS() throws IOException {
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
Configuration clientConf = new Configuration(conf);
|
||||
clientConf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
|
||||
clientConf.set("fs.raid.underlyingfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
|
||||
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
||||
URI dfsUri = dfs.getUri();
|
||||
return (DistributedRaidFileSystem)FileSystem.get(dfsUri, clientConf);
|
||||
}
|
||||
|
||||
public static void waitForFileRaided(
|
||||
Log logger, FileSystem fileSys, Path file, Path destPath)
|
||||
throws IOException, InterruptedException {
|
||||
FileStatus parityStat = null;
|
||||
String fileName = file.getName().toString();
|
||||
// wait till file is raided
|
||||
while (parityStat == null) {
|
||||
logger.info("Waiting for files to be raided.");
|
||||
try {
|
||||
FileStatus[] listPaths = fileSys.listStatus(destPath);
|
||||
if (listPaths != null) {
|
||||
for (FileStatus f : listPaths) {
|
||||
logger.info("File raided so far : " + f.getPath());
|
||||
String found = f.getPath().getName().toString();
|
||||
if (fileName.equals(found)) {
|
||||
parityStat = f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
//ignore
|
||||
}
|
||||
Thread.sleep(1000); // keep waiting
|
||||
}
|
||||
|
||||
while (true) {
|
||||
LocatedBlocks locations = null;
|
||||
DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
|
||||
locations = RaidDFSUtil.getBlockLocations(
|
||||
dfs, file.toUri().getPath(), 0, parityStat.getLen());
|
||||
if (!locations.isUnderConstruction()) {
|
||||
break;
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
FileStatus stat = fileSys.getFileStatus(file);
|
||||
if (stat.getReplication() == 1) break;
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
}
|
||||
|
||||
private void corruptBlockAndValidate(Path srcFile, Path destPath,
|
||||
int[] listBlockNumToCorrupt, long blockSize, int numBlocks)
|
||||
throws IOException, InterruptedException {
|
||||
int repl = 1;
|
||||
long crc = createTestFilePartialLastBlock(fileSys, srcFile, repl,
|
||||
numBlocks, blockSize);
|
||||
long length = fileSys.getFileStatus(srcFile).getLen();
|
||||
|
||||
RaidNode.doRaid(conf, fileSys.getFileStatus(srcFile),
|
||||
destPath, code, new RaidNode.Statistics(), new RaidUtils.DummyProgressable(),
|
||||
false, repl, repl, stripeLength);
|
||||
|
||||
// Delete first block of file
|
||||
for (int blockNumToCorrupt : listBlockNumToCorrupt) {
|
||||
LOG.info("Corrupt block " + blockNumToCorrupt + " of file " + srcFile);
|
||||
LocatedBlocks locations = getBlockLocations(srcFile);
|
||||
corruptBlock(dfs, srcFile, locations.get(blockNumToCorrupt).getBlock(),
|
||||
NUM_DATANODES, true);
|
||||
}
|
||||
|
||||
// Validate
|
||||
DistributedRaidFileSystem raidfs = getRaidFS();
|
||||
assertTrue(validateFile(raidfs, srcFile, length, crc));
|
||||
validateLogFile(getRaidFS(), new Path(LOG_DIR));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a file, corrupt several blocks in it and ensure that the file can be
|
||||
* read through DistributedRaidFileSystem by ReedSolomon coding.
|
||||
*/
|
||||
@Test
|
||||
public void testRaidDfsRs() throws Exception {
|
||||
LOG.info("Test testRaidDfs started.");
|
||||
|
||||
code = ErasureCodeType.RS;
|
||||
long blockSize = 8192L;
|
||||
int numBlocks = 8;
|
||||
stripeLength = 3;
|
||||
mySetup("rs", 3);
|
||||
|
||||
int[][] corrupt = {{1, 2, 3}, {1, 4, 7}, {3, 6, 7}};
|
||||
try {
|
||||
for (int i = 0; i < corrupt.length; i++) {
|
||||
Path file = new Path("/user/dhruba/raidtest/file" + i);
|
||||
corruptBlockAndValidate(
|
||||
file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info("testRaidDfs Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test testRaidDfs completed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test DistributedRaidFileSystem.readFully()
|
||||
*/
|
||||
@Test
|
||||
public void testReadFully() throws Exception {
|
||||
code = ErasureCodeType.XOR;
|
||||
stripeLength = 3;
|
||||
mySetup("xor", 1);
|
||||
|
||||
try {
|
||||
Path file = new Path("/user/raid/raidtest/file1");
|
||||
long crc = createTestFile(fileSys, file, 1, 8, 8192L);
|
||||
FileStatus stat = fileSys.getFileStatus(file);
|
||||
LOG.info("Created " + file + ", crc=" + crc + ", len=" + stat.getLen());
|
||||
|
||||
byte[] filebytes = new byte[(int)stat.getLen()];
|
||||
// Test that readFully returns the correct CRC when there are no errors.
|
||||
DistributedRaidFileSystem raidfs = getRaidFS();
|
||||
FSDataInputStream stm = raidfs.open(file);
|
||||
stm.readFully(0, filebytes);
|
||||
assertEquals(crc, bufferCRC(filebytes));
|
||||
stm.close();
|
||||
|
||||
// Generate parity.
|
||||
RaidNode.doRaid(conf, fileSys.getFileStatus(file),
|
||||
new Path("/destraid"), code, new RaidNode.Statistics(),
|
||||
new RaidUtils.DummyProgressable(),
|
||||
false, 1, 1, stripeLength);
|
||||
int[] corrupt = {0, 4, 7}; // first, last and middle block
|
||||
for (int blockIdx : corrupt) {
|
||||
LOG.info("Corrupt block " + blockIdx + " of file " + file);
|
||||
LocatedBlocks locations = getBlockLocations(file);
|
||||
corruptBlock(dfs, file, locations.get(blockIdx).getBlock(),
|
||||
NUM_DATANODES, true);
|
||||
}
|
||||
// Test that readFully returns the correct CRC when there are errors.
|
||||
stm = raidfs.open(file);
|
||||
stm.readFully(0, filebytes);
|
||||
assertEquals(crc, bufferCRC(filebytes));
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that access time and mtime of a source file do not change after
|
||||
* raiding.
|
||||
*/
|
||||
@Test
|
||||
public void testAccessTime() throws Exception {
|
||||
LOG.info("Test testAccessTime started.");
|
||||
|
||||
code = ErasureCodeType.XOR;
|
||||
long blockSize = 8192L;
|
||||
int numBlocks = 8;
|
||||
int repl = 1;
|
||||
stripeLength = 3;
|
||||
mySetup("xor", 1);
|
||||
|
||||
Path file = new Path("/user/dhruba/raidtest/file");
|
||||
createTestFilePartialLastBlock(fileSys, file, repl, numBlocks, blockSize);
|
||||
FileStatus stat = fileSys.getFileStatus(file);
|
||||
|
||||
try {
|
||||
RaidNode.doRaid(conf, fileSys.getFileStatus(file),
|
||||
new Path("/destraid"), code, new RaidNode.Statistics(),
|
||||
new RaidUtils.DummyProgressable(), false, repl, repl, stripeLength);
|
||||
|
||||
FileStatus newStat = fileSys.getFileStatus(file);
|
||||
|
||||
assertEquals(stat.getModificationTime(), newStat.getModificationTime());
|
||||
assertEquals(stat.getAccessTime(), newStat.getAccessTime());
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a file, corrupt a block in it and ensure that the file can be
|
||||
* read through DistributedRaidFileSystem by XOR code.
|
||||
*/
|
||||
@Test
|
||||
public void testRaidDfsXor() throws Exception {
|
||||
LOG.info("Test testRaidDfs started.");
|
||||
|
||||
code = ErasureCodeType.XOR;
|
||||
long blockSize = 8192L;
|
||||
int numBlocks = 8;
|
||||
stripeLength = 3;
|
||||
mySetup("xor", 1);
|
||||
|
||||
int[][] corrupt = {{0}, {4}, {7}}; // first, last and middle block
|
||||
try {
|
||||
for (int i = 0; i < corrupt.length; i++) {
|
||||
Path file = new Path("/user/dhruba/raidtest/" + i);
|
||||
corruptBlockAndValidate(
|
||||
file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info("testRaidDfs Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test testRaidDfs completed.");
|
||||
}
|
||||
|
||||
//
|
||||
// creates a file and populate it with random data. Returns its crc.
|
||||
//
|
||||
public static long createTestFile(FileSystem fileSys, Path name, int repl,
|
||||
int numBlocks, long blocksize)
|
||||
throws IOException {
|
||||
CRC32 crc = new CRC32();
|
||||
Random rand = new Random();
|
||||
FSDataOutputStream stm = fileSys.create(name, true,
|
||||
fileSys.getConf().getInt("io.file.buffer.size", 4096),
|
||||
(short)repl, blocksize);
|
||||
// fill random data into file
|
||||
final byte[] b = new byte[(int)blocksize];
|
||||
for (int i = 0; i < numBlocks; i++) {
|
||||
rand.nextBytes(b);
|
||||
stm.write(b);
|
||||
crc.update(b);
|
||||
}
|
||||
stm.close();
|
||||
return crc.getValue();
|
||||
}
|
||||
|
||||
//
|
||||
// Creates a file with partially full last block. Populate it with random
|
||||
// data. Returns its crc.
|
||||
//
|
||||
public static long createTestFilePartialLastBlock(
|
||||
FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
|
||||
throws IOException {
|
||||
CRC32 crc = new CRC32();
|
||||
Random rand = new Random();
|
||||
FSDataOutputStream stm = fileSys.create(name, true,
|
||||
fileSys.getConf().getInt("io.file.buffer.size", 4096),
|
||||
(short)repl, blocksize);
|
||||
// Write whole blocks.
|
||||
byte[] b = new byte[(int)blocksize];
|
||||
for (int i = 1; i < numBlocks; i++) {
|
||||
rand.nextBytes(b);
|
||||
stm.write(b);
|
||||
crc.update(b);
|
||||
}
|
||||
// Write partial block.
|
||||
b = new byte[(int)blocksize/2 - 1];
|
||||
rand.nextBytes(b);
|
||||
stm.write(b);
|
||||
crc.update(b);
|
||||
|
||||
stm.close();
|
||||
return crc.getValue();
|
||||
}
|
||||
|
||||
static long bufferCRC(byte[] buf) {
|
||||
CRC32 crc = new CRC32();
|
||||
crc.update(buf, 0, buf.length);
|
||||
return crc.getValue();
|
||||
}
|
||||
|
||||
//
|
||||
// validates that file matches the crc.
|
||||
//
|
||||
public static boolean validateFile(FileSystem fileSys, Path name, long length,
|
||||
long crc)
|
||||
throws IOException {
|
||||
|
||||
long numRead = 0;
|
||||
CRC32 newcrc = new CRC32();
|
||||
FSDataInputStream stm = fileSys.open(name);
|
||||
final byte[] b = new byte[4192];
|
||||
int num = 0;
|
||||
while (num >= 0) {
|
||||
num = stm.read(b);
|
||||
if (num < 0) {
|
||||
break;
|
||||
}
|
||||
numRead += num;
|
||||
newcrc.update(b, 0, num);
|
||||
}
|
||||
stm.close();
|
||||
|
||||
if (numRead != length) {
|
||||
LOG.info("Number of bytes read " + numRead +
|
||||
" does not match file size " + length);
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG.info(" Newcrc " + newcrc.getValue() + " old crc " + crc);
|
||||
if (newcrc.getValue() != crc) {
|
||||
LOG.info("CRC mismatch of file " + name + ": " + newcrc + " vs. " + crc);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
//
|
||||
// validates the contents of raid recovery log file
|
||||
//
|
||||
public static void validateLogFile(FileSystem fileSys, Path logDir)
|
||||
throws IOException {
|
||||
FileStatus f = fileSys.listStatus(logDir)[0];
|
||||
FSDataInputStream stm = fileSys.open(f.getPath());
|
||||
try {
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(stm));
|
||||
assertEquals("Recovery attempt log", reader.readLine());
|
||||
assertTrue(Pattern.matches("Source path : /user/dhruba/raidtest/.*",
|
||||
reader.readLine()));
|
||||
assertTrue(Pattern.matches("Alternate path : .*/destraid",
|
||||
reader.readLine()));
|
||||
assertEquals("Stripe lentgh : 3", reader.readLine());
|
||||
assertTrue(Pattern.matches("Corrupt offset : \\d*", reader.readLine()));
|
||||
assertTrue(Pattern.matches("Output from unRaid : " +
|
||||
"hdfs://.*/tmp/raid/user/dhruba/raidtest/.*recovered",
|
||||
reader.readLine()));
|
||||
} finally {
|
||||
stm.close();
|
||||
}
|
||||
LOG.info("Raid HDFS Recovery log verified");
|
||||
}
|
||||
|
||||
//
|
||||
// Delete/Corrupt specified block of file
|
||||
//
|
||||
public static void corruptBlock(MiniDFSCluster dfs, Path file, ExtendedBlock blockNum,
|
||||
int numDataNodes, boolean delete) throws IOException {
|
||||
// Now deliberately remove/truncate replicas of blocks
|
||||
int numDeleted = 0;
|
||||
int numCorrupted = 0;
|
||||
for (int i = 0; i < numDataNodes; i++) {
|
||||
File block = MiniDFSCluster.getBlockFile(i, blockNum);
|
||||
if (block == null || !block.exists()) {
|
||||
continue;
|
||||
}
|
||||
if (delete) {
|
||||
block.delete();
|
||||
LOG.info("Deleted block " + block);
|
||||
numDeleted++;
|
||||
} else {
|
||||
// Corrupt
|
||||
long seekPos = block.length()/2;
|
||||
RandomAccessFile raf = new RandomAccessFile(block, "rw");
|
||||
raf.seek(seekPos);
|
||||
int data = raf.readInt();
|
||||
raf.seek(seekPos);
|
||||
raf.writeInt(data+1);
|
||||
LOG.info("Corrupted block " + block);
|
||||
numCorrupted++;
|
||||
}
|
||||
}
|
||||
assertTrue("Nothing corrupted or deleted",
|
||||
(numCorrupted + numDeleted) > 0);
|
||||
}
|
||||
|
||||
public static void corruptBlock(Path file, ExtendedBlock blockNum,
|
||||
int numDataNodes, long offset) throws IOException {
|
||||
// Now deliberately corrupt replicas of the the block.
|
||||
for (int i = 0; i < numDataNodes; i++) {
|
||||
File block = MiniDFSCluster.getBlockFile(i, blockNum);
|
||||
if (block == null || !block.exists()) {
|
||||
continue;
|
||||
}
|
||||
RandomAccessFile raf = new RandomAccessFile(block, "rw");
|
||||
raf.seek(offset);
|
||||
int data = raf.readInt();
|
||||
raf.seek(offset);
|
||||
raf.writeInt(data+1);
|
||||
LOG.info("Corrupted block " + block);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,518 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.BlockLocation;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.CachedFullPathNames;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.CachedLocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.FileType;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeRaidTestUtil;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeRaidUtil;
|
||||
import org.apache.hadoop.net.NetworkTopology;
|
||||
import org.apache.hadoop.raid.RaidNode;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestBlockPlacementPolicyRaid {
|
||||
private Configuration conf = null;
|
||||
private MiniDFSCluster cluster = null;
|
||||
private FSNamesystem namesystem = null;
|
||||
private BlockManager blockManager;
|
||||
private NetworkTopology networktopology;
|
||||
private BlockPlacementPolicyRaid policy = null;
|
||||
private FileSystem fs = null;
|
||||
String[] rack1 = {"/rack1"};
|
||||
String[] rack2 = {"/rack2"};
|
||||
String[] host1 = {"host1.rack1.com"};
|
||||
String[] host2 = {"host2.rack2.com"};
|
||||
String xorPrefix = null;
|
||||
String raidTempPrefix = null;
|
||||
String raidrsTempPrefix = null;
|
||||
String raidrsHarTempPrefix = null;
|
||||
|
||||
final static Log LOG =
|
||||
LogFactory.getLog(TestBlockPlacementPolicyRaid.class);
|
||||
|
||||
protected void setupCluster() throws IOException {
|
||||
conf = new Configuration();
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
|
||||
conf.set("dfs.replication.pending.timeout.sec", "2");
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1L);
|
||||
conf.set("dfs.block.replicator.classname",
|
||||
BlockPlacementPolicyRaid.class.getName());
|
||||
conf.set(RaidNode.STRIPE_LENGTH_KEY, "2");
|
||||
conf.set(RaidNode.RS_PARITY_LENGTH_KEY, "3");
|
||||
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1);
|
||||
// start the cluster with one datanode first
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).
|
||||
format(true).racks(rack1).hosts(host1).build();
|
||||
cluster.waitActive();
|
||||
namesystem = cluster.getNameNode().getNamesystem();
|
||||
blockManager = namesystem.getBlockManager();
|
||||
networktopology = blockManager.getDatanodeManager().getNetworkTopology();
|
||||
|
||||
Assert.assertTrue("BlockPlacementPolicy type is not correct.",
|
||||
blockManager.getBlockPlacementPolicy() instanceof BlockPlacementPolicyRaid);
|
||||
policy = (BlockPlacementPolicyRaid)blockManager.getBlockPlacementPolicy();
|
||||
fs = cluster.getFileSystem();
|
||||
xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
|
||||
raidTempPrefix = RaidNode.xorTempPrefix(conf);
|
||||
raidrsTempPrefix = RaidNode.rsTempPrefix(conf);
|
||||
raidrsHarTempPrefix = RaidNode.rsHarTempPrefix(conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the parity files will be placed at the good locations when we
|
||||
* create them.
|
||||
*/
|
||||
@Test
|
||||
public void testChooseTargetForRaidFile() throws IOException {
|
||||
setupCluster();
|
||||
try {
|
||||
String src = "/dir/file";
|
||||
String parity = raidrsTempPrefix + src;
|
||||
DFSTestUtil.createFile(fs, new Path(src), 4, (short)1, 0L);
|
||||
DFSTestUtil.waitReplication(fs, new Path(src), (short)1);
|
||||
refreshPolicy();
|
||||
setBlockPlacementPolicy(namesystem, policy);
|
||||
// start 3 more datanodes
|
||||
String[] racks = {"/rack2", "/rack2", "/rack2",
|
||||
"/rack2", "/rack2", "/rack2"};
|
||||
String[] hosts =
|
||||
{"host2.rack2.com", "host3.rack2.com", "host4.rack2.com",
|
||||
"host5.rack2.com", "host6.rack2.com", "host7.rack2.com"};
|
||||
cluster.startDataNodes(conf, 6, true, null, racks, hosts, null);
|
||||
int numBlocks = 6;
|
||||
DFSTestUtil.createFile(fs, new Path(parity), numBlocks, (short)2, 0L);
|
||||
DFSTestUtil.waitReplication(fs, new Path(parity), (short)2);
|
||||
FileStatus srcStat = fs.getFileStatus(new Path(src));
|
||||
BlockLocation[] srcLoc =
|
||||
fs.getFileBlockLocations(srcStat, 0, srcStat.getLen());
|
||||
FileStatus parityStat = fs.getFileStatus(new Path(parity));
|
||||
BlockLocation[] parityLoc =
|
||||
fs.getFileBlockLocations(parityStat, 0, parityStat.getLen());
|
||||
int parityLen = RaidNode.rsParityLength(conf);
|
||||
for (int i = 0; i < numBlocks / parityLen; i++) {
|
||||
Set<String> locations = new HashSet<String>();
|
||||
for (int j = 0; j < srcLoc.length; j++) {
|
||||
String [] names = srcLoc[j].getNames();
|
||||
for (int k = 0; k < names.length; k++) {
|
||||
LOG.info("Source block location: " + names[k]);
|
||||
locations.add(names[k]);
|
||||
}
|
||||
}
|
||||
for (int j = 0 ; j < parityLen; j++) {
|
||||
String[] names = parityLoc[j + i * parityLen].getNames();
|
||||
for (int k = 0; k < names.length; k++) {
|
||||
LOG.info("Parity block location: " + names[k]);
|
||||
Assert.assertTrue(locations.add(names[k]));
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the har parity files will be placed at the good locations when we
|
||||
* create them.
|
||||
*/
|
||||
@Test
|
||||
public void testChooseTargetForHarRaidFile() throws IOException {
|
||||
setupCluster();
|
||||
try {
|
||||
String[] racks = {"/rack2", "/rack2", "/rack2",
|
||||
"/rack2", "/rack2", "/rack2"};
|
||||
String[] hosts =
|
||||
{"host2.rack2.com", "host3.rack2.com", "host4.rack2.com",
|
||||
"host5.rack2.com", "host6.rack2.com", "host7.rack2.com"};
|
||||
cluster.startDataNodes(conf, 6, true, null, racks, hosts, null);
|
||||
String harParity = raidrsHarTempPrefix + "/dir/file";
|
||||
int numBlocks = 11;
|
||||
DFSTestUtil.createFile(fs, new Path(harParity), numBlocks, (short)1, 0L);
|
||||
DFSTestUtil.waitReplication(fs, new Path(harParity), (short)1);
|
||||
FileStatus stat = fs.getFileStatus(new Path(harParity));
|
||||
BlockLocation[] loc = fs.getFileBlockLocations(stat, 0, stat.getLen());
|
||||
int rsParityLength = RaidNode.rsParityLength(conf);
|
||||
for (int i = 0; i < numBlocks - rsParityLength; i++) {
|
||||
Set<String> locations = new HashSet<String>();
|
||||
for (int j = 0; j < rsParityLength; j++) {
|
||||
for (int k = 0; k < loc[i + j].getNames().length; k++) {
|
||||
// verify that every adjacent 4 blocks are on differnt nodes
|
||||
String name = loc[i + j].getNames()[k];
|
||||
LOG.info("Har Raid block location: " + name);
|
||||
Assert.assertTrue(locations.add(name));
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test BlockPlacementPolicyRaid.CachedLocatedBlocks
|
||||
* Verify that the results obtained from cache is the same as
|
||||
* the results obtained directly
|
||||
*/
|
||||
@Test
|
||||
public void testCachedBlocks() throws IOException {
|
||||
setupCluster();
|
||||
try {
|
||||
String file1 = "/dir/file1";
|
||||
String file2 = "/dir/file2";
|
||||
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
|
||||
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
|
||||
// test blocks cache
|
||||
CachedLocatedBlocks cachedBlocks = new CachedLocatedBlocks(namesystem);
|
||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
|
||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
|
||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
|
||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
|
||||
try {
|
||||
Thread.sleep(1200L);
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
|
||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test BlockPlacementPolicyRaid.CachedFullPathNames
|
||||
* Verify that the results obtained from cache is the same as
|
||||
* the results obtained directly
|
||||
*/
|
||||
@Test
|
||||
public void testCachedPathNames() throws IOException {
|
||||
setupCluster();
|
||||
try {
|
||||
String file1 = "/dir/file1";
|
||||
String file2 = "/dir/file2";
|
||||
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
|
||||
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
|
||||
// test full path cache
|
||||
CachedFullPathNames cachedFullPathNames =
|
||||
new CachedFullPathNames(namesystem);
|
||||
final BlockCollection[] bcs = NameNodeRaidTestUtil.getBlockCollections(
|
||||
namesystem, file1, file2);
|
||||
|
||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
|
||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
|
||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
|
||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
|
||||
try {
|
||||
Thread.sleep(1200L);
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
|
||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Test the result of getCompanionBlocks() on the unraided files
|
||||
*/
|
||||
@Test
|
||||
public void testGetCompanionBLocks() throws IOException {
|
||||
setupCluster();
|
||||
try {
|
||||
String file1 = "/dir/file1";
|
||||
String file2 = "/raid/dir/file2";
|
||||
String file3 = "/raidrs/dir/file3";
|
||||
// Set the policy to default policy to place the block in the default way
|
||||
setBlockPlacementPolicy(namesystem, new BlockPlacementPolicyDefault(
|
||||
conf, namesystem, networktopology));
|
||||
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
|
||||
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
|
||||
DFSTestUtil.createFile(fs, new Path(file3), 8, (short)1, 0L);
|
||||
Collection<LocatedBlock> companionBlocks;
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file1).get(0).getBlock());
|
||||
Assert.assertTrue(companionBlocks == null || companionBlocks.size() == 0);
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file1).get(2).getBlock());
|
||||
Assert.assertTrue(companionBlocks == null || companionBlocks.size() == 0);
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file2).get(0).getBlock());
|
||||
Assert.assertEquals(1, companionBlocks.size());
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file2).get(3).getBlock());
|
||||
Assert.assertEquals(1, companionBlocks.size());
|
||||
|
||||
int rsParityLength = RaidNode.rsParityLength(conf);
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file3).get(0).getBlock());
|
||||
Assert.assertEquals(rsParityLength, companionBlocks.size());
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file3).get(4).getBlock());
|
||||
Assert.assertEquals(rsParityLength, companionBlocks.size());
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, getBlocks(namesystem, file3).get(6).getBlock());
|
||||
Assert.assertEquals(2, companionBlocks.size());
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void setBlockPlacementPolicy(
|
||||
FSNamesystem namesystem, BlockPlacementPolicy policy) {
|
||||
namesystem.writeLock();
|
||||
try {
|
||||
namesystem.getBlockManager().setBlockPlacementPolicy(policy);
|
||||
} finally {
|
||||
namesystem.writeUnlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test BlockPlacementPolicyRaid actually deletes the correct replica.
|
||||
* Start 2 datanodes and create 1 source file and its parity file.
|
||||
* 1) Start host1, create the parity file with replication 1
|
||||
* 2) Start host2, create the source file with replication 2
|
||||
* 3) Set repliation of source file to 1
|
||||
* Verify that the policy should delete the block with more companion blocks.
|
||||
*/
|
||||
@Test
|
||||
public void testDeleteReplica() throws IOException {
|
||||
setupCluster();
|
||||
try {
|
||||
// Set the policy to default policy to place the block in the default way
|
||||
setBlockPlacementPolicy(namesystem, new BlockPlacementPolicyDefault(
|
||||
conf, namesystem, networktopology));
|
||||
DatanodeDescriptor datanode1 = blockManager.getDatanodeManager(
|
||||
).getDatanodeCyclicIteration("").iterator().next().getValue();
|
||||
String source = "/dir/file";
|
||||
String parity = xorPrefix + source;
|
||||
|
||||
final Path parityPath = new Path(parity);
|
||||
DFSTestUtil.createFile(fs, parityPath, 3, (short)1, 0L);
|
||||
DFSTestUtil.waitReplication(fs, parityPath, (short)1);
|
||||
|
||||
// start one more datanode
|
||||
cluster.startDataNodes(conf, 1, true, null, rack2, host2, null);
|
||||
DatanodeDescriptor datanode2 = null;
|
||||
for(Map.Entry<String, DatanodeDescriptor> e : blockManager.getDatanodeManager(
|
||||
).getDatanodeCyclicIteration("")) {
|
||||
final DatanodeDescriptor d = e.getValue();
|
||||
if (!d.getName().equals(datanode1.getName())) {
|
||||
datanode2 = d;
|
||||
}
|
||||
}
|
||||
Assert.assertTrue(datanode2 != null);
|
||||
cluster.waitActive();
|
||||
final Path sourcePath = new Path(source);
|
||||
DFSTestUtil.createFile(fs, sourcePath, 5, (short)2, 0L);
|
||||
DFSTestUtil.waitReplication(fs, sourcePath, (short)2);
|
||||
|
||||
refreshPolicy();
|
||||
Assert.assertEquals(parity,
|
||||
policy.getParityFile(source));
|
||||
Assert.assertEquals(source,
|
||||
policy.getSourceFile(parity, xorPrefix));
|
||||
|
||||
List<LocatedBlock> sourceBlocks = getBlocks(namesystem, source);
|
||||
List<LocatedBlock> parityBlocks = getBlocks(namesystem, parity);
|
||||
Assert.assertEquals(5, sourceBlocks.size());
|
||||
Assert.assertEquals(3, parityBlocks.size());
|
||||
|
||||
// verify the result of getCompanionBlocks()
|
||||
Collection<LocatedBlock> companionBlocks;
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, sourceBlocks.get(0).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{0, 1}, new int[]{0});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, sourceBlocks.get(1).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{0, 1}, new int[]{0});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, sourceBlocks.get(2).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{2, 3}, new int[]{1});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, sourceBlocks.get(3).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{2, 3}, new int[]{1});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, sourceBlocks.get(4).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{4}, new int[]{2});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, parityBlocks.get(0).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{0, 1}, new int[]{0});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, parityBlocks.get(1).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{2, 3}, new int[]{1});
|
||||
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, parityBlocks.get(2).getBlock());
|
||||
verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
|
||||
new int[]{4}, new int[]{2});
|
||||
|
||||
// Set the policy back to raid policy. We have to create a new object
|
||||
// here to clear the block location cache
|
||||
refreshPolicy();
|
||||
setBlockPlacementPolicy(namesystem, policy);
|
||||
// verify policy deletes the correct blocks. companion blocks should be
|
||||
// evenly distributed.
|
||||
fs.setReplication(sourcePath, (short)1);
|
||||
DFSTestUtil.waitReplication(fs, sourcePath, (short)1);
|
||||
Map<String, Integer> counters = new HashMap<String, Integer>();
|
||||
refreshPolicy();
|
||||
for (int i = 0; i < parityBlocks.size(); i++) {
|
||||
companionBlocks = getCompanionBlocks(
|
||||
namesystem, policy, parityBlocks.get(i).getBlock());
|
||||
|
||||
counters = BlockPlacementPolicyRaid.countCompanionBlocks(
|
||||
companionBlocks, false);
|
||||
Assert.assertTrue(counters.get(datanode1.getName()) >= 1 &&
|
||||
counters.get(datanode1.getName()) <= 2);
|
||||
Assert.assertTrue(counters.get(datanode1.getName()) +
|
||||
counters.get(datanode2.getName()) ==
|
||||
companionBlocks.size());
|
||||
|
||||
counters = BlockPlacementPolicyRaid.countCompanionBlocks(
|
||||
companionBlocks, true);
|
||||
Assert.assertTrue(counters.get(datanode1.getParent().getName()) >= 1 &&
|
||||
counters.get(datanode1.getParent().getName()) <= 2);
|
||||
Assert.assertTrue(counters.get(datanode1.getParent().getName()) +
|
||||
counters.get(datanode2.getParent().getName()) ==
|
||||
companionBlocks.size());
|
||||
}
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create a new BlockPlacementPolicyRaid to clear the cache
|
||||
private void refreshPolicy() {
|
||||
policy = new BlockPlacementPolicyRaid();
|
||||
policy.initialize(conf, namesystem, networktopology);
|
||||
}
|
||||
|
||||
private void verifyCompanionBlocks(Collection<LocatedBlock> companionBlocks,
|
||||
List<LocatedBlock> sourceBlocks, List<LocatedBlock> parityBlocks,
|
||||
int[] sourceBlockIndexes, int[] parityBlockIndexes) {
|
||||
Set<ExtendedBlock> blockSet = new HashSet<ExtendedBlock>();
|
||||
for (LocatedBlock b : companionBlocks) {
|
||||
blockSet.add(b.getBlock());
|
||||
}
|
||||
Assert.assertEquals(sourceBlockIndexes.length + parityBlockIndexes.length,
|
||||
blockSet.size());
|
||||
for (int index : sourceBlockIndexes) {
|
||||
Assert.assertTrue(blockSet.contains(sourceBlocks.get(index).getBlock()));
|
||||
}
|
||||
for (int index : parityBlockIndexes) {
|
||||
Assert.assertTrue(blockSet.contains(parityBlocks.get(index).getBlock()));
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyCachedFullPathNameResult(
|
||||
CachedFullPathNames cachedFullPathNames, BlockCollection bc)
|
||||
throws IOException {
|
||||
String res1 = bc.getName();
|
||||
String res2 = cachedFullPathNames.get(bc);
|
||||
LOG.info("Actual path name: " + res1);
|
||||
LOG.info("Cached path name: " + res2);
|
||||
Assert.assertEquals(cachedFullPathNames.get(bc),
|
||||
bc.getName());
|
||||
}
|
||||
|
||||
private void verifyCachedBlocksResult(CachedLocatedBlocks cachedBlocks,
|
||||
FSNamesystem namesystem, String file) throws IOException{
|
||||
long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
|
||||
List<LocatedBlock> res1 = NameNodeRaidUtil.getBlockLocations(namesystem,
|
||||
file, 0L, len, false, false).getLocatedBlocks();
|
||||
List<LocatedBlock> res2 = cachedBlocks.get(file);
|
||||
for (int i = 0; i < res1.size(); i++) {
|
||||
LOG.info("Actual block: " + res1.get(i).getBlock());
|
||||
LOG.info("Cached block: " + res2.get(i).getBlock());
|
||||
Assert.assertEquals(res1.get(i).getBlock(), res2.get(i).getBlock());
|
||||
}
|
||||
}
|
||||
|
||||
private Collection<LocatedBlock> getCompanionBlocks(
|
||||
FSNamesystem namesystem, BlockPlacementPolicyRaid policy,
|
||||
ExtendedBlock block) throws IOException {
|
||||
INodeFile inode = (INodeFile)blockManager.blocksMap.getBlockCollection(block
|
||||
.getLocalBlock());
|
||||
FileType type = policy.getFileType(inode.getFullPathName());
|
||||
return policy.getCompanionBlocks(inode.getFullPathName(), type,
|
||||
block.getLocalBlock());
|
||||
}
|
||||
|
||||
private List<LocatedBlock> getBlocks(FSNamesystem namesystem, String file)
|
||||
throws IOException {
|
||||
long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
|
||||
return NameNodeRaidUtil.getBlockLocations(namesystem,
|
||||
file, 0, len, false, false).getLocatedBlocks();
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||
|
||||
public class NameNodeRaidTestUtil {
|
||||
public static BlockCollection[] getBlockCollections(final FSNamesystem namesystem,
|
||||
final String... files) throws UnresolvedLinkException {
|
||||
final BlockCollection[] inodes = new BlockCollection[files.length];
|
||||
final FSDirectory dir = namesystem.dir;
|
||||
dir.readLock();
|
||||
try {
|
||||
for(int i = 0; i < files.length; i++) {
|
||||
inodes[i] = (BlockCollection)dir.rootDir.getNode(files[i], true);
|
||||
}
|
||||
return inodes;
|
||||
} finally {
|
||||
dir.readUnlock();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,671 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.zip.CRC32;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.JobContext;
|
||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
import org.apache.hadoop.util.JarFinder;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TestBlockFixer {
|
||||
final static Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.TestBlockFixer");
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
||||
"test-raid.xml").getAbsolutePath();
|
||||
public static final String DistBlockFixer_JAR =
|
||||
JarFinder.getJar(DistBlockFixer.class);
|
||||
final static long RELOAD_INTERVAL = 1000;
|
||||
final static int NUM_DATANODES = 3;
|
||||
Configuration conf;
|
||||
String namenode = null;
|
||||
MiniDFSCluster dfs = null;
|
||||
String hftp = null;
|
||||
MiniMRCluster mr = null;
|
||||
FileSystem fileSys = null;
|
||||
RaidNode cnode = null;
|
||||
String jobTrackerName = null;
|
||||
Random rand = new Random();
|
||||
|
||||
/**
|
||||
* Tests isXorParityFile and isRsParityFile
|
||||
*/
|
||||
@Test
|
||||
public void testIsParityFile() throws IOException {
|
||||
Configuration testConf = new Configuration();
|
||||
testConf.set("hdfs.raid.locations", "/raid");
|
||||
testConf.set("hdfs.raidrs.locations", "/raidrs");
|
||||
|
||||
BlockFixer.BlockFixerHelper helper =
|
||||
new BlockFixer.BlockFixerHelper(testConf);
|
||||
|
||||
assertFalse("incorrectly identified rs parity file as xor parity file",
|
||||
helper.isXorParityFile(new Path("/raidrs/test/test")));
|
||||
assertTrue("could not identify rs parity file",
|
||||
helper.isRsParityFile(new Path("/raidrs/test/test")));
|
||||
assertTrue("could not identify xor parity file",
|
||||
helper.isXorParityFile(new Path("/raid/test/test")));
|
||||
assertFalse("incorrectly identified xor parity file as rs parity file",
|
||||
helper.isRsParityFile(new Path("/raid/test/test")));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test the filtering of trash files from the list of corrupt files.
|
||||
*/
|
||||
@Test
|
||||
public void testTrashFilter() {
|
||||
List<Path> files = new LinkedList<Path>();
|
||||
// Paths that do not match the trash pattern.
|
||||
Path p1 = new Path("/user/raid/raidtest/f1");
|
||||
Path p2 = new Path("/user/.Trash/");
|
||||
// Paths that match the trash pattern.
|
||||
Path p3 = new Path("/user/raid/.Trash/raidtest/f1");
|
||||
Path p4 = new Path("/user/raid/.Trash/");
|
||||
files.add(p1);
|
||||
files.add(p3);
|
||||
files.add(p4);
|
||||
files.add(p2);
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
RaidUtils.filterTrash(conf, files);
|
||||
|
||||
assertEquals("expected 2 non-trash files but got " + files.size(),
|
||||
2, files.size());
|
||||
for (Path p: files) {
|
||||
assertTrue("wrong file returned by filterTrash",
|
||||
p == p1 || p == p2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBlockFixLocal() throws Exception {
|
||||
implBlockFix(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a file with three stripes, corrupt a block each in two stripes,
|
||||
* and wait for the the file to be fixed.
|
||||
*/
|
||||
protected void implBlockFix(boolean local) throws Exception {
|
||||
LOG.info("Test testBlockFix started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, -1); // never har
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 7, blockSize);
|
||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
||||
LOG.info("Test testBlockFix created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
if (local) {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
||||
} else {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.DistBlockFixer");
|
||||
}
|
||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
||||
|
||||
try {
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
cnode.stop(); cnode.join();
|
||||
|
||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
||||
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
|
||||
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
||||
assertEquals("filesFixed() should return 0 before fixing files",
|
||||
0, cnode.blockFixer.filesFixed());
|
||||
|
||||
// Corrupt blocks in two different stripes. We can fix them.
|
||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(locs.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
|
||||
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("file not corrupted", 1, corruptFiles.length);
|
||||
assertEquals("wrong file corrupted",
|
||||
corruptFiles[0], file1.toUri().getPath());
|
||||
assertEquals("wrong number of corrupt blocks", 3,
|
||||
RaidDFSUtil.corruptBlocksInFile(dfs, file1.toUri().getPath(), 0,
|
||||
srcStat.getLen()).size());
|
||||
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
long start = Time.now();
|
||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
||||
Time.now() - start < 120000) {
|
||||
LOG.info("Test testBlockFix waiting for files to be fixed.");
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
assertEquals("file not fixed", 1, cnode.blockFixer.filesFixed());
|
||||
|
||||
dfs = getDFS(conf, dfs);
|
||||
assertTrue("file not fixed",
|
||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test testBlockFix Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test testBlockFix completed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests integrity of generated block.
|
||||
* Create a file and delete a block entirely. Wait for the block to be
|
||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
||||
* Test that corruption in the generated block can be detected by clients.
|
||||
*/
|
||||
protected void generatedBlockTestCommon(String testName, int blockToCorrupt,
|
||||
boolean local) throws Exception {
|
||||
LOG.info("Test " + testName + " started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, -1); // never har
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
long crc1 = TestRaidDfs.createTestFile(fileSys, file1, 1, 7, blockSize);
|
||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
||||
LOG.info("Test " + testName + " created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
if (local) {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
||||
} else {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.DistBlockFixer");
|
||||
}
|
||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
||||
try {
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
cnode.stop(); cnode.join();
|
||||
|
||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
||||
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
|
||||
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
||||
assertEquals("filesFixed() should return 0 before fixing files",
|
||||
0, cnode.blockFixer.filesFixed());
|
||||
|
||||
corruptBlock(locs.get(0).getBlock());
|
||||
reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
|
||||
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("file not corrupted",
|
||||
1, corruptFiles.length);
|
||||
assertEquals("wrong file corrupted",
|
||||
corruptFiles[0], file1.toUri().getPath());
|
||||
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
long start = Time.now();
|
||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
||||
Time.now() - start < 120000) {
|
||||
LOG.info("Test " + testName + " waiting for files to be fixed.");
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
assertEquals("file not fixed",
|
||||
1, cnode.blockFixer.filesFixed());
|
||||
|
||||
// Stop RaidNode
|
||||
cnode.stop(); cnode.join(); cnode = null;
|
||||
|
||||
// The block has successfully been reconstructed.
|
||||
dfs = getDFS(conf, dfs);
|
||||
assertTrue("file not fixed",
|
||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
||||
|
||||
// Now corrupt the generated block.
|
||||
locs = RaidDFSUtil.getBlockLocations(
|
||||
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
|
||||
corruptBlock(locs.get(0).getBlock());
|
||||
reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
|
||||
|
||||
try {
|
||||
Thread.sleep(5*1000);
|
||||
} catch (InterruptedException ignore) {
|
||||
}
|
||||
try {
|
||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1);
|
||||
fail("Expected exception not thrown");
|
||||
} catch (org.apache.hadoop.fs.ChecksumException ce) {
|
||||
} catch (org.apache.hadoop.hdfs.BlockMissingException bme) {
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test " + testName + " Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test " + testName + " completed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests integrity of generated block.
|
||||
* Create a file and delete a block entirely. Wait for the block to be
|
||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
||||
* Test that corruption in the generated block can be detected by clients.
|
||||
*/
|
||||
@Test
|
||||
public void testGeneratedBlockLocal() throws Exception {
|
||||
generatedBlockTestCommon("testGeneratedBlock", 3, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests integrity of generated last block.
|
||||
* Create a file and delete a block entirely. Wait for the block to be
|
||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
||||
* Test that corruption in the generated block can be detected by clients.
|
||||
*/
|
||||
@Test
|
||||
public void testGeneratedLastBlockLocal() throws Exception {
|
||||
generatedBlockTestCommon("testGeneratedLastBlock", 6, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParityBlockFixLocal() throws Exception {
|
||||
implParityBlockFix("testParityBlockFixLocal", true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Corrupt a parity file and wait for it to get fixed.
|
||||
*/
|
||||
protected void implParityBlockFix(String testName, boolean local)
|
||||
throws Exception {
|
||||
LOG.info("Test " + testName + " started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, -1); // never har
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
Path parityFile = new Path("/destraid/user/dhruba/raidtest/file1");
|
||||
TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 7, blockSize);
|
||||
LOG.info("Test " + testName + " created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
if (local) {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
||||
} else {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.DistBlockFixer");
|
||||
}
|
||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
||||
|
||||
try {
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
cnode.stop(); cnode.join();
|
||||
|
||||
long parityCRC = getCRC(fileSys, parityFile);
|
||||
|
||||
FileStatus parityStat = fileSys.getFileStatus(parityFile);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
||||
dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
|
||||
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
||||
assertEquals("filesFixed() should return 0 before fixing files",
|
||||
0, cnode.blockFixer.filesFixed());
|
||||
|
||||
// Corrupt parity blocks for different stripes.
|
||||
int[] corruptBlockIdxs = new int[]{0, 1, 2};
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(locs.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs, blockSize);
|
||||
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("file not corrupted",
|
||||
1, corruptFiles.length);
|
||||
assertEquals("wrong file corrupted",
|
||||
corruptFiles[0], parityFile.toUri().getPath());
|
||||
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
long start = Time.now();
|
||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
||||
Time.now() - start < 120000) {
|
||||
LOG.info("Test " + testName + " waiting for files to be fixed.");
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
assertEquals("file not fixed",
|
||||
1, cnode.blockFixer.filesFixed());
|
||||
|
||||
long checkCRC = getCRC(fileSys, parityFile);
|
||||
|
||||
assertEquals("file not fixed",
|
||||
parityCRC, checkCRC);
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test " + testName + " Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test " + testName + " completed.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParityHarBlockFixLocal() throws Exception {
|
||||
implParityHarBlockFix("testParityHarBlockFixLocal", true);
|
||||
}
|
||||
|
||||
protected void implParityHarBlockFix(String testName, boolean local)
|
||||
throws Exception {
|
||||
LOG.info("Test " + testName + " started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, 0); // Time before har = 0 days.
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
// Parity file will have 7 blocks.
|
||||
TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 20, blockSize);
|
||||
LOG.info("Test " + testName + " created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
// HAR block size = 2 * src block size = 2 * parity block size.
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.setLong("har.block.size", blockSize * 2);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
if (local) {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
||||
} else {
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.DistBlockFixer");
|
||||
}
|
||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
||||
|
||||
try {
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
Path harDirectory =
|
||||
new Path("/destraid/user/dhruba/raidtest/raidtest" +
|
||||
RaidNode.HAR_SUFFIX);
|
||||
long start = Time.now();
|
||||
while (Time.now() - start < 1000 * 120) {
|
||||
if (fileSys.exists(harDirectory)) {
|
||||
break;
|
||||
}
|
||||
LOG.info("Test " + testName + " waiting for har");
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
Path partFile = new Path(harDirectory, "part-0");
|
||||
long partCRC = getCRC(fileSys, partFile);
|
||||
FileStatus partStat = fileSys.getFileStatus(partFile);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
||||
dfs, partFile.toUri().getPath(), 0, partStat.getLen());
|
||||
// 7 parity blocks => 4 har blocks.
|
||||
assertEquals("wrong number of har blocks",
|
||||
4, locs.getLocatedBlocks().size());
|
||||
cnode.stop(); cnode.join();
|
||||
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
||||
assertEquals("filesFixed() should return 0 before fixing files",
|
||||
0, cnode.blockFixer.filesFixed());
|
||||
|
||||
// Corrupt parity blocks for different stripes.
|
||||
int[] corruptBlockIdxs = new int[]{0, 3};
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(locs.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, partFile, corruptBlockIdxs,
|
||||
partStat.getBlockSize());
|
||||
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("file not corrupted", 1, corruptFiles.length);
|
||||
assertEquals("wrong file corrupted",
|
||||
corruptFiles[0], partFile.toUri().getPath());
|
||||
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
start = Time.now();
|
||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
||||
Time.now() - start < 120000) {
|
||||
LOG.info("Test " + testName + " waiting for files to be fixed.");
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
assertEquals("file not fixed",
|
||||
1, cnode.blockFixer.filesFixed());
|
||||
|
||||
long checkCRC = getCRC(fileSys, partFile);
|
||||
|
||||
assertEquals("file not fixed",
|
||||
partCRC, checkCRC);
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test " + testName + " Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test " + testName + " completed.");
|
||||
}
|
||||
|
||||
|
||||
protected static DistributedFileSystem getDFS(
|
||||
Configuration conf, FileSystem dfs) throws IOException {
|
||||
Configuration clientConf = new Configuration(conf);
|
||||
clientConf.set("fs.hdfs.impl",
|
||||
"org.apache.hadoop.hdfs.DistributedFileSystem");
|
||||
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
||||
URI dfsUri = dfs.getUri();
|
||||
FileSystem.closeAll();
|
||||
return (DistributedFileSystem) FileSystem.get(dfsUri, clientConf);
|
||||
}
|
||||
|
||||
protected void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
|
||||
conf.set("raid.config.file", CONFIG_FILE);
|
||||
conf.setBoolean("raid.config.reload", true);
|
||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
// do not use map-reduce cluster for Raiding
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
conf.setInt("hdfs.raid.stripeLength", stripeLength);
|
||||
conf.set("hdfs.raid.locations", "/destraid");
|
||||
|
||||
conf.setBoolean("dfs.permissions", false);
|
||||
|
||||
conf.set("mapreduce.framework.name", "yarn");
|
||||
|
||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
namenode = fileSys.getUri().toString();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
mr = new MiniMRCluster(4, namenode, 3);
|
||||
JobConf jobConf = mr.createJobConf();
|
||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
conf.set("mapred.job.tracker", jobTrackerName);
|
||||
conf.set("mapreduce.framework.name", "yarn");
|
||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
||||
if (rmAdress != null) {
|
||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
||||
}
|
||||
String schedulerAdress =
|
||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
||||
if (schedulerAdress != null) {
|
||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
||||
}
|
||||
String jobHistoryAddress =
|
||||
jobConf.get("mapreduce.jobhistory.address");
|
||||
if (jobHistoryAddress != null) {
|
||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
||||
}
|
||||
conf.set(JobContext.JAR, TestBlockFixer.DistBlockFixer_JAR);
|
||||
|
||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
||||
String str = "<configuration> " +
|
||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
||||
"<policy name = \"RaidTest1\"> " +
|
||||
"<erasureCode>xor</erasureCode> " +
|
||||
"<destPath> /destraid</destPath> " +
|
||||
"<property> " +
|
||||
"<name>targetReplication</name> " +
|
||||
"<value>1</value> " +
|
||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>metaReplication</name> " +
|
||||
"<value>1</value> " +
|
||||
"<description> replication factor of parity file" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>modTimePeriod</name> " +
|
||||
"<value>2000</value> " +
|
||||
"<description> time (milliseconds) after a file is modified to make it " +
|
||||
"a candidate for RAIDing " +
|
||||
"</description> " +
|
||||
"</property> ";
|
||||
if (timeBeforeHar >= 0) {
|
||||
str +=
|
||||
"<property> " +
|
||||
"<name>time_before_har</name> " +
|
||||
"<value>" + timeBeforeHar + "</value> " +
|
||||
"<description> amount of time waited before har'ing parity files" +
|
||||
"</description> " +
|
||||
"</property> ";
|
||||
}
|
||||
|
||||
str +=
|
||||
"</policy>" +
|
||||
"</srcPath>" +
|
||||
"</configuration>";
|
||||
fileWriter.write(str);
|
||||
fileWriter.close();
|
||||
}
|
||||
|
||||
protected void myTearDown() throws Exception {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
if (mr != null) { mr.shutdown(); }
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
public static long getCRC(FileSystem fs, Path p) throws IOException {
|
||||
CRC32 crc = new CRC32();
|
||||
FSDataInputStream stm = fs.open(p);
|
||||
int b;
|
||||
while ((b = stm.read())>=0) {
|
||||
crc.update(b);
|
||||
}
|
||||
stm.close();
|
||||
return crc.getValue();
|
||||
}
|
||||
|
||||
void corruptBlock(ExtendedBlock block) throws IOException {
|
||||
assertTrue("Could not corrupt block",
|
||||
dfs.corruptBlockOnDataNodes(block) > 0);
|
||||
}
|
||||
|
||||
static void reportCorruptBlocks(FileSystem fs, Path file, int[] idxs,
|
||||
long blockSize) throws IOException {
|
||||
|
||||
FSDataInputStream in = fs.open(file);
|
||||
for (int idx: idxs) {
|
||||
long offset = idx * blockSize;
|
||||
LOG.info("Reporting corrupt block " + file + ":" + offset);
|
||||
in.seek(offset);
|
||||
try {
|
||||
in.readFully(new byte[(int)blockSize]);
|
||||
fail("Expected exception not thrown for " + file + ":" + offset);
|
||||
} catch (org.apache.hadoop.fs.ChecksumException e) {
|
||||
} catch (org.apache.hadoop.hdfs.BlockMissingException bme) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestBlockFixerBlockFixDist extends TestBlockFixer {
|
||||
@Test
|
||||
public void testBlockFixDist() throws Exception {
|
||||
implBlockFix(false);
|
||||
}
|
||||
}
|
|
@ -1,245 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestBlockFixerDistConcurrency extends TestBlockFixer {
|
||||
/**
|
||||
* tests that we can have 2 concurrent jobs fixing files
|
||||
* (dist block fixer)
|
||||
*/
|
||||
@Test
|
||||
public void testConcurrentJobs() throws Exception {
|
||||
LOG.info("Test testConcurrentJobs started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, -1); // never har
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
Path file2 = new Path("/user/dhruba/raidtest/file2");
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 20, blockSize);
|
||||
long crc2 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file2,
|
||||
1, 20, blockSize);
|
||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
||||
long file2Len = fileSys.getFileStatus(file2).getLen();
|
||||
LOG.info("Test testConcurrentJobs created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.DistBlockFixer");
|
||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
||||
|
||||
try {
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file2, destPath);
|
||||
cnode.stop(); cnode.join();
|
||||
|
||||
FileStatus file1Stat = fileSys.getFileStatus(file1);
|
||||
FileStatus file2Stat = fileSys.getFileStatus(file2);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
LocatedBlocks file1Loc =
|
||||
RaidDFSUtil.getBlockLocations(dfs, file1.toUri().getPath(),
|
||||
0, file1Stat.getLen());
|
||||
LocatedBlocks file2Loc =
|
||||
RaidDFSUtil.getBlockLocations(dfs, file2.toUri().getPath(),
|
||||
0, file2Stat.getLen());
|
||||
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
||||
assertEquals("filesFixed() should return 0 before fixing files",
|
||||
0, cnode.blockFixer.filesFixed());
|
||||
|
||||
// corrupt file1
|
||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(file1Loc.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
|
||||
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
DistBlockFixer blockFixer = (DistBlockFixer) cnode.blockFixer;
|
||||
long start = Time.now();
|
||||
|
||||
while (blockFixer.jobsRunning() < 1 &&
|
||||
Time.now() - start < 240000) {
|
||||
LOG.info("Test testBlockFix waiting for fixing job 1 to start");
|
||||
Thread.sleep(10);
|
||||
}
|
||||
assertEquals("job 1 not running", 1, blockFixer.jobsRunning());
|
||||
|
||||
// corrupt file2
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(file2Loc.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, file2, corruptBlockIdxs, blockSize);
|
||||
|
||||
while (blockFixer.jobsRunning() < 2 &&
|
||||
Time.now() - start < 240000) {
|
||||
LOG.info("Test testBlockFix waiting for fixing job 2 to start");
|
||||
Thread.sleep(10);
|
||||
}
|
||||
assertEquals("2 jobs not running", 2, blockFixer.jobsRunning());
|
||||
|
||||
while (blockFixer.filesFixed() < 2 &&
|
||||
Time.now() - start < 240000) {
|
||||
LOG.info("Test testBlockFix waiting for files to be fixed.");
|
||||
Thread.sleep(10);
|
||||
}
|
||||
assertEquals("files not fixed", 2, blockFixer.filesFixed());
|
||||
|
||||
dfs = getDFS(conf, dfs);
|
||||
|
||||
try {
|
||||
Thread.sleep(5*1000);
|
||||
} catch (InterruptedException ignore) {
|
||||
}
|
||||
assertTrue("file not fixed",
|
||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
||||
assertTrue("file not fixed",
|
||||
TestRaidDfs.validateFile(dfs, file2, file2Len, crc2));
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test testConcurrentJobs exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* tests that the distributed block fixer obeys
|
||||
* the limit on how many files to fix simultaneously
|
||||
*/
|
||||
@Test
|
||||
public void testMaxPendingFiles() throws Exception {
|
||||
LOG.info("Test testMaxPendingFiles started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, -1); // never har
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
Path file2 = new Path("/user/dhruba/raidtest/file2");
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 20, blockSize);
|
||||
long crc2 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file2,
|
||||
1, 20, blockSize);
|
||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
||||
long file2Len = fileSys.getFileStatus(file2).getLen();
|
||||
LOG.info("Test testMaxPendingFiles created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
localConf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.DistBlockFixer");
|
||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
||||
localConf.setLong("raid.blockfix.maxpendingfiles", 1L);
|
||||
|
||||
try {
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file2, destPath);
|
||||
cnode.stop(); cnode.join();
|
||||
|
||||
FileStatus file1Stat = fileSys.getFileStatus(file1);
|
||||
FileStatus file2Stat = fileSys.getFileStatus(file2);
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
LocatedBlocks file1Loc =
|
||||
RaidDFSUtil.getBlockLocations(dfs, file1.toUri().getPath(),
|
||||
0, file1Stat.getLen());
|
||||
LocatedBlocks file2Loc =
|
||||
RaidDFSUtil.getBlockLocations(dfs, file2.toUri().getPath(),
|
||||
0, file2Stat.getLen());
|
||||
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
||||
assertEquals("filesFixed() should return 0 before fixing files",
|
||||
0, cnode.blockFixer.filesFixed());
|
||||
|
||||
// corrupt file1
|
||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(file1Loc.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
DistBlockFixer blockFixer = (DistBlockFixer) cnode.blockFixer;
|
||||
long start = Time.now();
|
||||
|
||||
while (blockFixer.jobsRunning() < 1 &&
|
||||
Time.now() - start < 240000) {
|
||||
LOG.info("Test testBlockFix waiting for fixing job 1 to start");
|
||||
Thread.sleep(10);
|
||||
}
|
||||
assertEquals("job not running", 1, blockFixer.jobsRunning());
|
||||
|
||||
// corrupt file2
|
||||
for (int idx: corruptBlockIdxs)
|
||||
corruptBlock(file2Loc.get(idx).getBlock());
|
||||
reportCorruptBlocks(dfs, file2, corruptBlockIdxs, blockSize);
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
|
||||
// wait until both files are fixed
|
||||
while (blockFixer.filesFixed() < 2 &&
|
||||
Time.now() - start < 240000) {
|
||||
// make sure the block fixer does not start a second job while
|
||||
// the first one is still running
|
||||
assertTrue("too many jobs running", blockFixer.jobsRunning() <= 1);
|
||||
Thread.sleep(10);
|
||||
}
|
||||
assertEquals("files not fixed", 2, blockFixer.filesFixed());
|
||||
|
||||
dfs = getDFS(conf, dfs);
|
||||
|
||||
try {
|
||||
Thread.sleep(5*1000);
|
||||
} catch (InterruptedException ignore) {
|
||||
}
|
||||
assertTrue("file not fixed",
|
||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
||||
assertTrue("file not fixed",
|
||||
TestRaidDfs.validateFile(dfs, file2, file2Len, crc2));
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test testMaxPendingFiles exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -1,45 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestBlockFixerGeneratedBlockDist extends TestBlockFixer {
|
||||
/**
|
||||
* Tests integrity of generated block.
|
||||
* Create a file and delete a block entirely. Wait for the block to be
|
||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
||||
* Test that corruption in the generated block can be detected by clients.
|
||||
*/
|
||||
@Test
|
||||
public void testGeneratedBlockDist() throws Exception {
|
||||
generatedBlockTestCommon("testGeneratedBlock", 3, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests integrity of generated last block.
|
||||
* Create a file and delete a block entirely. Wait for the block to be
|
||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
||||
* Test that corruption in the generated block can be detected by clients.
|
||||
*/
|
||||
@Test
|
||||
public void testGeneratedLastBlockDist() throws Exception {
|
||||
generatedBlockTestCommon("testGeneratedLastBlock", 6, false);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestBlockFixerParityBlockFixDist extends TestBlockFixer {
|
||||
@Test
|
||||
public void testParityBlockFixDist() throws Exception {
|
||||
implParityBlockFix("testParityBlockFixDist", false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParityHarBlockFixDist() throws Exception {
|
||||
implParityHarBlockFix("testParityHarBlockFixDist", false);
|
||||
}
|
||||
}
|
|
@ -1,228 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestDirectoryTraversal {
|
||||
final static Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.TestDirectoryTraversal");
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
|
||||
MiniDFSCluster dfs = null;
|
||||
FileSystem fs = null;
|
||||
Configuration conf = null;
|
||||
|
||||
/**
|
||||
* Test basic enumeration.
|
||||
*/
|
||||
@Test
|
||||
public void testEnumeration() throws IOException {
|
||||
mySetup();
|
||||
|
||||
try {
|
||||
Path topDir = new Path(TEST_DIR + "/testenumeration");
|
||||
|
||||
createTestTree(topDir);
|
||||
|
||||
LOG.info("Enumerating files");
|
||||
List<FileStatus> startPaths = new LinkedList<FileStatus>();
|
||||
startPaths.add(fs.getFileStatus(topDir));
|
||||
DirectoryTraversal dt = new DirectoryTraversal(fs, startPaths, 2);
|
||||
|
||||
List<FileStatus> selected = new LinkedList<FileStatus>();
|
||||
while (true) {
|
||||
FileStatus f = dt.getNextFile();
|
||||
if (f == null) break;
|
||||
assertEquals(false, f.isDir());
|
||||
LOG.info(f.getPath());
|
||||
selected.add(f);
|
||||
}
|
||||
assertEquals(5, selected.size());
|
||||
|
||||
LOG.info("Enumerating directories");
|
||||
startPaths.clear();
|
||||
startPaths.add(fs.getFileStatus(topDir));
|
||||
dt = new DirectoryTraversal(fs, startPaths);
|
||||
selected.clear();
|
||||
while (true) {
|
||||
FileStatus dir = dt.getNextDirectory();
|
||||
if (dir == null) break;
|
||||
assertEquals(true, dir.isDir());
|
||||
LOG.info(dir.getPath());
|
||||
selected.add(dir);
|
||||
}
|
||||
assertEquals(4, selected.size());
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSuspension() throws IOException {
|
||||
LOG.info("Starting testSuspension");
|
||||
mySetup();
|
||||
|
||||
try {
|
||||
Path topDir = new Path(TEST_DIR + "/testenumeration");
|
||||
|
||||
createTestTree(topDir);
|
||||
|
||||
String top = topDir.toString();
|
||||
List<FileStatus> startPaths = new LinkedList<FileStatus>();
|
||||
startPaths.add(fs.getFileStatus(new Path(top + "/a")));
|
||||
startPaths.add(fs.getFileStatus(new Path(top + "/b")));
|
||||
DirectoryTraversal dt = new DirectoryTraversal(fs, startPaths);
|
||||
|
||||
int limit = 2;
|
||||
short targetRepl = 1;
|
||||
Path raid = new Path("/raid");
|
||||
DirectoryTraversal.FileFilter filter =
|
||||
new RaidFilter.TimeBasedFilter(conf,
|
||||
RaidNode.xorDestinationPath(conf), 1, Time.now(), 0);
|
||||
List<FileStatus> selected = dt.getFilteredFiles(filter, limit);
|
||||
for (FileStatus f: selected) {
|
||||
LOG.info(f.getPath());
|
||||
}
|
||||
assertEquals(limit, selected.size());
|
||||
|
||||
selected = dt.getFilteredFiles(filter, limit);
|
||||
for (FileStatus f: selected) {
|
||||
LOG.info(f.getPath());
|
||||
}
|
||||
assertEquals(limit, selected.size());
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFileFilter() throws IOException {
|
||||
mySetup();
|
||||
|
||||
try {
|
||||
Path topDir = new Path(TEST_DIR + "/testFileFilter");
|
||||
int targetRepl = 1;
|
||||
createTestTree(topDir);
|
||||
Path file = new Path(topDir.toString() + "/a/f1");
|
||||
FileStatus stat = fs.getFileStatus(file);
|
||||
PolicyInfo info = new PolicyInfo("testFileFilter", conf);
|
||||
info.setSrcPath(topDir.toString());
|
||||
info.setErasureCode("rs");
|
||||
info.setDescription("test policy");
|
||||
info.setProperty("targetReplication", "1");
|
||||
info.setProperty("metaReplication", "1");
|
||||
|
||||
DirectoryTraversal.FileFilter timeBasedXORFilter =
|
||||
new RaidFilter.TimeBasedFilter(conf,
|
||||
RaidNode.xorDestinationPath(conf), targetRepl,
|
||||
Time.now(), 0);
|
||||
DirectoryTraversal.FileFilter timeBasedRSFilter =
|
||||
new RaidFilter.TimeBasedFilter(conf,
|
||||
RaidNode.rsDestinationPath(conf), targetRepl,
|
||||
Time.now(), 0);
|
||||
DirectoryTraversal.FileFilter preferenceForRSFilter =
|
||||
new RaidFilter.PreferenceFilter(
|
||||
conf, RaidNode.rsDestinationPath(conf),
|
||||
RaidNode.xorDestinationPath(conf), 1, Time.now(), 0);
|
||||
|
||||
assertTrue(timeBasedXORFilter.check(stat));
|
||||
assertTrue(timeBasedRSFilter.check(stat));
|
||||
assertTrue(preferenceForRSFilter.check(stat));
|
||||
|
||||
RaidNode.doRaid(
|
||||
conf, info, stat, new RaidNode.Statistics(), Reporter.NULL);
|
||||
|
||||
assertTrue(timeBasedXORFilter.check(stat));
|
||||
assertFalse(timeBasedRSFilter.check(stat));
|
||||
assertFalse(preferenceForRSFilter.check(stat));
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a test directory tree.
|
||||
* top
|
||||
* / | \
|
||||
* / | f5
|
||||
* a b___
|
||||
* / \ |\ \
|
||||
* f1 f2 f3f4 c
|
||||
*/
|
||||
private void createTestTree(Path topDir) throws IOException {
|
||||
String top = topDir.toString();
|
||||
fs.delete(topDir, true);
|
||||
|
||||
fs.mkdirs(topDir);
|
||||
fs.create(new Path(top + "/f5")).close();
|
||||
|
||||
fs.mkdirs(new Path(top + "/a"));
|
||||
createTestFile(new Path(top + "/a/f1"));
|
||||
createTestFile(new Path(top + "/a/f2"));
|
||||
|
||||
fs.mkdirs(new Path(top + "/b"));
|
||||
fs.mkdirs(new Path(top + "/b/c"));
|
||||
createTestFile(new Path(top + "/b/f3"));
|
||||
createTestFile(new Path(top + "/b/f4"));
|
||||
}
|
||||
|
||||
private void createTestFile(Path file) throws IOException {
|
||||
long blockSize = 8192;
|
||||
byte[] bytes = new byte[(int)blockSize];
|
||||
FSDataOutputStream stm = fs.create(file, false, 4096, (short)1, blockSize);
|
||||
stm.write(bytes);
|
||||
stm.write(bytes);
|
||||
stm.write(bytes);
|
||||
stm.close();
|
||||
FileStatus stat = fs.getFileStatus(file);
|
||||
assertEquals(blockSize, stat.getBlockSize());
|
||||
}
|
||||
|
||||
private void mySetup() throws IOException {
|
||||
conf = new Configuration();
|
||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
|
||||
dfs.waitActive();
|
||||
fs = dfs.getFileSystem();
|
||||
}
|
||||
|
||||
private void myTearDown() {
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
}
|
|
@ -1,245 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestErasureCodes {
|
||||
final int TEST_CODES = 100;
|
||||
final int TEST_TIMES = 1000;
|
||||
final Random RAND = new Random();
|
||||
|
||||
@Test
|
||||
public void testEncodeDecode() {
|
||||
for (int n = 0; n < TEST_CODES; n++) {
|
||||
int stripeSize = RAND.nextInt(99) + 1; // 1, 2, 3, ... 100
|
||||
int paritySize = RAND.nextInt(9) + 1; //1, 2, 3, 4, ... 10
|
||||
ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
|
||||
for (int m = 0; m < TEST_TIMES; m++) {
|
||||
int symbolMax = (int) Math.pow(2, ec.symbolSize());
|
||||
int[] message = new int[stripeSize];
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
message[i] = RAND.nextInt(symbolMax);
|
||||
}
|
||||
int[] parity = new int[paritySize];
|
||||
ec.encode(message, parity);
|
||||
int[] data = new int[stripeSize + paritySize];
|
||||
int[] copy = new int[data.length];
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
data[i] = parity[i];
|
||||
copy[i] = parity[i];
|
||||
}
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
data[i + paritySize] = message[i];
|
||||
copy[i + paritySize] = message[i];
|
||||
}
|
||||
int erasedLen = paritySize == 1 ? 1 : RAND.nextInt(paritySize - 1) + 1;
|
||||
int[] erasedLocations = randomErasedLocation(erasedLen, data.length);
|
||||
for (int i = 0; i < erasedLocations.length; i++) {
|
||||
data[erasedLocations[i]] = 0;
|
||||
}
|
||||
int[] erasedValues = new int[erasedLen];
|
||||
ec.decode(data, erasedLocations, erasedValues);
|
||||
for (int i = 0; i < erasedLen; i++) {
|
||||
assertEquals("Decode failed", copy[erasedLocations[i]], erasedValues[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRSPerformance() {
|
||||
int stripeSize = 10;
|
||||
int paritySize = 4;
|
||||
ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
|
||||
int symbolMax = (int) Math.pow(2, ec.symbolSize());
|
||||
byte[][] message = new byte[stripeSize][];
|
||||
int bufsize = 1024 * 1024 * 10;
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
message[i] = new byte[bufsize];
|
||||
for (int j = 0; j < bufsize; j++) {
|
||||
message[i][j] = (byte) RAND.nextInt(symbolMax);
|
||||
}
|
||||
}
|
||||
byte[][] parity = new byte[paritySize][];
|
||||
for (int i = 0; i < paritySize; i++) {
|
||||
parity[i] = new byte[bufsize];
|
||||
}
|
||||
long encodeStart = Time.now();
|
||||
int[] tmpIn = new int[stripeSize];
|
||||
int[] tmpOut = new int[paritySize];
|
||||
for (int i = 0; i < bufsize; i++) {
|
||||
// Copy message.
|
||||
for (int j = 0; j < stripeSize; j++) tmpIn[j] = 0x000000FF & message[j][i];
|
||||
ec.encode(tmpIn, tmpOut);
|
||||
// Copy parity.
|
||||
for (int j = 0; j < paritySize; j++) parity[j][i] = (byte)tmpOut[j];
|
||||
}
|
||||
long encodeEnd = Time.now();
|
||||
float encodeMSecs = (encodeEnd - encodeStart);
|
||||
System.out.println("Time to encode rs = " + encodeMSecs +
|
||||
"msec (" + message[0].length / (1000 * encodeMSecs) + " MB/s)");
|
||||
|
||||
// Copy erased array.
|
||||
int[] data = new int[paritySize + stripeSize];
|
||||
// 4th location is the 0th symbol in the message
|
||||
int[] erasedLocations = new int[]{4, 1, 5, 7};
|
||||
int[] erasedValues = new int[erasedLocations.length];
|
||||
byte[] copy = new byte[bufsize];
|
||||
for (int j = 0; j < bufsize; j++) {
|
||||
copy[j] = message[0][j];
|
||||
message[0][j] = 0;
|
||||
}
|
||||
|
||||
long decodeStart = Time.now();
|
||||
for (int i = 0; i < bufsize; i++) {
|
||||
// Copy parity first.
|
||||
for (int j = 0; j < paritySize; j++) {
|
||||
data[j] = 0x000000FF & parity[j][i];
|
||||
}
|
||||
// Copy message. Skip 0 as the erased symbol
|
||||
for (int j = 1; j < stripeSize; j++) {
|
||||
data[j + paritySize] = 0x000000FF & message[j][i];
|
||||
}
|
||||
// Use 0, 2, 3, 6, 8, 9, 10, 11, 12, 13th symbol to reconstruct the data
|
||||
ec.decode(data, erasedLocations, erasedValues);
|
||||
message[0][i] = (byte)erasedValues[0];
|
||||
}
|
||||
long decodeEnd = Time.now();
|
||||
float decodeMSecs = (decodeEnd - decodeStart);
|
||||
System.out.println("Time to decode = " + decodeMSecs +
|
||||
"msec (" + message[0].length / (1000 * decodeMSecs) + " MB/s)");
|
||||
assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testXorPerformance() {
|
||||
java.util.Random RAND = new java.util.Random();
|
||||
int stripeSize = 10;
|
||||
byte[][] message = new byte[stripeSize][];
|
||||
int bufsize = 1024 * 1024 * 10;
|
||||
for (int i = 0; i < stripeSize; i++) {
|
||||
message[i] = new byte[bufsize];
|
||||
for (int j = 0; j < bufsize; j++) {
|
||||
message[i][j] = (byte)RAND.nextInt(256);
|
||||
}
|
||||
}
|
||||
byte[] parity = new byte[bufsize];
|
||||
|
||||
long encodeStart = Time.now();
|
||||
for (int i = 0; i < bufsize; i++) {
|
||||
for (int j = 0; j < stripeSize; j++) parity[i] ^= message[j][i];
|
||||
}
|
||||
long encodeEnd = Time.now();
|
||||
float encodeMSecs = encodeEnd - encodeStart;
|
||||
System.out.println("Time to encode xor = " + encodeMSecs +
|
||||
" msec (" + message[0].length / (1000 * encodeMSecs) + "MB/s)");
|
||||
|
||||
byte[] copy = new byte[bufsize];
|
||||
for (int j = 0; j < bufsize; j++) {
|
||||
copy[j] = message[0][j];
|
||||
message[0][j] = 0;
|
||||
}
|
||||
|
||||
long decodeStart = Time.now();
|
||||
for (int i = 0; i < bufsize; i++) {
|
||||
for (int j = 1; j < stripeSize; j++) message[0][i] ^= message[j][i];
|
||||
message[0][i] ^= parity[i];
|
||||
}
|
||||
long decodeEnd = Time.now();
|
||||
float decodeMSecs = decodeEnd - decodeStart;
|
||||
System.out.println("Time to decode xor = " + decodeMSecs +
|
||||
" msec (" + message[0].length / (1000 * decodeMSecs) + "MB/s)");
|
||||
assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComputeErrorLocations() {
|
||||
for (int i = 0; i < TEST_TIMES; ++i) {
|
||||
verifyErrorLocations(10, 4, 1);
|
||||
verifyErrorLocations(10, 4, 2);
|
||||
}
|
||||
}
|
||||
|
||||
public void verifyErrorLocations(int stripeSize, int paritySize, int errors) {
|
||||
int[] message = new int[stripeSize];
|
||||
int[] parity = new int[paritySize];
|
||||
Set<Integer> errorLocations = new HashSet<Integer>();
|
||||
for (int i = 0; i < message.length; ++i) {
|
||||
message[i] = RAND.nextInt(256);
|
||||
}
|
||||
while (errorLocations.size() < errors) {
|
||||
int loc = RAND.nextInt(stripeSize + paritySize);
|
||||
errorLocations.add(loc);
|
||||
}
|
||||
ReedSolomonCode codec = new ReedSolomonCode(stripeSize, paritySize);
|
||||
codec.encode(message, parity);
|
||||
int[] data = combineArrays(parity, message);
|
||||
for (Integer i : errorLocations) {
|
||||
data[i] = randError(data[i]);
|
||||
}
|
||||
Set<Integer> recoveredLocations = new HashSet<Integer>();
|
||||
boolean resolved = codec.computeErrorLocations(data, recoveredLocations);
|
||||
if (resolved) {
|
||||
assertEquals(errorLocations, recoveredLocations);
|
||||
}
|
||||
}
|
||||
|
||||
private int randError(int actual) {
|
||||
while (true) {
|
||||
int r = RAND.nextInt(256);
|
||||
if (r != actual) {
|
||||
return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int[] combineArrays(int[] array1, int[] array2) {
|
||||
int[] result = new int[array1.length + array2.length];
|
||||
for (int i = 0; i < array1.length; ++i) {
|
||||
result[i] = array1[i];
|
||||
}
|
||||
for (int i = 0; i < array2.length; ++i) {
|
||||
result[i + array1.length] = array2[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private int[] randomErasedLocation(int erasedLen, int dataLen) {
|
||||
int[] erasedLocations = new int[erasedLen];
|
||||
for (int i = 0; i < erasedLen; i++) {
|
||||
Set<Integer> s = new HashSet<Integer>();
|
||||
while (s.size() != erasedLen) {
|
||||
s.add(RAND.nextInt(dataLen));
|
||||
}
|
||||
int t = 0;
|
||||
for (int erased : s) {
|
||||
erasedLocations[t++] = erased;
|
||||
}
|
||||
}
|
||||
return erasedLocations;
|
||||
}
|
||||
}
|
|
@ -1,190 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestGaloisField {
|
||||
|
||||
final int TEST_TIMES = 10000;
|
||||
final Random RAND = new Random();
|
||||
final static GaloisField GF = GaloisField.getInstance();
|
||||
|
||||
private int randGF() {
|
||||
return 0x000000FF & RAND.nextInt(GF.getFieldSize());
|
||||
}
|
||||
private int[] randGFPoly(int len) {
|
||||
int[] result = new int[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
result[i] = randGF();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetInstance() {
|
||||
GaloisField gf1 = GaloisField.getInstance(256, 285);
|
||||
GaloisField gf2 = GaloisField.getInstance();
|
||||
GaloisField gf3 = GaloisField.getInstance(128, 137);
|
||||
GaloisField gf4 = GaloisField.getInstance(128, 137);
|
||||
GaloisField gf5 = GaloisField.getInstance(512, 529);
|
||||
GaloisField gf6 = GaloisField.getInstance(512, 529);
|
||||
assertTrue(gf1 == gf2);
|
||||
assertTrue(gf3 == gf4);
|
||||
assertTrue(gf5 == gf6);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDistributivity() {
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int a = RAND.nextInt(GF.getFieldSize());
|
||||
int b = RAND.nextInt(GF.getFieldSize());
|
||||
int c = RAND.nextInt(GF.getFieldSize());
|
||||
int result1 = GF.multiply(a, GF.add(b, c));
|
||||
int result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
|
||||
assertTrue("Distributivity test #" + i + " failed: " + a + ", " + b + ", "
|
||||
+ c, result1 == result2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDevision() {
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int a = RAND.nextInt(GF.getFieldSize());
|
||||
int b = RAND.nextInt(GF.getFieldSize());
|
||||
if (b == 0) {
|
||||
continue;
|
||||
}
|
||||
int c = GF.divide(a, b);
|
||||
assertTrue("Division test #" + i + " failed: " + a + "/" + b + " = " + c,
|
||||
a == GF.multiply(c, b));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPower() {
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int a = randGF();
|
||||
int n = RAND.nextInt(10);
|
||||
int result1 = GF.power(a, n);
|
||||
int result2 = 1;
|
||||
for (int j = 0; j < n; j++) {
|
||||
result2 = GF.multiply(result2, a);
|
||||
}
|
||||
assert(result1 == result2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPolynomialDistributivity() {
|
||||
final int TEST_LEN = 15;
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
int[] result1 = GF.multiply(a, GF.add(b, c));
|
||||
int[] result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
|
||||
assertTrue("Distributivity test on polynomials failed",
|
||||
java.util.Arrays.equals(result1, result2));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubstitute() {
|
||||
final int TEST_LEN = 15;
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
int x = randGF();
|
||||
// (a * b * c)(x)
|
||||
int result1 = GF.substitute(GF.multiply(GF.multiply(a, b), c), x);
|
||||
// a(x) * b(x) * c(x)
|
||||
int result2 =
|
||||
GF.multiply(GF.multiply(GF.substitute(a, x), GF.substitute(b, x)),
|
||||
GF.substitute(c, x));
|
||||
assertTrue("Substitute test on polynomial failed",
|
||||
result1 == result2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSolveVandermondeSystem() {
|
||||
final int TEST_LEN = 15;
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int[] z = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
|
||||
// generate distinct values for x
|
||||
int[] x = new int[z.length];
|
||||
Set<Integer> s = new HashSet<Integer>();
|
||||
while (s.size() != z.length) {
|
||||
s.add(randGF());
|
||||
}
|
||||
int t = 0;
|
||||
for (int v : s) {
|
||||
x[t++] = v;
|
||||
}
|
||||
// compute the output for the Vandermonde system
|
||||
int[] y = new int[x.length];
|
||||
for (int j = 0; j < x.length; j++) {
|
||||
y[j] = 0;
|
||||
for (int k = 0; k < x.length; k++) {
|
||||
//y[j] = y[j] + z[k] * pow(x[k], j);
|
||||
y[j] = GF.add(y[j], GF.multiply(GF.power(x[k], j), z[k]));
|
||||
}
|
||||
}
|
||||
|
||||
GF.solveVandermondeSystem(x, y);
|
||||
assertTrue("Solving Vandermonde system failed",
|
||||
java.util.Arrays.equals(y, z));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemainder() {
|
||||
final int TEST_LEN = 15;
|
||||
for (int i = 0; i < TEST_TIMES; i++) {
|
||||
int[] quotient = null;
|
||||
int[] divisor = null;
|
||||
int[] remainder = null;
|
||||
int[] dividend = null;
|
||||
while (true) {
|
||||
quotient = randGFPoly(RAND.nextInt(TEST_LEN - 3) + 3);
|
||||
divisor = randGFPoly(RAND.nextInt(quotient.length - 2) + 2);
|
||||
remainder = randGFPoly(RAND.nextInt(divisor.length - 1) + 1);
|
||||
dividend = GF.add(remainder, GF.multiply(quotient, divisor));
|
||||
if (quotient[quotient.length - 1] != 0 &&
|
||||
divisor[divisor.length - 1] != 0 &&
|
||||
remainder[remainder.length - 1] != 0) {
|
||||
// make sure all the leading terms are not zero
|
||||
break;
|
||||
}
|
||||
}
|
||||
GF.remainder(dividend, divisor);
|
||||
for (int j = 0; j < remainder.length; j++) {
|
||||
assertTrue("Distributivity test on polynomials failed",
|
||||
dividend[j] == remainder[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestHarIndexParser {
|
||||
final static Log LOG = LogFactory.getLog(TestHarIndexParser.class);
|
||||
File indexFile = null;
|
||||
|
||||
@Before
|
||||
public void setUp() throws FileNotFoundException, IOException {
|
||||
LOG.info("TestHarIndexParser.setUp()");
|
||||
indexFile = File.createTempFile("harindex", ".tmp");
|
||||
indexFile.deleteOnExit();
|
||||
OutputStreamWriter out = new OutputStreamWriter(
|
||||
new FileOutputStream(indexFile),
|
||||
Charset.forName("UTF-8"));
|
||||
out.write("%2F dir 1282018162460+0+493+hadoop+hadoop 0 0 f1 f2 f3 f4\n");
|
||||
out.write("%2Ff1 file part-0 0 1024 1282018141145+1282018140822+420+hadoop+hadoop\n");
|
||||
out.write("%2Ff3 file part-0 2048 1024 1282018148590+1282018148255+420+hadoop+hadoop\n");
|
||||
out.write("%2Ff2 file part-0 1024 1024 1282018144198+1282018143852+420+hadoop+hadoop\n");
|
||||
out.write("%2Ff4 file part-1 0 1024000 1282018162959+1282018162460+420+hadoop+hadoop\n");
|
||||
out.flush();
|
||||
out.close();
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown() {
|
||||
LOG.info("TestHarIndexParser.tearDown()");
|
||||
if (indexFile != null)
|
||||
indexFile.delete();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHarIndexParser()
|
||||
throws UnsupportedEncodingException, IOException {
|
||||
LOG.info("testHarIndexParser started.");
|
||||
InputStream in = new FileInputStream(indexFile);
|
||||
long size = indexFile.length();
|
||||
HarIndex parser = new HarIndex(in, size);
|
||||
|
||||
HarIndex.IndexEntry entry = parser.findEntry("part-0", 2100);
|
||||
assertEquals("/f3", entry.fileName);
|
||||
|
||||
LOG.info("testHarIndexParser finished.");
|
||||
}
|
||||
}
|
|
@ -1,121 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestRaidFilter {
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
final static Log LOG =
|
||||
LogFactory.getLog("org.apache.hadoop.raid.TestRaidFilter");
|
||||
|
||||
Configuration conf;
|
||||
MiniDFSCluster dfs = null;
|
||||
FileSystem fs = null;
|
||||
|
||||
private void mySetup() throws Exception {
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
dfs = new MiniDFSCluster(conf, 2, true, null);
|
||||
dfs.waitActive();
|
||||
fs = dfs.getFileSystem();
|
||||
String namenode = fs.getUri().toString();
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
}
|
||||
|
||||
private void myTearDown() throws Exception {
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLayeredPolicies() throws Exception {
|
||||
mySetup();
|
||||
Path src1 = new Path("/user/foo");
|
||||
Path src2 = new Path("/user/foo/bar");
|
||||
|
||||
PolicyInfo info1 = new PolicyInfo("p1", conf);
|
||||
info1.setSrcPath(src1.toString());
|
||||
info1.setErasureCode("xor");
|
||||
info1.setDescription("test policy");
|
||||
info1.setProperty("targetReplication", "1");
|
||||
info1.setProperty("metaReplication", "1");
|
||||
info1.setProperty("modTimePeriod", "0");
|
||||
|
||||
PolicyInfo info2 = new PolicyInfo("p2", conf);
|
||||
info2.setSrcPath(src2.toString());
|
||||
info2.setErasureCode("xor");
|
||||
info2.setDescription("test policy");
|
||||
info2.setProperty("targetReplication", "1");
|
||||
info2.setProperty("metaReplication", "1");
|
||||
info2.setProperty("modTimePeriod", "0");
|
||||
|
||||
ArrayList<PolicyInfo> all = new ArrayList<PolicyInfo>();
|
||||
all.add(info1);
|
||||
all.add(info2);
|
||||
|
||||
try {
|
||||
long blockSize = 1024;
|
||||
byte[] bytes = new byte[(int)blockSize];
|
||||
Path f1 = new Path(src1, "f1");
|
||||
Path f2 = new Path(src2, "f2");
|
||||
FSDataOutputStream stm1 = fs.create(f1, false, 4096, (short)1, blockSize);
|
||||
FSDataOutputStream stm2 = fs.create(f2, false, 4096, (short)1, blockSize);
|
||||
FSDataOutputStream[] stms = new FSDataOutputStream[]{stm1, stm2};
|
||||
for (FSDataOutputStream stm: stms) {
|
||||
stm.write(bytes);
|
||||
stm.write(bytes);
|
||||
stm.write(bytes);
|
||||
stm.close();
|
||||
}
|
||||
|
||||
Thread.sleep(1000);
|
||||
|
||||
FileStatus stat1 = fs.getFileStatus(f1);
|
||||
FileStatus stat2 = fs.getFileStatus(f2);
|
||||
|
||||
RaidFilter.Statistics stats = new RaidFilter.Statistics();
|
||||
RaidFilter.TimeBasedFilter filter = new RaidFilter.TimeBasedFilter(
|
||||
conf, RaidNode.xorDestinationPath(conf), info1, all,
|
||||
Time.now(), stats);
|
||||
System.out.println("Stats " + stats);
|
||||
|
||||
assertTrue(filter.check(stat1));
|
||||
assertFalse(filter.check(stat2));
|
||||
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,315 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileWriter;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.commons.logging.impl.Log4JLogger;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.log4j.Level;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* If a file gets deleted, then verify that the parity file gets deleted too.
|
||||
*/
|
||||
public class TestRaidHar {
|
||||
/** Local directory for test data; overridable via test.build.data. */
final static String TEST_DIR = new File(System.getProperty("test.build.data",
    "target/test-data")).getAbsolutePath();
/** Raid policy file written by mySetup and read via raid.config.file. */
final static String CONFIG_FILE = new File(TEST_DIR,
    "test-raid.xml").getAbsolutePath();
/** Value used for raid.config.reload.interval. */
final static long RELOAD_INTERVAL = 1000;
// Log under this test's own name; the previous name was TestRaidNode's.
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidHar");
final Random rand = new Random();

{
  // Turn on full RaidNode logging for every instance of this test.
  ((Log4JLogger)RaidNode.LOG).getLogger().setLevel(Level.ALL);
}


Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
String jobTrackerName = null;
|
||||
|
||||
/**
|
||||
* create mapreduce and dfs clusters
|
||||
*/
|
||||
private void createClusters(boolean local) throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
conf.set("raid.config.file", CONFIG_FILE);
|
||||
conf.setBoolean("raid.config.reload", true);
|
||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
||||
if (local) {
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
} else {
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
|
||||
}
|
||||
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
conf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
|
||||
// create a dfs and map-reduce cluster
|
||||
final int taskTrackers = 4;
|
||||
|
||||
dfs = new MiniDFSCluster(conf, 3, true, null);
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
namenode = fileSys.getUri().toString();
|
||||
mr = new MiniMRCluster(taskTrackers, namenode, 3);
|
||||
JobConf jobConf = mr.createJobConf();
|
||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
conf.set("mapred.job.tracker", jobTrackerName);
|
||||
conf.set("mapreduce.framework.name", "yarn");
|
||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
||||
if (rmAdress != null) {
|
||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
||||
}
|
||||
String schedulerAdress =
|
||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
||||
if (schedulerAdress != null) {
|
||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
||||
}
|
||||
String jobHistoryAddress =
|
||||
jobConf.get("mapreduce.jobhistory.address");
|
||||
if (jobHistoryAddress != null) {
|
||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create raid.xml file for RaidNode
|
||||
*/
|
||||
private void mySetup(long targetReplication,
|
||||
long metaReplication, long stripeLength) throws Exception {
|
||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
||||
String str = "<configuration> " +
|
||||
"<srcPath prefix=\"/user/test/raidtest\"> " +
|
||||
"<policy name = \"RaidTest1\"> " +
|
||||
"<erasureCode>xor</erasureCode> " +
|
||||
"<property> " +
|
||||
"<name>targetReplication</name> " +
|
||||
"<value>" + targetReplication + "</value> " +
|
||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>metaReplication</name> " +
|
||||
"<value>" + metaReplication + "</value> " +
|
||||
"<description> replication factor of parity file" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>stripeLength</name> " +
|
||||
"<value>" + stripeLength + "</value> " +
|
||||
"<description> the max number of blocks in a file to RAID together " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>time_before_har</name> " +
|
||||
"<value>0</value> " +
|
||||
"<description> amount of time waited before har'ing parity files" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>modTimePeriod</name> " +
|
||||
"<value>2000</value> " +
|
||||
"<description> time (milliseconds) after a file is modified to make it " +
|
||||
"a candidate for RAIDing " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"</policy>" +
|
||||
"</srcPath>" +
|
||||
"</configuration>";
|
||||
fileWriter.write(str);
|
||||
fileWriter.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* stop clusters created earlier
|
||||
*/
|
||||
private void stopClusters() throws Exception {
|
||||
if (mr != null) { mr.shutdown(); }
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that parity files that do not have an associated master file
|
||||
* get deleted.
|
||||
*/
|
||||
@Test
|
||||
public void testRaidHar() throws Exception {
|
||||
LOG.info("Test testRaidHar started.");
|
||||
|
||||
long blockSizes [] = {1024L};
|
||||
long stripeLengths [] = {5};
|
||||
long targetReplication = 1;
|
||||
long metaReplication = 1;
|
||||
int numBlock = 9;
|
||||
int iter = 0;
|
||||
|
||||
createClusters(true);
|
||||
try {
|
||||
for (long blockSize : blockSizes) {
|
||||
for (long stripeLength : stripeLengths) {
|
||||
doTestHar(iter, targetReplication, metaReplication,
|
||||
stripeLength, blockSize, numBlock);
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
stopClusters();
|
||||
}
|
||||
LOG.info("Test testRaidHar completed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Create parity file, delete original file and then validate that
|
||||
* parity file is automatically deleted.
|
||||
*/
|
||||
private void doTestHar(int iter, long targetReplication,
|
||||
long metaReplication, long stripeLength,
|
||||
long blockSize, int numBlock) throws Exception {
|
||||
LOG.info("doTestHar started---------------------------:" + " iter " + iter +
|
||||
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
|
||||
mySetup(targetReplication, metaReplication, stripeLength);
|
||||
Path dir = new Path("/user/test/raidtest/subdir/");
|
||||
Path file1 = new Path(dir + "/file" + iter);
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
Path destPath = new Path("/destraid/user/test/raidtest/subdir");
|
||||
fileSys.delete(dir, true);
|
||||
fileSys.delete(destPath, true);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
Path file = new Path(dir + "/file" + i);
|
||||
TestRaidNode.createOldFile(fileSys, file, 1, numBlock, blockSize);
|
||||
}
|
||||
LOG.info("doTestHar created test files for iteration " + iter);
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
FileStatus[] listPaths = null;
|
||||
|
||||
int maxFilesFound = 0;
|
||||
// wait till file is raided
|
||||
while (true) {
|
||||
try {
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
int count = 0;
|
||||
Path harPath = null;
|
||||
int filesFound = 0;
|
||||
if (listPaths != null) {
|
||||
for (FileStatus s : listPaths) {
|
||||
LOG.info("doTestHar found path " + s.getPath());
|
||||
|
||||
if (!s.isDir())
|
||||
filesFound++;
|
||||
if (filesFound > maxFilesFound)
|
||||
maxFilesFound = filesFound;
|
||||
|
||||
if (s.getPath().toString().endsWith(".har")) {
|
||||
// If a HAR directory is found, ensure that we have seen
|
||||
// 10 parity files. We have to keep track of the max # of
|
||||
// files since some parity files might get deleted by the
|
||||
// purge thread.
|
||||
assertEquals(10, maxFilesFound);
|
||||
harPath = s.getPath();
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count == 1 && listPaths.length == 1) {
|
||||
Path partfile = new Path(harPath, "part-0");
|
||||
assertEquals(fileSys.getFileStatus(partfile).getReplication(),
|
||||
targetReplication);
|
||||
break;
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
//ignore
|
||||
}
|
||||
LOG.info("doTestHar waiting for files to be raided and parity files to be har'ed and deleted. Found " +
|
||||
(listPaths == null ? "none" : listPaths.length));
|
||||
Thread.sleep(1000); // keep waiting
|
||||
|
||||
}
|
||||
|
||||
fileSys.delete(dir, true);
|
||||
// wait till raid file is deleted
|
||||
int count = 1;
|
||||
while (count > 0) {
|
||||
count = 0;
|
||||
try {
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
if (listPaths != null) {
|
||||
for (FileStatus s : listPaths) {
|
||||
LOG.info("doTestHar found path " + s.getPath());
|
||||
if (s.getPath().toString().endsWith(".har")) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) { } //ignoring
|
||||
LOG.info("doTestHar waiting for har file to be deleted. Found " +
|
||||
(listPaths == null ? "none" : listPaths.length) + " files");
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("doTestHar Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
}
|
||||
LOG.info("doTestHar completed:" + " blockSize=" + blockSize +
|
||||
" stripeLength=" + stripeLength);
|
||||
}
|
||||
}
|
|
@ -1,738 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.zip.CRC32;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.JobContext;
|
||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.raid.protocol.PolicyList;
|
||||
import org.apache.hadoop.util.JarFinder;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test the generation of parity blocks for files with different block
|
||||
* sizes. Also test that a data block can be regenerated from a raid stripe
|
||||
* using the parity block
|
||||
*/
|
||||
public class TestRaidNode {
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
public static final String DistRaid_JAR = JarFinder.getJar(DistRaid.class);
|
||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
||||
"test-raid.xml").getAbsolutePath();
|
||||
final static long RELOAD_INTERVAL = 1000;
|
||||
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
|
||||
final static Random rand = new Random();
|
||||
|
||||
Configuration conf;
|
||||
String namenode = null;
|
||||
String hftp = null;
|
||||
MiniDFSCluster dfs = null;
|
||||
MiniMRCluster mr = null;
|
||||
FileSystem fileSys = null;
|
||||
String jobTrackerName = null;
|
||||
|
||||
/**
|
||||
* create mapreduce and dfs clusters
|
||||
*/
|
||||
private void createClusters(boolean local) throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
conf.set("raid.config.file", CONFIG_FILE);
|
||||
conf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
conf.setBoolean("raid.config.reload", true);
|
||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
||||
conf.setBoolean("dfs.permissions.enabled", true);
|
||||
conf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 20000);
|
||||
conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3000L);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
||||
if (local) {
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
} else {
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
|
||||
}
|
||||
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
|
||||
// create a dfs and map-reduce cluster
|
||||
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
|
||||
builder.numDataNodes(6);
|
||||
builder.format(true);
|
||||
dfs = builder.build();
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
|
||||
namenode = fileSys.getUri().toString();
|
||||
final int taskTrackers = 4;
|
||||
mr = new MiniMRCluster(taskTrackers, namenode, 3);
|
||||
JobConf jobConf = mr.createJobConf();
|
||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
conf.set("mapred.job.tracker", jobTrackerName);
|
||||
conf.set("mapreduce.framework.name", "yarn");
|
||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
||||
if (rmAdress != null) {
|
||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
||||
}
|
||||
String schedulerAdress =
|
||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
||||
if (schedulerAdress != null) {
|
||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
||||
}
|
||||
String jobHistoryAddress =
|
||||
jobConf.get("mapreduce.jobhistory.address");
|
||||
if (jobHistoryAddress != null) {
|
||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
||||
}
|
||||
}
|
||||
|
||||
class ConfigBuilder {
|
||||
private List<String> policies;
|
||||
|
||||
public ConfigBuilder() {
|
||||
policies = new java.util.ArrayList<String>();
|
||||
}
|
||||
|
||||
public void addPolicy(String name, String path, String parent) {
|
||||
String str =
|
||||
"<srcPath prefix=\"" + path + "\"> " +
|
||||
"<policy name = \"" + name + "\"> " +
|
||||
"<parentPolicy>" + parent + "</parentPolicy>" +
|
||||
"</policy>" +
|
||||
"</srcPath>";
|
||||
policies.add(str);
|
||||
}
|
||||
|
||||
public void addPolicy(String name, short srcReplication,
|
||||
long targetReplication, long metaReplication, long stripeLength) {
|
||||
String str =
|
||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
||||
"<policy name = \"" + name + "\"> " +
|
||||
"<erasureCode>xor</erasureCode> " +
|
||||
"<property> " +
|
||||
"<name>srcReplication</name> " +
|
||||
"<value>" + srcReplication + "</value> " +
|
||||
"<description> pick only files whole replFactor is greater than or equal to " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>targetReplication</name> " +
|
||||
"<value>" + targetReplication + "</value> " +
|
||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>metaReplication</name> " +
|
||||
"<value>" + metaReplication + "</value> " +
|
||||
"<description> replication factor of parity file" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>stripeLength</name> " +
|
||||
"<value>" + stripeLength + "</value> " +
|
||||
"<description> the max number of blocks in a file to RAID together " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>modTimePeriod</name> " +
|
||||
"<value>2000</value> " +
|
||||
"<description> time (milliseconds) after a file is modified to make it " +
|
||||
"a candidate for RAIDing " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"</policy>" +
|
||||
"</srcPath>";
|
||||
policies.add(str);
|
||||
}
|
||||
|
||||
public void addPolicy(String name, String path, short srcReplication,
|
||||
long targetReplication, long metaReplication, long stripeLength) {
|
||||
String str =
|
||||
"<srcPath prefix=\"" + path + "\"> " +
|
||||
"<policy name = \"" + name + "\"> " +
|
||||
"<erasureCode>xor</erasureCode> " +
|
||||
"<property> " +
|
||||
"<name>srcReplication</name> " +
|
||||
"<value>" + srcReplication + "</value> " +
|
||||
"<description> pick only files whole replFactor is greater than or equal to " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>targetReplication</name> " +
|
||||
"<value>" + targetReplication + "</value> " +
|
||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>metaReplication</name> " +
|
||||
"<value>" + metaReplication + "</value> " +
|
||||
"<description> replication factor of parity file" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>stripeLength</name> " +
|
||||
"<value>" + stripeLength + "</value> " +
|
||||
"<description> the max number of blocks in a file to RAID together " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>modTimePeriod</name> " +
|
||||
"<value>2000</value> " +
|
||||
"<description> time (milliseconds) after a file is modified to make it " +
|
||||
"a candidate for RAIDing " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"</policy>" +
|
||||
"</srcPath>";
|
||||
policies.add(str);
|
||||
}
|
||||
|
||||
public void persist() throws IOException {
|
||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
||||
fileWriter.write("<configuration>");
|
||||
for (String policy: policies) {
|
||||
fileWriter.write(policy);
|
||||
}
|
||||
fileWriter.write("</configuration>");
|
||||
fileWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* stop clusters created earlier
|
||||
*/
|
||||
private void stopClusters() throws Exception {
|
||||
if (mr != null) { mr.shutdown(); }
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
/**
|
||||
* Test to run a filter
|
||||
*/
|
||||
@Test
|
||||
public void testPathFilter() throws Exception {
|
||||
LOG.info("Test testPathFilter started.");
|
||||
|
||||
long blockSizes [] = {1024L};
|
||||
int stripeLengths [] = {5, 6, 10, 11, 12};
|
||||
int targetReplication = 1;
|
||||
int metaReplication = 1;
|
||||
int numBlock = 11;
|
||||
int iter = 0;
|
||||
|
||||
createClusters(true);
|
||||
try {
|
||||
for (long blockSize : blockSizes) {
|
||||
for (long stripeLength : stripeLengths) {
|
||||
doTestPathFilter(iter, targetReplication, metaReplication,
|
||||
stripeLength, blockSize, numBlock);
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
doCheckPolicy();
|
||||
} finally {
|
||||
stopClusters();
|
||||
}
|
||||
LOG.info("Test testPathFilter completed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Raid one file under a single policy, then verify that it can be recovered from simulated corruption at several offsets.
|
||||
*/
|
||||
private void doTestPathFilter(int iter, long targetReplication,
|
||||
long metaReplication, long stripeLength,
|
||||
long blockSize, int numBlock) throws Exception {
|
||||
LOG.info("doTestPathFilter started---------------------------:" + " iter " + iter +
|
||||
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
|
||||
ConfigBuilder cb = new ConfigBuilder();
|
||||
cb.addPolicy("policy1", "/user/dhruba/raidtest", (short)1, targetReplication, metaReplication, stripeLength);
|
||||
cb.persist();
|
||||
|
||||
RaidShell shell = null;
|
||||
Path dir = new Path("/user/dhruba/raidtest/");
|
||||
Path file1 = new Path(dir + "/file" + iter);
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
fileSys.delete(dir, true);
|
||||
fileSys.delete(destPath, true);
|
||||
long crc1 = createOldFile(fileSys, file1, 1, numBlock, blockSize);
|
||||
LOG.info("doTestPathFilter created test files for iteration " + iter);
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
FileStatus[] listPaths = null;
|
||||
|
||||
// wait till file is raided
|
||||
while (true) {
|
||||
try {
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
int count = 0;
|
||||
if (listPaths != null && listPaths.length == 1) {
|
||||
for (FileStatus s : listPaths) {
|
||||
LOG.info("doTestPathFilter found path " + s.getPath());
|
||||
if (!s.getPath().toString().endsWith(".tmp") &&
|
||||
fileSys.getFileStatus(file1).getReplication() ==
|
||||
targetReplication) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count > 0) {
|
||||
break;
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
//ignore
|
||||
}
|
||||
LOG.info("doTestPathFilter waiting for files to be raided. Found " +
|
||||
(listPaths == null ? "none" : listPaths.length));
|
||||
Thread.sleep(1000); // keep waiting
|
||||
}
|
||||
// assertEquals(listPaths.length, 1); // all files raided
|
||||
LOG.info("doTestPathFilter all files found in Raid.");
|
||||
|
||||
// check for error at beginning of file
|
||||
shell = new RaidShell(conf);
|
||||
shell.initializeRpc(conf, cnode.getListenerAddress());
|
||||
if (numBlock >= 1) {
|
||||
LOG.info("doTestPathFilter Check error at beginning of file.");
|
||||
simulateError(shell, fileSys, file1, crc1, 0);
|
||||
}
|
||||
|
||||
// check for error at the beginning of second block
|
||||
if (numBlock >= 2) {
|
||||
LOG.info("doTestPathFilter Check error at beginning of second block.");
|
||||
simulateError(shell, fileSys, file1, crc1, blockSize + 1);
|
||||
}
|
||||
|
||||
// check for error at the middle of third block
|
||||
if (numBlock >= 3) {
|
||||
LOG.info("doTestPathFilter Check error at middle of third block.");
|
||||
simulateError(shell, fileSys, file1, crc1, 2 * blockSize + 10);
|
||||
}
|
||||
|
||||
// check for error at the middle of second stripe
|
||||
if (numBlock >= stripeLength + 1) {
|
||||
LOG.info("doTestPathFilter Check error at middle of second stripe.");
|
||||
simulateError(shell, fileSys, file1, crc1,
|
||||
stripeLength * blockSize + 100);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("doTestPathFilter Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (shell != null) shell.close();
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
LOG.info("doTestPathFilter delete file " + file1);
|
||||
fileSys.delete(file1, true);
|
||||
}
|
||||
LOG.info("doTestPathFilter completed:" + " blockSize=" + blockSize +
|
||||
" stripeLength=" + stripeLength);
|
||||
}
|
||||
|
||||
// Check that raid occurs only on files that have a replication factor
|
||||
// greater than or equal to the specified value
|
||||
private void doCheckPolicy() throws Exception {
|
||||
LOG.info("doCheckPolicy started---------------------------:");
|
||||
short srcReplication = 1;
|
||||
long targetReplication = 2;
|
||||
long metaReplication = 1;
|
||||
long stripeLength = 2;
|
||||
long blockSize = 1024;
|
||||
int numBlock = 3;
|
||||
ConfigBuilder cb = new ConfigBuilder();
|
||||
cb.addPolicy("policy1", "/user/dhruba/policytest", srcReplication,
|
||||
targetReplication, metaReplication, stripeLength);
|
||||
cb.persist();
|
||||
Path dir = new Path("/user/dhruba/policytest/");
|
||||
Path file1 = new Path(dir + "/file1");
|
||||
Path file2 = new Path(dir + "/file2");
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
Path destPath = new Path("/destraid/user/dhruba/policytest");
|
||||
fileSys.delete(dir, true);
|
||||
fileSys.delete(destPath, true);
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
|
||||
// this file should be picked up RaidNode
|
||||
createOldFile(fileSys, file2, 2, numBlock, blockSize);
|
||||
FileStatus[] listPaths = null;
|
||||
|
||||
long firstmodtime = 0;
|
||||
// wait till file is raided
|
||||
while (true) {
|
||||
Thread.sleep(1000); // waiting
|
||||
try {
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
} catch (FileNotFoundException e) {
|
||||
LOG.warn("File not found " + destPath);
|
||||
// The directory have been deleted by the purge thread.
|
||||
continue;
|
||||
}
|
||||
int count = 0;
|
||||
if (listPaths != null && listPaths.length == 1) {
|
||||
for (FileStatus s : listPaths) {
|
||||
LOG.info("doCheckPolicy found path " + s.getPath());
|
||||
if (!s.getPath().toString().endsWith(".tmp") &&
|
||||
fileSys.getFileStatus(file2).getReplication() ==
|
||||
targetReplication) {
|
||||
count++;
|
||||
firstmodtime = s.getModificationTime();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count > 0) {
|
||||
break;
|
||||
}
|
||||
LOG.info("doCheckPolicy waiting for files to be raided. Found " +
|
||||
(listPaths == null ? "none" : listPaths.length));
|
||||
}
|
||||
assertEquals(listPaths.length, 1);
|
||||
|
||||
LOG.info("doCheckPolicy all files found in Raid the first time.");
|
||||
|
||||
LOG.info("doCheckPolicy: recreating source file");
|
||||
createOldFile(fileSys, file2, 2, numBlock, blockSize);
|
||||
|
||||
FileStatus st = fileSys.getFileStatus(file2);
|
||||
assertTrue(st.getModificationTime() > firstmodtime);
|
||||
|
||||
// wait till file is raided
|
||||
while (true) {
|
||||
Thread.sleep(20000L); // waiting
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
int count = 0;
|
||||
if (listPaths != null && listPaths.length == 1) {
|
||||
for (FileStatus s : listPaths) {
|
||||
LOG.info("doCheckPolicy found path " + s.getPath() + " " + s.getModificationTime());
|
||||
if (!s.getPath().toString().endsWith(".tmp") &&
|
||||
s.getModificationTime() > firstmodtime &&
|
||||
fileSys.getFileStatus(file2).getReplication() ==
|
||||
targetReplication) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count > 0) {
|
||||
break;
|
||||
}
|
||||
LOG.info("doCheckPolicy waiting for files to be raided. Found " +
|
||||
(listPaths == null ? "none" : listPaths.length));
|
||||
}
|
||||
assertEquals(listPaths.length, 1);
|
||||
|
||||
LOG.info("doCheckPolicy: file got re-raided as expected.");
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("doCheckPolicy Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
LOG.info("doTestPathFilter delete file " + file1);
|
||||
fileSys.delete(file1, false);
|
||||
}
|
||||
LOG.info("doCheckPolicy completed:");
|
||||
}
|
||||
|
||||
static public void createTestFiles(FileSystem fileSys,
|
||||
String path, String destpath, int nfile,
|
||||
int nblock) throws IOException {
|
||||
createTestFiles(fileSys, path, destpath, nfile, nblock, (short)1);
|
||||
}
|
||||
|
||||
static void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile,
|
||||
int nblock, short repl) throws IOException {
|
||||
long blockSize = 1024L;
|
||||
Path dir = new Path(path);
|
||||
Path destPath = new Path(destpath);
|
||||
fileSys.delete(dir, true);
|
||||
fileSys.delete(destPath, true);
|
||||
|
||||
for(int i = 0 ; i < nfile; i++){
|
||||
Path file = new Path(path + "file" + i);
|
||||
createOldFile(fileSys, file, repl, nblock, blockSize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test dist Raid
|
||||
*/
|
||||
@Test
|
||||
public void testDistRaid() throws Exception {
|
||||
LOG.info("Test testDistRaid started.");
|
||||
long targetReplication = 2;
|
||||
long metaReplication = 2;
|
||||
long stripeLength = 3;
|
||||
short srcReplication = 1;
|
||||
|
||||
createClusters(false);
|
||||
ConfigBuilder cb = new ConfigBuilder();
|
||||
cb.addPolicy("policy1", "/user/dhruba/raidtest",
|
||||
srcReplication, targetReplication, metaReplication, stripeLength);
|
||||
cb.addPolicy("policy2", "/user/dhruba/raidtest2",
|
||||
srcReplication, targetReplication, metaReplication, stripeLength);
|
||||
cb.persist();
|
||||
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
createTestFiles(fileSys, "/user/dhruba/raidtest/",
|
||||
"/destraid/user/dhruba/raidtest", 5, 7);
|
||||
createTestFiles(fileSys, "/user/dhruba/raidtest2/",
|
||||
"/destraid/user/dhruba/raidtest2", 5, 7);
|
||||
LOG.info("Test testDistRaid created test files");
|
||||
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.set(JobContext.JAR, TestRaidNode.DistRaid_JAR);
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
// Verify the policies are parsed correctly
|
||||
for (PolicyList policyList : cnode.getAllPolicies()) {
|
||||
for (PolicyInfo p : policyList.getAll()) {
|
||||
if (p.getName().equals("policy1")) {
|
||||
Path srcPath = new Path("/user/dhruba/raidtest");
|
||||
FileSystem fs = srcPath.getFileSystem(conf);
|
||||
assertTrue(p.getSrcPath().equals(
|
||||
srcPath.makeQualified(fs.getUri(), fs.getWorkingDirectory())));
|
||||
} else {
|
||||
assertTrue(p.getName().equals("policy2"));
|
||||
Path srcPath = new Path("/user/dhruba/raidtest2");
|
||||
FileSystem fs = srcPath.getFileSystem(conf);
|
||||
assertTrue(p.getSrcPath().equals(
|
||||
srcPath.makeQualified(fs.getUri(), fs.getWorkingDirectory())));
|
||||
}
|
||||
assertEquals(targetReplication,
|
||||
Integer.parseInt(p.getProperty("targetReplication")));
|
||||
assertEquals(metaReplication,
|
||||
Integer.parseInt(p.getProperty("metaReplication")));
|
||||
assertEquals(stripeLength,
|
||||
Integer.parseInt(p.getProperty("stripeLength")));
|
||||
}
|
||||
}
|
||||
|
||||
long start = Time.now();
|
||||
final int MAX_WAITTIME = 300000;
|
||||
|
||||
assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
|
||||
DistRaidNode dcnode = (DistRaidNode) cnode;
|
||||
|
||||
while (dcnode.jobMonitor.jobsMonitored() < 2 &&
|
||||
Time.now() - start < MAX_WAITTIME) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
start = Time.now();
|
||||
while (dcnode.jobMonitor.jobsSucceeded() < 2 &&
|
||||
Time.now() - start < MAX_WAITTIME) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
assertEquals(dcnode.jobMonitor.jobsSucceeded(), dcnode.jobMonitor.jobsMonitored());
|
||||
LOG.info("Test testDistRaid successful.");
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("testDistRaid Exception " + e + StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
stopClusters();
|
||||
}
|
||||
LOG.info("Test testDistRaid completed.");
|
||||
}
|
||||
|
||||
//
|
||||
// simulate a corruption at the specified offset and verify that everything is good
|
||||
//
|
||||
void simulateError(RaidShell shell, FileSystem fileSys, Path file1,
|
||||
long crc, long corruptOffset) throws IOException {
|
||||
// recover the file assuming that we encountered a corruption at offset 0
|
||||
String[] args = new String[3];
|
||||
args[0] = "-recover";
|
||||
args[1] = file1.toString();
|
||||
args[2] = Long.toString(corruptOffset);
|
||||
Path recover1 = shell.recover(args[0], args, 1)[0];
|
||||
|
||||
// compare that the recovered file is identical to the original one
|
||||
LOG.info("Comparing file " + file1 + " with recovered file " + recover1);
|
||||
validateFile(fileSys, file1, recover1, crc);
|
||||
fileSys.delete(recover1, false);
|
||||
}
|
||||
|
||||
//
|
||||
// creates a file and populate it with random data. Returns its crc.
|
||||
//
|
||||
static long createOldFile(FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
|
||||
throws IOException {
|
||||
CRC32 crc = new CRC32();
|
||||
FSDataOutputStream stm = fileSys.create(name, true,
|
||||
fileSys.getConf().getInt("io.file.buffer.size", 4096),
|
||||
(short)repl, blocksize);
|
||||
// fill random data into file
|
||||
byte[] b = new byte[(int)blocksize];
|
||||
for (int i = 0; i < numBlocks; i++) {
|
||||
if (i == (numBlocks-1)) {
|
||||
b = new byte[(int)blocksize/2];
|
||||
}
|
||||
rand.nextBytes(b);
|
||||
stm.write(b);
|
||||
crc.update(b);
|
||||
}
|
||||
|
||||
stm.close();
|
||||
return crc.getValue();
|
||||
}
|
||||
|
||||
//
|
||||
// validates that file matches the crc.
|
||||
//
|
||||
private void validateFile(FileSystem fileSys, Path name1, Path name2, long crc)
|
||||
throws IOException {
|
||||
|
||||
FileStatus stat1 = fileSys.getFileStatus(name1);
|
||||
FileStatus stat2 = fileSys.getFileStatus(name2);
|
||||
assertTrue(" Length of file " + name1 + " is " + stat1.getLen() +
|
||||
" is different from length of file " + name1 + " " + stat2.getLen(),
|
||||
stat1.getLen() == stat2.getLen());
|
||||
|
||||
CRC32 newcrc = new CRC32();
|
||||
FSDataInputStream stm = fileSys.open(name2);
|
||||
final byte[] b = new byte[4192];
|
||||
int num = 0;
|
||||
while (num >= 0) {
|
||||
num = stm.read(b);
|
||||
if (num < 0) {
|
||||
break;
|
||||
}
|
||||
newcrc.update(b, 0, num);
|
||||
}
|
||||
stm.close();
|
||||
if (newcrc.getValue() != crc) {
|
||||
fail("CRC mismatch of files " + name1 + " with file " + name2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSuspendTraversal() throws Exception {
|
||||
LOG.info("Test testSuspendTraversal started.");
|
||||
long targetReplication = 2;
|
||||
long metaReplication = 2;
|
||||
long stripeLength = 3;
|
||||
short srcReplication = 1;
|
||||
|
||||
createClusters(false);
|
||||
ConfigBuilder cb = new ConfigBuilder();
|
||||
cb.addPolicy("policy1", "/user/dhruba/raidtest",
|
||||
srcReplication, targetReplication, metaReplication, stripeLength);
|
||||
cb.persist();
|
||||
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
for(int i = 0; i < 4; i++){
|
||||
Path file = new Path("/user/dhruba/raidtest/dir" + i + "/file" + i);
|
||||
createOldFile(fileSys, file, 1, 7, 1024L);
|
||||
}
|
||||
|
||||
LOG.info("Test testSuspendTraversal created test files");
|
||||
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.setInt("raid.distraid.max.jobs", 2);
|
||||
localConf.setInt("raid.distraid.max.files", 2);
|
||||
localConf.setInt("raid.directorytraversal.threads", 1);
|
||||
localConf.set(JobContext.JAR, TestRaidNode.DistRaid_JAR);
|
||||
// 4 test files: 2 jobs with 2 files each.
|
||||
final int numJobsExpected = 2;
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
|
||||
long start = Time.now();
|
||||
final int MAX_WAITTIME = 300000;
|
||||
|
||||
assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
|
||||
DistRaidNode dcnode = (DistRaidNode) cnode;
|
||||
|
||||
start = Time.now();
|
||||
while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected &&
|
||||
Time.now() - start < MAX_WAITTIME) {
|
||||
LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() +
|
||||
" to reach " + numJobsExpected);
|
||||
Thread.sleep(3000);
|
||||
}
|
||||
// Wait for any running jobs to finish.
|
||||
start = Time.now();
|
||||
while (dcnode.jobMonitor.runningJobsCount() > 0 &&
|
||||
Time.now() - start < MAX_WAITTIME) {
|
||||
LOG.info("Waiting for zero running jobs: " +
|
||||
dcnode.jobMonitor.runningJobsCount());
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored());
|
||||
assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded());
|
||||
|
||||
LOG.info("Test testSuspendTraversal successful.");
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("testSuspendTraversal Exception " + e + StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
stopClusters();
|
||||
}
|
||||
LOG.info("Test testSuspendTraversal completed.");
|
||||
}
|
||||
}
|
|
@ -1,521 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileWriter;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.commons.logging.impl.Log4JLogger;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.log4j.Level;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* If a file gets deleted, then verify that the parity file gets deleted too.
|
||||
*/
|
||||
public class TestRaidPurge {
|
||||
// Local directory for test data; can be overridden with the
// "test.build.data" system property.
final static String TEST_DIR = new File(System.getProperty("test.build.data",
    "target/test-data")).getAbsolutePath();
// Raid policy file; written by mySetup() and handed to the RaidNode through
// the "raid.config.file" setting in createClusters().
final static String CONFIG_FILE = new File(TEST_DIR,
    "test-raid.xml").getAbsolutePath();
// Value stored under "raid.config.reload.interval".
final static long RELOAD_INTERVAL = 1000;
// Log under this test's own name. The logger used to be registered as
// "org.apache.hadoop.raid.TestRaidNode" (copy/paste from that test), which
// attributed every message of this class to a different test.
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidPurge");
final Random rand = new Random();

// Instance initializer: raise the RaidNode logger to ALL for these tests.
{
  ((Log4JLogger)RaidNode.LOG).getLogger().setLevel(Level.ALL);
}

// Cluster state; populated by createClusters() and released by
// stopClusters().
Configuration conf;
String namenode = null;
String hftp = null;
MiniDFSCluster dfs = null;
MiniMRCluster mr = null;
FileSystem fileSys = null;
String jobTrackerName = null;
|
||||
|
||||
/**
|
||||
* create mapreduce and dfs clusters
|
||||
*/
|
||||
private void createClusters(boolean local) throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
conf.set("raid.config.file", CONFIG_FILE);
|
||||
conf.setBoolean("raid.config.reload", true);
|
||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.dfs.DFSClient");
|
||||
|
||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
||||
if (local) {
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
} else {
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
|
||||
}
|
||||
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
|
||||
// create a dfs and map-reduce cluster
|
||||
final int taskTrackers = 4;
|
||||
final int jobTrackerPort = 60050;
|
||||
|
||||
dfs = new MiniDFSCluster(conf, 3, true, null);
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
namenode = fileSys.getUri().toString();
|
||||
mr = new MiniMRCluster(taskTrackers, namenode, 3);
|
||||
JobConf jobConf = mr.createJobConf();
|
||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
conf.set("mapred.job.tracker", jobTrackerName);
|
||||
conf.set("mapreduce.framework.name", "yarn");
|
||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
||||
if (rmAdress != null) {
|
||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
||||
}
|
||||
String schedulerAdress =
|
||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
||||
if (schedulerAdress != null) {
|
||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
||||
}
|
||||
String jobHistoryAddress =
|
||||
jobConf.get("mapreduce.jobhistory.address");
|
||||
if (jobHistoryAddress != null) {
|
||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create raid.xml file for RaidNode
|
||||
*/
|
||||
private void mySetup(long targetReplication,
|
||||
long metaReplication, long stripeLength) throws Exception {
|
||||
int harDelay = 1; // 1 day.
|
||||
mySetup(targetReplication, metaReplication, stripeLength, harDelay);
|
||||
}
|
||||
|
||||
private void mySetup(long targetReplication,
|
||||
long metaReplication, long stripeLength, int harDelay) throws Exception {
|
||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
||||
String str = "<configuration> " +
|
||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
||||
"<policy name = \"RaidTest1\"> " +
|
||||
"<erasureCode>xor</erasureCode> " +
|
||||
"<destPath> /destraid</destPath> " +
|
||||
"<property> " +
|
||||
"<name>targetReplication</name> " +
|
||||
"<value>" + targetReplication + "</value> " +
|
||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>metaReplication</name> " +
|
||||
"<value>" + metaReplication + "</value> " +
|
||||
"<description> replication factor of parity file" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>stripeLength</name> " +
|
||||
"<value>" + stripeLength + "</value> " +
|
||||
"<description> the max number of blocks in a file to RAID together " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>modTimePeriod</name> " +
|
||||
"<value>2000</value> " +
|
||||
"<description> time (milliseconds) after a file is modified to make it " +
|
||||
"a candidate for RAIDing " +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>time_before_har</name> " +
|
||||
"<value> " + harDelay + "</value> " +
|
||||
"<description> amount of time waited before har'ing parity files" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"</policy>" +
|
||||
"</srcPath>" +
|
||||
"</configuration>";
|
||||
fileWriter.write(str);
|
||||
fileWriter.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* stop clusters created earlier
|
||||
*/
|
||||
private void stopClusters() throws Exception {
|
||||
if (mr != null) { mr.shutdown(); }
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that parity files that do not have an associated master file
|
||||
* get deleted.
|
||||
*/
|
||||
@Test
|
||||
public void testPurge() throws Exception {
|
||||
LOG.info("Test testPurge started.");
|
||||
|
||||
long blockSizes [] = {1024L};
|
||||
long stripeLengths [] = {5};
|
||||
long targetReplication = 1;
|
||||
long metaReplication = 1;
|
||||
int numBlock = 9;
|
||||
int iter = 0;
|
||||
|
||||
createClusters(true);
|
||||
try {
|
||||
for (long blockSize : blockSizes) {
|
||||
for (long stripeLength : stripeLengths) {
|
||||
doTestPurge(iter, targetReplication, metaReplication,
|
||||
stripeLength, blockSize, numBlock);
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
stopClusters();
|
||||
}
|
||||
LOG.info("Test testPurge completed.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Create parity file, delete original file and then validate that
|
||||
* parity file is automatically deleted.
|
||||
*/
|
||||
private void doTestPurge(int iter, long targetReplication,
|
||||
long metaReplication, long stripeLength,
|
||||
long blockSize, int numBlock) throws Exception {
|
||||
LOG.info("doTestPurge started---------------------------:" + " iter " + iter +
|
||||
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
|
||||
mySetup(targetReplication, metaReplication, stripeLength);
|
||||
Path dir = new Path("/user/dhruba/raidtest/");
|
||||
Path file1 = new Path(dir + "/file" + iter);
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
fileSys.delete(dir, true);
|
||||
fileSys.delete(destPath, true);
|
||||
TestRaidNode.createOldFile(fileSys, file1, 1, numBlock, blockSize);
|
||||
LOG.info("doTestPurge created test files for iteration " + iter);
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
FileStatus[] listPaths = null;
|
||||
|
||||
// wait till file is raided
|
||||
while (true) {
|
||||
try {
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
int count = 0;
|
||||
if (listPaths != null && listPaths.length == 1) {
|
||||
for (FileStatus s : listPaths) {
|
||||
LOG.info("doTestPurge found path " + s.getPath());
|
||||
if (!s.getPath().toString().endsWith(".tmp")) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (count > 0) {
|
||||
break;
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
//ignore
|
||||
}
|
||||
LOG.info("doTestPurge waiting for files to be raided. Found " +
|
||||
(listPaths == null ? "none" : listPaths.length));
|
||||
Thread.sleep(1000); // keep waiting
|
||||
}
|
||||
// assertEquals(listPaths.length, 1); // all files raided
|
||||
LOG.info("doTestPurge all files found in Raid.");
|
||||
|
||||
// delete original file
|
||||
assertTrue("Unable to delete original file " + file1 ,
|
||||
fileSys.delete(file1, true));
|
||||
LOG.info("deleted file " + file1);
|
||||
|
||||
// wait till parity file and directory are automatically deleted
|
||||
while (fileSys.exists(destPath)) {
|
||||
LOG.info("doTestPurge waiting for parity files to be removed.");
|
||||
Thread.sleep(1000); // keep waiting
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("doTestPurge Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
LOG.info("doTestPurge delete file " + file1);
|
||||
fileSys.delete(file1, true);
|
||||
}
|
||||
LOG.info("doTestPurge completed:" + " blockSize=" + blockSize +
|
||||
" stripeLength=" + stripeLength);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a file, wait for parity file to get HARed. Then modify the file,
|
||||
* wait for the HAR to get purged.
|
||||
*/
|
||||
@Test
|
||||
public void testPurgeHar() throws Exception {
|
||||
LOG.info("testPurgeHar started");
|
||||
int harDelay = 0;
|
||||
createClusters(true);
|
||||
mySetup(1, 1, 5, harDelay);
|
||||
Path dir = new Path("/user/dhruba/raidtest/");
|
||||
Path destPath = new Path("/raid/user/dhruba/raidtest");
|
||||
Path file1 = new Path(dir + "/file");
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
TestRaidNode.createOldFile(fileSys, file1, 1, 8, 8192L);
|
||||
LOG.info("testPurgeHar created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
|
||||
// Wait till har is created.
|
||||
while (true) {
|
||||
try {
|
||||
FileStatus[] listPaths = listPaths = fileSys.listStatus(destPath);
|
||||
if (listPaths != null && listPaths.length == 1) {
|
||||
FileStatus s = listPaths[0];
|
||||
LOG.info("testPurgeHar found path " + s.getPath());
|
||||
if (s.getPath().toString().endsWith(".har")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
//ignore
|
||||
}
|
||||
Thread.sleep(1000); // keep waiting
|
||||
}
|
||||
|
||||
// Set an old timestamp.
|
||||
fileSys.setTimes(file1, 0, 0);
|
||||
|
||||
boolean found = false;
|
||||
FileStatus[] listPaths = null;
|
||||
while (!found || listPaths == null || listPaths.length > 1) {
|
||||
listPaths = fileSys.listStatus(destPath);
|
||||
if (listPaths != null) {
|
||||
for (FileStatus s: listPaths) {
|
||||
LOG.info("testPurgeHar waiting for parity file to be recreated" +
|
||||
" and har to be deleted found " + s.getPath());
|
||||
if (s.getPath().toString().endsWith("file") &&
|
||||
s.getModificationTime() == 0) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info("testPurgeHar Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
fileSys.delete(dir, true);
|
||||
fileSys.delete(destPath, true);
|
||||
stopClusters();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create parity file, delete original file's directory and then validate that
|
||||
* parity directory is automatically deleted.
|
||||
*/
|
||||
@Test
|
||||
public void testPurgeDirectory() throws Exception {
|
||||
long stripeLength = 5;
|
||||
long blockSize = 8192;
|
||||
long targetReplication = 1;
|
||||
long metaReplication = 1;
|
||||
int numBlock = 9;
|
||||
|
||||
createClusters(true);
|
||||
mySetup(targetReplication, metaReplication, stripeLength);
|
||||
Path dir = new Path("/user/dhruba/raidtest/");
|
||||
Path file1 = new Path(dir + "/file1");
|
||||
RaidNode cnode = null;
|
||||
try {
|
||||
TestRaidNode.createOldFile(fileSys, file1, 1, numBlock, blockSize);
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
|
||||
// delete original directory.
|
||||
assertTrue("Unable to delete original directory " + file1 ,
|
||||
fileSys.delete(file1.getParent(), true));
|
||||
LOG.info("deleted file " + file1);
|
||||
|
||||
// wait till parity file and directory are automatically deleted
|
||||
long start = Time.now();
|
||||
while (fileSys.exists(destPath) &&
|
||||
Time.now() - start < 120000) {
|
||||
LOG.info("testPurgeDirectory waiting for parity files to be removed.");
|
||||
Thread.sleep(1000); // keep waiting
|
||||
}
|
||||
assertFalse(fileSys.exists(destPath));
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("testPurgeDirectory Exception " + e +
|
||||
StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
LOG.info("testPurgeDirectory delete file " + file1);
|
||||
fileSys.delete(file1, true);
|
||||
stopClusters();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that an XOR parity file is removed when a RS parity file is detected.
|
||||
*/
|
||||
@Test
|
||||
public void testPurgePreference() throws Exception {
|
||||
createClusters(true);
|
||||
Path dir = new Path("/user/test/raidtest/");
|
||||
Path file1 = new Path(dir + "/file1");
|
||||
|
||||
PolicyInfo infoXor = new PolicyInfo("testPurgePreference", conf);
|
||||
infoXor.setSrcPath("/user/test/raidtest");
|
||||
infoXor.setErasureCode("xor");
|
||||
infoXor.setDescription("test policy");
|
||||
infoXor.setProperty("targetReplication", "2");
|
||||
infoXor.setProperty("metaReplication", "2");
|
||||
|
||||
PolicyInfo infoRs = new PolicyInfo("testPurgePreference", conf);
|
||||
infoRs.setSrcPath("/user/test/raidtest");
|
||||
infoRs.setErasureCode("rs");
|
||||
infoRs.setDescription("test policy");
|
||||
infoRs.setProperty("targetReplication", "1");
|
||||
infoRs.setProperty("metaReplication", "1");
|
||||
try {
|
||||
TestRaidNode.createOldFile(fileSys, file1, 1, 9, 8192L);
|
||||
FileStatus stat = fileSys.getFileStatus(file1);
|
||||
|
||||
// Create the parity files.
|
||||
RaidNode.doRaid(
|
||||
conf, infoXor, stat, new RaidNode.Statistics(), Reporter.NULL);
|
||||
RaidNode.doRaid(
|
||||
conf, infoRs, stat, new RaidNode.Statistics(), Reporter.NULL);
|
||||
Path xorParity =
|
||||
new Path(RaidNode.DEFAULT_RAID_LOCATION, "user/test/raidtest/file1");
|
||||
Path rsParity =
|
||||
new Path(RaidNode.DEFAULT_RAIDRS_LOCATION, "user/test/raidtest/file1");
|
||||
assertTrue(fileSys.exists(xorParity));
|
||||
assertTrue(fileSys.exists(rsParity));
|
||||
|
||||
// Check purge of a single parity file.
|
||||
RaidNode cnode = RaidNode.createRaidNode(conf);
|
||||
FileStatus raidRsStat =
|
||||
fileSys.getFileStatus(new Path(RaidNode.DEFAULT_RAIDRS_LOCATION));
|
||||
cnode.purgeMonitor.recursePurge(infoRs.getErasureCode(), fileSys, fileSys,
|
||||
RaidNode.DEFAULT_RAIDRS_LOCATION, raidRsStat);
|
||||
|
||||
// Calling purge under the RS path has no effect.
|
||||
assertTrue(fileSys.exists(xorParity));
|
||||
assertTrue(fileSys.exists(rsParity));
|
||||
|
||||
FileStatus raidStat =
|
||||
fileSys.getFileStatus(new Path(RaidNode.DEFAULT_RAID_LOCATION));
|
||||
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
|
||||
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
|
||||
// XOR parity must have been purged by now.
|
||||
assertFalse(fileSys.exists(xorParity));
|
||||
assertTrue(fileSys.exists(rsParity));
|
||||
|
||||
// Now check the purge of a parity har.
|
||||
// Delete the RS parity for now.
|
||||
fileSys.delete(rsParity);
|
||||
// Recreate the XOR parity.
|
||||
Path xorHar =
|
||||
new Path(RaidNode.DEFAULT_RAID_LOCATION, "user/test/raidtest/raidtest" +
|
||||
RaidNode.HAR_SUFFIX);
|
||||
RaidNode.doRaid(
|
||||
conf, infoXor, stat, new RaidNode.Statistics(), Reporter.NULL);
|
||||
assertTrue(fileSys.exists(xorParity));
|
||||
assertFalse(fileSys.exists(xorHar));
|
||||
|
||||
// Create the har.
|
||||
long cutoff = Time.now();
|
||||
cnode.recurseHar(infoXor, fileSys, raidStat,
|
||||
RaidNode.DEFAULT_RAID_LOCATION, fileSys, cutoff,
|
||||
RaidNode.tmpHarPathForCode(conf, infoXor.getErasureCode()));
|
||||
|
||||
// Call purge to get rid of the parity file. The har should remain.
|
||||
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
|
||||
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
|
||||
// XOR har should exist but xor parity file should have been purged.
|
||||
assertFalse(fileSys.exists(xorParity));
|
||||
assertTrue(fileSys.exists(xorHar));
|
||||
|
||||
// Now create the RS parity.
|
||||
RaidNode.doRaid(
|
||||
conf, infoRs, stat, new RaidNode.Statistics(), Reporter.NULL);
|
||||
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
|
||||
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
|
||||
// XOR har should get deleted.
|
||||
assertTrue(fileSys.exists(rsParity));
|
||||
assertFalse(fileSys.exists(xorParity));
|
||||
assertFalse(fileSys.exists(xorHar));
|
||||
|
||||
} finally {
|
||||
stopClusters();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,267 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
import java.util.zip.CRC32;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TestRaidShell {
|
||||
final static Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.TestRaidShell");
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
||||
"test-raid.xml").getAbsolutePath();
|
||||
final static long RELOAD_INTERVAL = 1000;
|
||||
final static int NUM_DATANODES = 3;
|
||||
Configuration conf;
|
||||
String namenode = null;
|
||||
MiniDFSCluster dfs = null;
|
||||
String hftp = null;
|
||||
FileSystem fileSys = null;
|
||||
RaidNode cnode = null;
|
||||
Random rand = new Random();
|
||||
|
||||
/**
|
||||
* Create a file with three stripes, corrupt a block each in two stripes,
|
||||
* and wait for the the file to be fixed.
|
||||
*/
|
||||
@Test
|
||||
public void testBlockFix() throws Exception {
|
||||
LOG.info("Test testBlockFix started.");
|
||||
long blockSize = 8192L;
|
||||
int stripeLength = 3;
|
||||
mySetup(stripeLength, -1);
|
||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
||||
Path parityFile = new Path(destPath, "file1");
|
||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 7, blockSize);
|
||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
||||
LOG.info("Test testBlockFix created test files");
|
||||
|
||||
// create an instance of the RaidNode
|
||||
Configuration localConf = new Configuration(conf);
|
||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
||||
localConf.setInt("raid.blockfix.interval", 1000);
|
||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
conf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
||||
cnode = RaidNode.createRaidNode(null, localConf);
|
||||
|
||||
try {
|
||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
||||
cnode.stop();
|
||||
cnode.join();
|
||||
cnode = null;
|
||||
|
||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
||||
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(
|
||||
(DistributedFileSystem) fileSys, file1.toUri().getPath(),
|
||||
0, srcStat.getLen());
|
||||
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
|
||||
// Corrupt blocks in different stripes. We can fix them.
|
||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
||||
for (int idx: corruptBlockIdxs) {
|
||||
LOG.info("Corrupting block " + locations.get(idx).getBlock());
|
||||
corruptBlock(locations.get(idx).getBlock());
|
||||
}
|
||||
TestBlockFixer.reportCorruptBlocks(fileSys, file1, corruptBlockIdxs,
|
||||
srcStat.getBlockSize());
|
||||
|
||||
waitForCorruptBlocks(corruptBlockIdxs.length, dfs, file1);
|
||||
|
||||
// Create RaidShell and fix the file.
|
||||
RaidShell shell = new RaidShell(conf);
|
||||
String[] args = new String[2];
|
||||
args[0] = "-recoverBlocks";
|
||||
args[1] = file1.toUri().getPath();
|
||||
ToolRunner.run(shell, args);
|
||||
|
||||
waitForCorruptBlocks(0, dfs, file1);
|
||||
|
||||
assertTrue(TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
||||
|
||||
// Now corrupt and fix the parity file.
|
||||
FileStatus parityStat = fileSys.getFileStatus(parityFile);
|
||||
long parityCrc = getCRC(fileSys, parityFile);
|
||||
locations = RaidDFSUtil.getBlockLocations(
|
||||
dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
|
||||
corruptBlock(locations.get(0).getBlock());
|
||||
TestBlockFixer.reportCorruptBlocks(fileSys, parityFile, new int[]{0},
|
||||
srcStat.getBlockSize());
|
||||
waitForCorruptBlocks(1, dfs, parityFile);
|
||||
|
||||
args[1] = parityFile.toUri().getPath();
|
||||
ToolRunner.run(shell, args);
|
||||
|
||||
waitForCorruptBlocks(0, dfs, file1);
|
||||
assertEquals(parityCrc, getCRC(fileSys, parityFile));
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.info("Test testBlockFix Exception " + e + StringUtils.stringifyException(e));
|
||||
throw e;
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
LOG.info("Test testBlockFix completed.");
|
||||
}
|
||||
|
||||
private void waitForCorruptBlocks(
|
||||
int numCorruptBlocks, DistributedFileSystem dfs, Path file)
|
||||
throws Exception {
|
||||
String path = file.toUri().getPath();
|
||||
FileStatus stat = dfs.getFileStatus(file);
|
||||
long start = Time.now();
|
||||
long actual = 0;
|
||||
do {
|
||||
actual = RaidDFSUtil.corruptBlocksInFile(
|
||||
dfs, path, 0, stat.getLen()).size();
|
||||
if (actual == numCorruptBlocks) break;
|
||||
if (Time.now() - start > 120000) break;
|
||||
LOG.info("Waiting for " + numCorruptBlocks + " corrupt blocks in " +
|
||||
path + ", found " + actual);
|
||||
Thread.sleep(1000);
|
||||
} while (true);
|
||||
assertEquals(numCorruptBlocks, actual);
|
||||
}
|
||||
|
||||
private void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
|
||||
conf.set("raid.config.file", CONFIG_FILE);
|
||||
conf.setBoolean("raid.config.reload", true);
|
||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
// do not use map-reduce cluster for Raiding
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
conf.setInt("hdfs.raid.stripeLength", stripeLength);
|
||||
conf.set("hdfs.raid.locations", "/destraid");
|
||||
|
||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
namenode = fileSys.getUri().toString();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
|
||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
||||
String str = "<configuration> " +
|
||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
||||
"<policy name = \"RaidTest1\"> " +
|
||||
"<erasureCode>xor</erasureCode> " +
|
||||
"<destPath> /destraid</destPath> " +
|
||||
"<property> " +
|
||||
"<name>targetReplication</name> " +
|
||||
"<value>1</value> " +
|
||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>metaReplication</name> " +
|
||||
"<value>1</value> " +
|
||||
"<description> replication factor of parity file" +
|
||||
"</description> " +
|
||||
"</property> " +
|
||||
"<property> " +
|
||||
"<name>modTimePeriod</name> " +
|
||||
"<value>2000</value> " +
|
||||
"<description> time (milliseconds) after a file is modified to make it " +
|
||||
"a candidate for RAIDing " +
|
||||
"</description> " +
|
||||
"</property> ";
|
||||
if (timeBeforeHar >= 0) {
|
||||
str +=
|
||||
"<property> " +
|
||||
"<name>time_before_har</name> " +
|
||||
"<value>" + timeBeforeHar + "</value> " +
|
||||
"<description> amount of time waited before har'ing parity files" +
|
||||
"</description> " +
|
||||
"</property> ";
|
||||
}
|
||||
|
||||
str +=
|
||||
"</policy>" +
|
||||
"</srcPath>" +
|
||||
"</configuration>";
|
||||
fileWriter.write(str);
|
||||
fileWriter.close();
|
||||
}
|
||||
|
||||
private void myTearDown() throws Exception {
|
||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
|
||||
private long getCRC(FileSystem fs, Path p) throws IOException {
|
||||
CRC32 crc = new CRC32();
|
||||
FSDataInputStream stm = fs.open(p);
|
||||
int b;
|
||||
while ((b = stm.read())>=0) {
|
||||
crc.update(b);
|
||||
}
|
||||
stm.close();
|
||||
return crc.getValue();
|
||||
}
|
||||
|
||||
void corruptBlock(ExtendedBlock block) throws IOException {
|
||||
assertTrue("Could not corrupt block",
|
||||
dfs.corruptBlockOnDataNodes(block) > 0);
|
||||
}
|
||||
}
|
|
@ -1,724 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.After;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TestRaidShellFsck {
|
||||
final static Log LOG =
|
||||
LogFactory.getLog("org.apache.hadoop.raid.TestRaidShellFsck");
|
||||
final static String TEST_DIR =
|
||||
new File(System.
|
||||
getProperty("test.build.data", "target/test-data")).getAbsolutePath();
|
||||
|
||||
final static String CONFIG_FILE = new File(TEST_DIR, "test-raid.xml").
|
||||
getAbsolutePath();
|
||||
final static long RELOAD_INTERVAL = 1000;
|
||||
final static int NUM_DATANODES = 4;
|
||||
final static int STRIPE_BLOCKS = 3; // number of blocks per stripe
|
||||
final static int FILE_BLOCKS = 6; // number of blocks that file consists of
|
||||
final static short REPL = 1; // replication factor before raiding
|
||||
final static long BLOCK_SIZE = 8192L; // size of block in byte
|
||||
final static String DIR_PATH = "/user/pkling/raidtest";
|
||||
final static Path FILE_PATH0 =
|
||||
new Path("/user/pkling/raidtest/raidfsck.test");
|
||||
final static Path FILE_PATH1 =
|
||||
new Path("/user/pkling/raidtest/raidfsck2.test");
|
||||
final static Path RAID_PATH = new Path("/destraid/user/pkling/raidtest");
|
||||
final static String HAR_NAME = "raidtest_raid.har";
|
||||
final static String RAID_DIR = "/destraid";
|
||||
|
||||
Configuration conf = null;
|
||||
Configuration raidConf = null;
|
||||
Configuration clientConf = null;
|
||||
MiniDFSCluster cluster = null;
|
||||
DistributedFileSystem dfs = null;
|
||||
RaidNode rnode = null;
|
||||
|
||||
|
||||
RaidShell shell = null;
|
||||
String[] args = null;
|
||||
|
||||
|
||||
/**
|
||||
* creates a MiniDFS instance with a raided file in it
|
||||
*/
|
||||
private void setUp(boolean doHar) throws IOException, ClassNotFoundException {
|
||||
|
||||
final int timeBeforeHar;
|
||||
if (doHar) {
|
||||
timeBeforeHar = 0;
|
||||
} else {
|
||||
timeBeforeHar = -1;
|
||||
}
|
||||
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
|
||||
conf.set("raid.config.file", CONFIG_FILE);
|
||||
conf.setBoolean("raid.config.reload", true);
|
||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
||||
|
||||
// scan all policies once every 5 second
|
||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
// do not use map-reduce cluster for Raiding
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
// use local block fixer
|
||||
conf.set("raid.blockfix.classname",
|
||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
||||
|
||||
conf.set("raid.server.address", "localhost:0");
|
||||
conf.setInt("hdfs.raid.stripeLength", STRIPE_BLOCKS);
|
||||
conf.set("hdfs.raid.locations", RAID_DIR);
|
||||
|
||||
conf.setInt("dfs.corruptfilesreturned.max", 500);
|
||||
|
||||
conf.setBoolean("dfs.permissions", false);
|
||||
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
|
||||
.build();
|
||||
cluster.waitActive();
|
||||
dfs = (DistributedFileSystem) cluster.getFileSystem();
|
||||
String namenode = dfs.getUri().toString();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
|
||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
||||
String str =
|
||||
"<configuration> " +
|
||||
" <srcPath prefix=\"" + DIR_PATH + "\"> " +
|
||||
" <policy name = \"RaidTest1\"> " +
|
||||
" <erasureCode>xor</erasureCode> " +
|
||||
" <destPath> " + RAID_DIR + " </destPath> " +
|
||||
" <property> " +
|
||||
" <name>targetReplication</name> " +
|
||||
" <value>1</value> " +
|
||||
" <description>after RAIDing, decrease the replication " +
|
||||
"factor of a file to this value.</description> " +
|
||||
" </property> " +
|
||||
" <property> " +
|
||||
" <name>metaReplication</name> " +
|
||||
" <value>1</value> " +
|
||||
" <description> replication factor of parity file</description> " +
|
||||
" </property> " +
|
||||
" <property> " +
|
||||
" <name>modTimePeriod</name> " +
|
||||
" <value>2000</value> " +
|
||||
" <description>time (milliseconds) after a file is modified " +
|
||||
"to make it a candidate for RAIDing</description> " +
|
||||
" </property> ";
|
||||
|
||||
if (timeBeforeHar >= 0) {
|
||||
str +=
|
||||
" <property> " +
|
||||
" <name>time_before_har</name> " +
|
||||
" <value>" + timeBeforeHar + "</value> " +
|
||||
" <description> amount of time waited before har'ing parity " +
|
||||
"files</description> " +
|
||||
" </property> ";
|
||||
}
|
||||
|
||||
str +=
|
||||
" </policy>" +
|
||||
" </srcPath>" +
|
||||
"</configuration>";
|
||||
|
||||
fileWriter.write(str);
|
||||
fileWriter.close();
|
||||
|
||||
createTestFile(FILE_PATH0);
|
||||
createTestFile(FILE_PATH1);
|
||||
|
||||
Path[] filePaths = { FILE_PATH0, FILE_PATH1 };
|
||||
raidTestFiles(RAID_PATH, filePaths, doHar);
|
||||
|
||||
clientConf = new Configuration(raidConf);
|
||||
clientConf.set("fs.hdfs.impl",
|
||||
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
|
||||
clientConf.set("fs.raid.underlyingfs.impl",
|
||||
"org.apache.hadoop.hdfs.DistributedFileSystem");
|
||||
|
||||
// prepare shell and arguments
|
||||
shell = new RaidShell(clientConf);
|
||||
args = new String[2];
|
||||
args[0] = "-fsck";
|
||||
args[1] = DIR_PATH;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates test file consisting of random data
|
||||
*/
|
||||
private void createTestFile(Path filePath) throws IOException {
|
||||
Random rand = new Random();
|
||||
FSDataOutputStream stm = dfs.create(filePath, true,
|
||||
conf.getInt("io.file.buffer.size",
|
||||
4096), REPL, BLOCK_SIZE);
|
||||
|
||||
final byte[] b = new byte[(int) BLOCK_SIZE];
|
||||
for (int i = 0; i < FILE_BLOCKS; i++) {
|
||||
rand.nextBytes(b);
|
||||
stm.write(b);
|
||||
}
|
||||
stm.close();
|
||||
LOG.info("test file created");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* raids test file
|
||||
*/
|
||||
private void raidTestFiles(Path raidPath, Path[] filePaths, boolean doHar)
|
||||
throws IOException, ClassNotFoundException {
|
||||
// create RaidNode
|
||||
raidConf = new Configuration(conf);
|
||||
raidConf.set(RaidNode.RAID_LOCATION_KEY, RAID_DIR);
|
||||
raidConf.setInt("raid.blockfix.interval", 1000);
|
||||
raidConf.setLong("har.block.size", BLOCK_SIZE * 3);
|
||||
// the RaidNode does the raiding inline (instead of submitting to MR node)
|
||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
||||
rnode = RaidNode.createRaidNode(null, raidConf);
|
||||
|
||||
for (Path filePath: filePaths) {
|
||||
long waitStart = Time.now();
|
||||
boolean raided = false;
|
||||
|
||||
Path parityFilePath = new Path(RAID_DIR,
|
||||
filePath.toString().substring(1));
|
||||
|
||||
while (!raided) {
|
||||
try {
|
||||
FileStatus[] listPaths = dfs.listStatus(raidPath);
|
||||
if (listPaths != null) {
|
||||
if (doHar) {
|
||||
// case with HAR
|
||||
for (FileStatus f: listPaths) {
|
||||
if (f.getPath().toString().endsWith(".har")) {
|
||||
// check if the parity file is in the index
|
||||
final Path indexPath = new Path(f.getPath(), "_index");
|
||||
final FileStatus indexFileStatus =
|
||||
dfs.getFileStatus(indexPath);
|
||||
final HarIndex harIndex =
|
||||
new HarIndex(dfs.open(indexPath), indexFileStatus.getLen());
|
||||
final HarIndex.IndexEntry indexEntry =
|
||||
harIndex.findEntryByFileName(parityFilePath.toString());
|
||||
if (indexEntry != null) {
|
||||
LOG.info("raid file " + parityFilePath.toString() +
|
||||
" found in Har archive: " +
|
||||
f.getPath().toString() +
|
||||
" ts=" + indexEntry.mtime);
|
||||
raided = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// case without HAR
|
||||
for (FileStatus f : listPaths) {
|
||||
Path found = new Path(f.getPath().toUri().getPath());
|
||||
if (parityFilePath.equals(found)) {
|
||||
LOG.info("raid file found: " + f.getPath().toString());
|
||||
raided = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException ignore) {
|
||||
}
|
||||
if (!raided) {
|
||||
if (Time.now() > waitStart + 40000L) {
|
||||
LOG.error("parity file not created after 40s");
|
||||
throw new IOException("parity file not HARed after 40s");
|
||||
} else {
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rnode.stop();
|
||||
rnode.join();
|
||||
rnode = null;
|
||||
LOG.info("test file raided");
|
||||
}
|
||||
|
||||
/**
|
||||
* sleeps for up to 20s until the number of corrupt files
|
||||
* in the file system is equal to the number specified
|
||||
*/
|
||||
private void waitUntilCorruptFileCount(DistributedFileSystem dfs,
|
||||
int corruptFiles)
|
||||
throws IOException {
|
||||
long waitStart = Time.now();
|
||||
while (RaidDFSUtil.getCorruptFiles(dfs).length != corruptFiles) {
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException ignore) {
|
||||
|
||||
}
|
||||
|
||||
if (Time.now() > waitStart + 20000L) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int corruptFilesFound = RaidDFSUtil.getCorruptFiles(dfs).length;
|
||||
if (corruptFilesFound != corruptFiles) {
|
||||
throw new IOException("expected " + corruptFiles +
|
||||
" corrupt files but got " +
|
||||
corruptFilesFound);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* removes a specified block from MiniDFS storage and reports it as corrupt
|
||||
*/
|
||||
private void removeAndReportBlock(DistributedFileSystem blockDfs,
|
||||
Path filePath,
|
||||
LocatedBlock block)
|
||||
throws IOException {
|
||||
TestRaidDfs.corruptBlock(cluster, filePath, block.getBlock(), NUM_DATANODES, true);
|
||||
|
||||
// report deleted block to the name node
|
||||
LocatedBlock[] toReport = { block };
|
||||
blockDfs.getClient().getNamenode().reportBadBlocks(toReport);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* removes a file block in the specified stripe
|
||||
*/
|
||||
private void removeFileBlock(Path filePath, int stripe, int blockInStripe)
|
||||
throws IOException {
|
||||
LocatedBlocks fileBlocks = dfs.getClient().getNamenode().
|
||||
getBlockLocations(filePath.toString(), 0, FILE_BLOCKS * BLOCK_SIZE);
|
||||
if (fileBlocks.locatedBlockCount() != FILE_BLOCKS) {
|
||||
throw new IOException("expected " + FILE_BLOCKS +
|
||||
" file blocks but found " +
|
||||
fileBlocks.locatedBlockCount());
|
||||
}
|
||||
if (blockInStripe >= STRIPE_BLOCKS) {
|
||||
throw new IOException("blockInStripe is " + blockInStripe +
|
||||
" but must be smaller than " + STRIPE_BLOCKS);
|
||||
}
|
||||
LocatedBlock block = fileBlocks.get(stripe * STRIPE_BLOCKS + blockInStripe);
|
||||
removeAndReportBlock(dfs, filePath, block);
|
||||
LOG.info("removed file " + filePath.toString() + " block " +
|
||||
stripe * STRIPE_BLOCKS + " in stripe " + stripe);
|
||||
}
|
||||
|
||||
/**
|
||||
* removes a parity block in the specified stripe
|
||||
*/
|
||||
private void removeParityBlock(Path filePath, int stripe) throws IOException {
|
||||
// find parity file
|
||||
Path destPath = new Path(RAID_DIR);
|
||||
RaidNode.ParityFilePair ppair = null;
|
||||
|
||||
ppair = RaidNode.getParityFile(destPath, filePath, conf);
|
||||
String parityPathStr = ppair.getPath().toUri().getPath();
|
||||
LOG.info("parity path: " + parityPathStr);
|
||||
FileSystem parityFS = ppair.getFileSystem();
|
||||
if (!(parityFS instanceof DistributedFileSystem)) {
|
||||
throw new IOException("parity file is not on distributed file system");
|
||||
}
|
||||
DistributedFileSystem parityDFS = (DistributedFileSystem) parityFS;
|
||||
|
||||
|
||||
// now corrupt the block corresponding to the stripe selected
|
||||
FileStatus parityFileStatus =
|
||||
parityDFS.getFileStatus(new Path(parityPathStr));
|
||||
long parityBlockSize = parityFileStatus.getBlockSize();
|
||||
long parityFileLength = parityFileStatus.getLen();
|
||||
long parityFileLengthInBlocks = (parityFileLength / parityBlockSize) +
|
||||
(((parityFileLength % parityBlockSize) == 0) ? 0L : 1L);
|
||||
if (parityFileLengthInBlocks <= stripe) {
|
||||
throw new IOException("selected stripe " + stripe +
|
||||
" but parity file only has " +
|
||||
parityFileLengthInBlocks + " blocks");
|
||||
}
|
||||
if (parityBlockSize != BLOCK_SIZE) {
|
||||
throw new IOException("file block size is " + BLOCK_SIZE +
|
||||
" but parity file block size is " +
|
||||
parityBlockSize);
|
||||
}
|
||||
LocatedBlocks parityFileBlocks = parityDFS.getClient().getNamenode().
|
||||
getBlockLocations(parityPathStr, 0, parityFileLength);
|
||||
if (parityFileBlocks.locatedBlockCount() != parityFileLengthInBlocks) {
|
||||
throw new IOException("expected " + parityFileLengthInBlocks +
|
||||
" parity file blocks but got " +
|
||||
parityFileBlocks.locatedBlockCount() +
|
||||
" blocks");
|
||||
}
|
||||
LocatedBlock parityFileBlock = parityFileBlocks.get(stripe);
|
||||
removeAndReportBlock(parityDFS, new Path(parityPathStr), parityFileBlock);
|
||||
LOG.info("removed parity file block/stripe " + stripe +
|
||||
" for " + filePath.toString());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* removes a block from the har part file
|
||||
*/
|
||||
private void removeHarParityBlock(int block) throws IOException {
|
||||
Path harPath = new Path(RAID_PATH, HAR_NAME);
|
||||
FileStatus [] listPaths = dfs.listStatus(harPath);
|
||||
|
||||
boolean deleted = false;
|
||||
|
||||
for (FileStatus f: listPaths) {
|
||||
if (f.getPath().getName().startsWith("part-")) {
|
||||
final Path partPath = new Path(f.getPath().toUri().getPath());
|
||||
final LocatedBlocks partBlocks = dfs.getClient().getNamenode().
|
||||
getBlockLocations(partPath.toString(),
|
||||
0,
|
||||
f.getLen());
|
||||
|
||||
if (partBlocks.locatedBlockCount() <= block) {
|
||||
throw new IOException("invalid har block " + block);
|
||||
}
|
||||
|
||||
final LocatedBlock partBlock = partBlocks.get(block);
|
||||
removeAndReportBlock(dfs, partPath, partBlock);
|
||||
LOG.info("removed block " + block + "/" +
|
||||
partBlocks.locatedBlockCount() +
|
||||
" of file " + partPath.toString() +
|
||||
" block size " + partBlock.getBlockSize());
|
||||
deleted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!deleted) {
|
||||
throw new IOException("cannot find part file in " + harPath.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with no missing blocks
|
||||
*/
|
||||
@Test
|
||||
public void testClean() throws Exception {
|
||||
LOG.info("testClean");
|
||||
setUp(false);
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* checks fsck with missing block in file block but not in parity block
|
||||
*/
|
||||
@Test
|
||||
public void testFileBlockMissing() throws Exception {
|
||||
LOG.info("testFileBlockMissing");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 0, 0);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with missing block in parity block but not in file block
|
||||
*/
|
||||
@Test
|
||||
public void testParityBlockMissing() throws Exception {
|
||||
LOG.info("testParityBlockMissing");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeParityBlock(FILE_PATH0, 0);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with missing block in both file block and parity block
|
||||
* in different stripes
|
||||
*/
|
||||
@Test
|
||||
public void testFileBlockAndParityBlockMissingInDifferentStripes()
|
||||
throws Exception {
|
||||
LOG.info("testFileBlockAndParityBlockMissingInDifferentStripes");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 0, 0);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
removeParityBlock(FILE_PATH0, 1);
|
||||
waitUntilCorruptFileCount(dfs, 2);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with missing block in both file block and parity block
|
||||
* in same stripe
|
||||
*/
|
||||
@Test
|
||||
public void testFileBlockAndParityBlockMissingInSameStripe()
|
||||
throws Exception {
|
||||
LOG.info("testFileBlockAndParityBlockMissingInSameStripe");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeParityBlock(FILE_PATH0, 1);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
removeFileBlock(FILE_PATH0, 1, 0);
|
||||
waitUntilCorruptFileCount(dfs, 2);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 1, but returns " +
|
||||
Integer.toString(result), result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with two missing file blocks in same stripe
|
||||
*/
|
||||
@Test
|
||||
public void test2FileBlocksMissingInSameStripe()
|
||||
throws Exception {
|
||||
LOG.info("test2FileBlocksMissingInSameStripe");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 1, 1);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
removeFileBlock(FILE_PATH0, 1, 0);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 1, but returns " +
|
||||
Integer.toString(result), result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with two missing file blocks in different stripes
|
||||
*/
|
||||
@Test
|
||||
public void test2FileBlocksMissingInDifferentStripes()
|
||||
throws Exception {
|
||||
LOG.info("test2FileBlocksMissingInDifferentStripes");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 1, 1);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
removeFileBlock(FILE_PATH0, 0, 0);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with file block missing (HAR)
|
||||
* use 2 files to verify HAR offset logic in RaidShell fsck
|
||||
* both files have one corrupt block, parity blocks are clean
|
||||
*
|
||||
* parity blocks in har (file.stripe):
|
||||
* +-----+-----+-----+ +-----+
|
||||
* | 0.0 | 0.1 | 1.0 | | 1.1 |
|
||||
* +-----+-----+-----+ +-----+
|
||||
* 0 1
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public void testFileBlockMissingHar()
|
||||
throws Exception {
|
||||
LOG.info("testFileBlockMissingHar");
|
||||
setUp(true);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 1, 1);
|
||||
removeFileBlock(FILE_PATH1, 1, 1);
|
||||
waitUntilCorruptFileCount(dfs, 2);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with file block missing (HAR)
|
||||
* use 2 files to verify HAR offset logic in RaidShell fsck
|
||||
*
|
||||
* parity blocks in har (file.stripe):
|
||||
* +-----+-----+-----+ +-----+
|
||||
* | 0.0 | 0.1 | 1.0 | | 1.1 |
|
||||
* +-----+-----+-----+ +-----+
|
||||
* 0 1
|
||||
*
|
||||
* corrupt file 0, stripe 0 file block 0
|
||||
* corrupt file 0, stripe 1 file block 0
|
||||
* corrupt file 1, stripe 0 file block 0
|
||||
* corrupt file 1, stripe 1 file block 0
|
||||
* corrupt har block 0
|
||||
* both files should be corrupt
|
||||
*/
|
||||
@Test
|
||||
public void testFileBlockAndParityBlockMissingHar1()
|
||||
throws Exception {
|
||||
LOG.info("testFileBlockAndParityBlockMissingHar1");
|
||||
setUp(true);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 0, 0);
|
||||
removeFileBlock(FILE_PATH0, 1, 0);
|
||||
removeFileBlock(FILE_PATH1, 0, 0);
|
||||
removeFileBlock(FILE_PATH1, 1, 0);
|
||||
removeHarParityBlock(0);
|
||||
waitUntilCorruptFileCount(dfs, 3);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 2, but returns " +
|
||||
Integer.toString(result), result == 2);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks fsck with file block missing (HAR)
|
||||
* use 2 files to verify HAR offset logic in RaidShell fsck
|
||||
*
|
||||
* parity blocks in har (file.stripe):
|
||||
* +-----+-----+-----+ +-----+
|
||||
* | 0.0 | 0.1 | 1.0 | | 1.1 |
|
||||
* +-----+-----+-----+ +-----+
|
||||
* 0 1
|
||||
*
|
||||
* corrupt file 0, stripe 0 file block 0
|
||||
* corrupt file 0, stripe 1 file block 0
|
||||
* corrupt file 1, stripe 0 file block 0
|
||||
* corrupt file 1, stripe 1 file block 0
|
||||
* corrupt har block 1
|
||||
* only file 2 should be corrupt
|
||||
*/
|
||||
@Test
|
||||
public void testFileBlockAndParityBlockMissingHar2()
|
||||
throws Exception {
|
||||
LOG.info("testFileBlockAndParityBlockMissingHar2");
|
||||
setUp(true);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeFileBlock(FILE_PATH0, 0, 0);
|
||||
removeFileBlock(FILE_PATH0, 1, 0);
|
||||
removeFileBlock(FILE_PATH1, 0, 0);
|
||||
removeFileBlock(FILE_PATH1, 1, 0);
|
||||
removeHarParityBlock(1);
|
||||
waitUntilCorruptFileCount(dfs, 3);
|
||||
|
||||
int result = ToolRunner.run(shell, args);
|
||||
|
||||
assertTrue("fsck should return 1, but returns " +
|
||||
Integer.toString(result), result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* checks that fsck does not report corrupt file that is not in
|
||||
* the specified path
|
||||
*/
|
||||
@Test
|
||||
public void testPathFilter()
|
||||
throws Exception {
|
||||
LOG.info("testPathFilter");
|
||||
setUp(false);
|
||||
waitUntilCorruptFileCount(dfs, 0);
|
||||
removeParityBlock(FILE_PATH0, 1);
|
||||
waitUntilCorruptFileCount(dfs, 1);
|
||||
removeFileBlock(FILE_PATH0, 1, 0);
|
||||
waitUntilCorruptFileCount(dfs, 2);
|
||||
|
||||
String[] otherArgs = new String[2];
|
||||
otherArgs[0] = "-fsck";
|
||||
otherArgs[1] = "/user/pkling/other";
|
||||
int result = ToolRunner.run(shell, otherArgs);
|
||||
|
||||
assertTrue("fsck should return 0, but returns " +
|
||||
Integer.toString(result), result == 0);
|
||||
}
|
||||
|
||||
|
||||
@After
|
||||
public void tearDown() throws Exception {
|
||||
if (rnode != null) {
|
||||
rnode.stop();
|
||||
rnode.join();
|
||||
rnode = null;
|
||||
}
|
||||
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
cluster = null;
|
||||
}
|
||||
|
||||
dfs = null;
|
||||
|
||||
LOG.info("Test cluster shut down");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,135 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TestReedSolomonDecoder {
|
||||
final static Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.TestReedSolomonDecoder");
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
final static int NUM_DATANODES = 3;
|
||||
|
||||
Configuration conf;
|
||||
MiniDFSCluster dfs = null;
|
||||
FileSystem fileSys = null;
|
||||
|
||||
@Test
|
||||
public void testDecoder() throws Exception {
|
||||
mySetup();
|
||||
int stripeSize = 10;
|
||||
int paritySize = 4;
|
||||
long blockSize = 8192;
|
||||
Path file1 = new Path("/user/raidtest/file1");
|
||||
Path recoveredFile1 = new Path("/user/raidtest/file1.recovered");
|
||||
Path parityFile1 = new Path("/rsraid/user/raidtest/file1");
|
||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 25, blockSize);
|
||||
FileStatus file1Stat = fileSys.getFileStatus(file1);
|
||||
|
||||
conf.setInt("raid.rsdecoder.bufsize", 512);
|
||||
conf.setInt("raid.rsencoder.bufsize", 512);
|
||||
|
||||
try {
|
||||
// First encode the file.
|
||||
ReedSolomonEncoder encoder = new ReedSolomonEncoder(
|
||||
conf, stripeSize, paritySize);
|
||||
short parityRepl = 1;
|
||||
encoder.encodeFile(fileSys, file1, fileSys, parityFile1, parityRepl,
|
||||
Reporter.NULL);
|
||||
|
||||
// Ensure there are no corrupt files yet.
|
||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals(corruptFiles.length, 0);
|
||||
|
||||
// Now corrupt the file.
|
||||
long corruptOffset = blockSize * 5;
|
||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
||||
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(dfs,
|
||||
file1.toUri().getPath(), 0, srcStat.getLen());
|
||||
corruptBlock(locations.get(5).getBlock());
|
||||
corruptBlock(locations.get(6).getBlock());
|
||||
TestBlockFixer.reportCorruptBlocks(dfs, file1, new int[]{5, 6},
|
||||
srcStat.getBlockSize());
|
||||
|
||||
// Ensure file is corrupted.
|
||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
||||
assertEquals(corruptFiles.length, 1);
|
||||
assertEquals(corruptFiles[0], file1.toString());
|
||||
|
||||
// Fix the file.
|
||||
ReedSolomonDecoder decoder = new ReedSolomonDecoder(
|
||||
conf, stripeSize, paritySize);
|
||||
decoder.decodeFile(fileSys, file1, fileSys, parityFile1,
|
||||
corruptOffset, recoveredFile1);
|
||||
assertTrue(TestRaidDfs.validateFile(
|
||||
fileSys, recoveredFile1, file1Stat.getLen(), crc1));
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
void corruptBlock(ExtendedBlock block) throws IOException {
|
||||
assertTrue("Could not corrupt block",
|
||||
dfs.corruptBlockOnDataNodes(block) > 0);
|
||||
}
|
||||
|
||||
private void mySetup() throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
conf.setBoolean("dfs.permissions", false);
|
||||
|
||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
String namenode = fileSys.getUri().toString();
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
}
|
||||
|
||||
private void myTearDown() throws Exception {
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
}
|
|
@ -1,94 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.raid;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TestReedSolomonEncoder {
|
||||
final static Log LOG = LogFactory.getLog(
|
||||
"org.apache.hadoop.raid.TestReedSolomonEncoder");
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
||||
"target/test-data")).getAbsolutePath();
|
||||
final static int NUM_DATANODES = 3;
|
||||
|
||||
Configuration conf;
|
||||
String namenode = null;
|
||||
MiniDFSCluster dfs = null;
|
||||
FileSystem fileSys = null;
|
||||
|
||||
@Test
|
||||
public void testEncoder() throws Exception {
|
||||
mySetup();
|
||||
int stripeSize = 10;
|
||||
int paritySize = 4;
|
||||
long blockSize = 8192;
|
||||
Path file1 = new Path("/user/raidtest/file1");
|
||||
Path parityFile1 = new Path("/rsraid/user/raidtest/file1");
|
||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
||||
1, 25, blockSize);
|
||||
try {
|
||||
ReedSolomonEncoder encoder = new ReedSolomonEncoder(
|
||||
conf, stripeSize, paritySize);
|
||||
short parityRepl = 1;
|
||||
encoder.encodeFile(fileSys, file1, fileSys, parityFile1, parityRepl,
|
||||
Reporter.NULL);
|
||||
|
||||
FileStatus parityStat = fileSys.getFileStatus(parityFile1);
|
||||
assertEquals(4*8192*3, parityStat.getLen());
|
||||
|
||||
} finally {
|
||||
myTearDown();
|
||||
}
|
||||
}
|
||||
|
||||
private void mySetup() throws Exception {
|
||||
|
||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
||||
conf = new Configuration();
|
||||
|
||||
// make all deletions not go through Trash
|
||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
||||
|
||||
dfs = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
|
||||
dfs.waitActive();
|
||||
fileSys = dfs.getFileSystem();
|
||||
namenode = fileSys.getUri().toString();
|
||||
|
||||
FileSystem.setDefaultUri(conf, namenode);
|
||||
|
||||
}
|
||||
|
||||
private void myTearDown() throws Exception {
|
||||
if (dfs != null) { dfs.shutdown(); }
|
||||
}
|
||||
}
|
|
@ -34,7 +34,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|||
<module>hadoop-hdfs</module>
|
||||
<module>hadoop-hdfs-httpfs</module>
|
||||
<module>hadoop-hdfs/src/contrib/bkjournal</module>
|
||||
<module>hadoop-hdfs-raid</module>
|
||||
</modules>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -1711,10 +1711,6 @@
|
|||
output="${build.dir.eclipse-contrib-classes}/gridmix/main" />
|
||||
<source path="${contrib.dir}/gridmix/src/test"
|
||||
output="${build.dir.eclipse-contrib-classes}/gridmix/test" />
|
||||
<source path="${contrib.dir}/raid/src/java"
|
||||
output="${build.dir.eclipse-contrib-classes}/raid/main" />
|
||||
<source path="${contrib.dir}/raid/src/test"
|
||||
output="${build.dir.eclipse-contrib-classes}/raid/test" />
|
||||
<source path="${contrib.dir}/vaidya/src/java"
|
||||
output="${build.dir.eclipse-contrib-classes}/vaidya/main" />
|
||||
<source path="${contrib.dir}/vertica/src/java"
|
||||
|
|
|
@ -62,7 +62,6 @@
|
|||
<fileset dir="." includes="streaming/build.xml"/>
|
||||
<fileset dir="." includes="gridmix/build.xml"/>
|
||||
<fileset dir="." includes="vertica/build.xml"/>
|
||||
<fileset dir="." includes="raid/build.xml"/>
|
||||
</subant>
|
||||
<available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
|
||||
<fail if="testsfailed">Tests failed!</fail>
|
||||
|
|
|
@ -1,201 +0,0 @@
|
|||
# Copyright 2008 The Apache Software Foundation Licensed under the
|
||||
# Apache License, Version 2.0 (the "License"); you may not use this
|
||||
# file except in compliance with the License. You may obtain a copy
|
||||
# of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless
|
||||
# required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied. See the License for the specific language governing
|
||||
# permissions and limitations under the License.
|
||||
|
||||
This package implements a Distributed Raid File System. It is used along with
|
||||
an instance of the Hadoop Distributed File System (HDFS). It can be used to
|
||||
provide better protection against data corruption. It can also be used to
|
||||
reduce the total storage requirements of HDFS.
|
||||
|
||||
Distributed Raid File System consists of two main software components. The first component
|
||||
is the RaidNode, a daemon that creates parity files from specified HDFS files.
|
||||
The second component "raidfs" is a software that is layered over a HDFS client and it
|
||||
intercepts all calls that an application makes to the HDFS client. If HDFS encounters
|
||||
corrupted data while reading a file, the raidfs client detects it; it uses the
|
||||
relevant parity blocks to recover the corrupted data (if possible) and returns
|
||||
the data to the application. The application is completely transparent to the
|
||||
fact that parity data was used to satisfy its read request.
|
||||
|
||||
The primary use of this feature is to save disk space for HDFS files.
|
||||
HDFS typically stores data in triplicate.
|
||||
The Distributed Raid File System can be configured in such a way that a set of
|
||||
data blocks of a file are combined together to form one or more parity blocks.
|
||||
This allows one to reduce the replication factor of a HDFS file from 3 to 2
|
||||
while keeping the failure probability roughly the same as before. This typically
|
||||
results in saving 25% to 30% of storage space in a HDFS cluster.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
BUILDING:
|
||||
|
||||
In HADOOP_PREFIX, run ant package to build Hadoop and its contrib packages.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
INSTALLING and CONFIGURING:
|
||||
|
||||
The entire code is packaged in the form of a single jar file hadoop-*-raid.jar.
|
||||
To use HDFS Raid, you need to put the above mentioned jar file on
|
||||
the CLASSPATH. The easiest way is to copy the hadoop-*-raid.jar
|
||||
from HADOOP_PREFIX/build/contrib/raid to HADOOP_PREFIX/lib. Alternatively
|
||||
you can modify HADOOP_CLASSPATH to include this jar, in conf/hadoop-env.sh.
|
||||
|
||||
There is a single configuration file named raid.xml that describes the HDFS
|
||||
path(s) that you want to raid. A sample of this file can be found in
|
||||
src/contrib/raid/conf/raid.xml. Please edit the entries in this file to list the
|
||||
path(s) that you want to raid. Then, edit the hdfs-site.xml file for
|
||||
your installation to include a reference to this raid.xml. You can add the
|
||||
following to your hdfs-site.xml
|
||||
<property>
|
||||
<name>raid.config.file</name>
|
||||
<value>/mnt/hdfs/DFS/conf/raid.xml</value>
|
||||
<description>This is needed by the RaidNode </description>
|
||||
</property>
|
||||
|
||||
Please add an entry to your hdfs-site.xml to enable hdfs clients to use the
|
||||
parity bits to recover corrupted data.
|
||||
|
||||
<property>
|
||||
<name>fs.hdfs.impl</name>
|
||||
<value>org.apache.hadoop.dfs.DistributedRaidFileSystem</value>
|
||||
<description>The FileSystem for hdfs: uris.</description>
|
||||
</property>
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
OPTIONAL CONFIGURATION:
|
||||
|
||||
The following properties can be set in hdfs-site.xml to further tune your configuration:
|
||||
|
||||
Specifies the location where parity files are located.
|
||||
<property>
|
||||
<name>hdfs.raid.locations</name>
|
||||
<value>hdfs://newdfs.data:8000/raid</value>
|
||||
<description>The location for parity files. If this is
|
||||
not defined, then it defaults to /raid.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
Specify the parity stripe length
|
||||
<property>
|
||||
<name>hdfs.raid.stripeLength</name>
|
||||
<value>10</value>
|
||||
<description>The number of blocks in a file to be combined into
|
||||
a single raid parity block. The default value is 5. The lower
|
||||
the number the greater is the disk space you will save when you
|
||||
enable raid.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
Specify the size of HAR part-files
|
||||
<property>
|
||||
<name>raid.har.partfile.size</name>
|
||||
<value>4294967296</value>
|
||||
<description>The size of HAR part files that store raid parity
|
||||
files. The default is 4GB. The higher the number the fewer the
|
||||
number of files used to store the HAR archive.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
Specify which implementation of RaidNode to use.
|
||||
<property>
|
||||
<name>raid.classname</name>
|
||||
<value>org.apache.hadoop.raid.DistRaidNode</value>
|
||||
<description>Specify which implementation of RaidNode to use
|
||||
(class name).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
Specify the periodicity at which the RaidNode re-calculates (if necessary)
|
||||
the parity blocks
|
||||
<property>
|
||||
<name>raid.policy.rescan.interval</name>
|
||||
<value>5000</value>
|
||||
<description>Specify the periodicity in milliseconds after which
|
||||
all source paths are rescanned and parity blocks recomputed if
|
||||
necessary. By default, this value is 1 hour.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
By default, the DistributedRaidFileSystem assumes that the underlying file
|
||||
system is the DistributedFileSystem. If you want to layer the DistributedRaidFileSystem
|
||||
over some other file system, then define a property named fs.raid.underlyingfs.impl
|
||||
that specifies the name of the underlying class. For example, if you want to layer
|
||||
the DistributedRaidFileSystem over an instance of the NewFileSystem, then add:
|
||||
<property>
|
||||
<name>fs.raid.underlyingfs.impl</name>
|
||||
<value>org.apache.hadoop.new.NewFileSystem</value>
|
||||
<description>Specify the filesystem that is layered immediately below the
|
||||
DistributedRaidFileSystem. By default, this value is DistributedFileSystem.
|
||||
</description>
</property>
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
ADMINISTRATION:
|
||||
|
||||
The Distributed Raid File System provides support for administration at runtime without
|
||||
any downtime to cluster services. It is possible to add/delete new paths to be raided without
|
||||
interrupting any load on the cluster. If you change raid.xml, its contents will be
|
||||
reloaded within seconds and the new contents will take effect immediately.
|
||||
|
||||
Designate one machine in your cluster to run the RaidNode software. You can run this daemon
|
||||
on any machine irrespective of whether that machine is running any other hadoop daemon or not.
|
||||
You can start the RaidNode by running the following on the selected machine:
|
||||
nohup $HADOOP_PREFIX/bin/hadoop org.apache.hadoop.raid.RaidNode >> /xxx/logs/hadoop-root-raidnode-hadoop.xxx.com.log &
|
||||
|
||||
Optionally, we provide two scripts to start and stop the RaidNode. Copy the scripts
|
||||
start-raidnode.sh and stop-raidnode.sh to the directory $HADOOP_PREFIX/bin in the machine
|
||||
you would like to deploy the daemon. You can start or stop the RaidNode by directly
|
||||
calling the scripts from that machine. If you want to deploy the RaidNode remotely,
|
||||
copy start-raidnode-remote.sh and stop-raidnode-remote.sh to $HADOOP_PREFIX/bin at
|
||||
the machine from which you want to trigger the remote deployment and create a text
|
||||
file $HADOOP_PREFIX/conf/raidnode at the same machine containing the name of the server
|
||||
where the RaidNode should run. These scripts run ssh to the specified machine and
|
||||
invoke start/stop-raidnode.sh there. As an example, you might want to change
|
||||
start-mapred.sh in the JobTracker machine so that it automatically calls
|
||||
start-raidnode-remote.sh (and do the equivalent thing for stop-mapred.sh and
|
||||
stop-raidnode-remote.sh).
|
||||
|
||||
To validate the integrity of a file system, run RaidFSCK as follows:
|
||||
$HADOOP_PREFIX/bin/hadoop org.apache.hadoop.raid.RaidShell -fsck [path]
|
||||
|
||||
This will print a list of corrupt files (i.e., files which have lost too many
|
||||
blocks and can no longer be fixed by Raid).
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
IMPLEMENTATION:
|
||||
|
||||
The RaidNode periodically scans all the specified paths in the configuration
|
||||
file. For each path, it recursively scans all files that have more than 2 blocks
|
||||
and that have not been modified during the last few hours (default is 24 hours).
|
||||
It picks the specified number of blocks (as specified by the stripe size),
|
||||
from the file, generates a parity block by combining them and
|
||||
stores the results as another HDFS file in the specified destination
|
||||
directory. There is a one-to-one mapping between a HDFS
|
||||
file and its parity file. The RaidNode also periodically finds parity files
|
||||
that are orphaned and deletes them.
|
||||
|
||||
The Distributed Raid FileSystem is layered over a DistributedFileSystem
|
||||
instance and intercepts all calls that go into HDFS. HDFS throws a ChecksumException
|
||||
or a BlockMissingException when a file read encounters bad data. The layered
|
||||
Distributed Raid FileSystem catches these exceptions, locates the corresponding
|
||||
parity file, extracts the original data from the parity files and feeds the
|
||||
extracted data back to the application in a completely transparent way.
|
||||
|
||||
The layered Distributed Raid FileSystem does not fix the data-loss that it
|
||||
encounters while serving data. It merely makes the application transparently
|
||||
use the parity blocks to re-create the original data. A command line tool
|
||||
"fsckraid" is currently under development that will fix the corrupted files
|
||||
by extracting the data from the associated parity files. An administrator
|
||||
can run "fsckraid" manually as and when needed.
|
|
@ -1,64 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Before you can run these subtargets directly, you need
|
||||
to call at top-level: ant deploy-contrib compile-core-test
|
||||
-->
|
||||
<project name="raid" default="jar">
|
||||
|
||||
<import file="../build-contrib.xml"/>
|
||||
|
||||
<!-- the unit test classpath -->
|
||||
<path id="contrib.classpath.raid">
|
||||
<pathelement location="${hadoop.root}/src/contrib/raid/lib"/>
|
||||
<path refid="contrib-classpath"/>
|
||||
</path>
|
||||
|
||||
<!-- Entry point for the contrib tests: compiles main and test sources, then
     delegates to test-junit. Runs only when the test.available property is set. -->
<target name="test" depends="compile,compile-test,test-junit" description="Automated Test Framework" if="test.available"/>
|
||||
|
||||
<!-- Runs the JUnit tests in a forked JVM. A failing or erroring test does not
     halt the run (haltonfailure="no"); it sets the tests.failed property, and
     the fail task at the end of this target then fails the build. -->
<target name="test-junit" depends="compile,compile-test" if="test.available">
|
||||
<junit maxmemory="512m" showoutput="${test.output}" fork="yes" printsummary="yes" errorProperty="tests.failed"
|
||||
haltonfailure="no" failureProperty="tests.failed" timeout="${test.timeout}">
|
||||
|
||||
<classpath refid="test.classpath"/>
|
||||
<!-- System properties passed to the forked test JVM. -->
<sysproperty key="test.build.data" value="${build.test}/data"/>
|
||||
<sysproperty key="build.test" value="${build.test}"/>
|
||||
<sysproperty key="user.dir" value="${build.test}/data"/>
|
||||
<sysproperty key="fs.default.name" value="${fs.default.name}"/>
|
||||
<sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
|
||||
<sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
|
||||
<sysproperty key="test.src.dir" value="${test.src.dir}"/>
|
||||
<formatter type="${test.junit.output.format}" />
|
||||
<!-- Default batch: every Test*.java under src.test, used when the testcase
     property is not set. -->
<batchtest todir="${build.test}" unless="testcase">
|
||||
<fileset dir="${src.test}">
|
||||
<include name="**/Test*.java"/>
|
||||
</fileset>
|
||||
</batchtest>
|
||||
<!-- Single-test batch: only the class named by the testcase property. -->
<batchtest todir="${build.test}" if="testcase">
|
||||
<fileset dir="${src.test}">
|
||||
<include name="**/${testcase}.java"/>
|
||||
</fileset>
|
||||
</batchtest>
|
||||
</junit>
|
||||
<!-- Deferred failure: raised only after all selected tests have run. -->
<fail if="tests.failed">Tests failed!</fail>
|
||||
</target>
|
||||
|
||||
</project>
|
||||
|
|
@ -1,145 +0,0 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
|
||||
<info organisation="org.apache.hadoop" module="${ant.project.name}">
|
||||
<license name="Apache 2.0"/>
|
||||
<description>Raid</description>
|
||||
</info>
|
||||
<configurations defaultconfmapping="default">
|
||||
<!--these match the Maven configurations-->
|
||||
<conf name="default" extends="master,runtime"/>
|
||||
<conf name="master" description="contains the artifact but no dependencies"/>
|
||||
<conf name="runtime" description="runtime but not the artifact" />
|
||||
|
||||
<conf name="common" visibility="private" extends="runtime"
|
||||
description="artifacts needed to compile/test the application"/>
|
||||
<conf name="test" visibility="private" extends="runtime"/>
|
||||
</configurations>
|
||||
|
||||
<publications>
|
||||
<!--get the artifact from our module name-->
|
||||
<artifact conf="master"/>
|
||||
</publications>
|
||||
<dependencies>
|
||||
<dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="common->default"/>
|
||||
<dependency org="org.apache.hadoop"
|
||||
name="hadoop-common"
|
||||
rev="${hadoop-common.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="org.apache.hadoop"
|
||||
name="hadoop-common"
|
||||
rev="${hadoop-common.version}"
|
||||
conf="test->default">
|
||||
<artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests"/>
|
||||
</dependency>
|
||||
<dependency org="org.apache.hadoop"
|
||||
name="hadoop-hdfs"
|
||||
rev="${hadoop-hdfs.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="org.apache.hadoop"
|
||||
name="hadoop-hdfs"
|
||||
rev="${hadoop-hdfs.version}"
|
||||
conf="test->default">
|
||||
<artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
|
||||
</dependency>
|
||||
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
|
||||
rev="${yarn.version}" conf="common->default"/>
|
||||
<dependency org="org.apache.hadoop" name="hadoop-yarn-common"
|
||||
rev="${yarn.version}" conf="common->default"/>
|
||||
<dependency org="org.apache.hadoop" name="hadoop-archives"
|
||||
rev="${hadoop-common.version}" conf="common->default"/>
|
||||
|
||||
<dependency org="commons-logging"
|
||||
name="commons-logging"
|
||||
rev="${commons-logging.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="log4j"
|
||||
name="log4j"
|
||||
rev="${log4j.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="junit"
|
||||
name="junit"
|
||||
rev="${junit.version}"
|
||||
conf="common->default"/>
|
||||
|
||||
<!-- necessary for Mini*Clusters -->
|
||||
<dependency org="commons-httpclient"
|
||||
name="commons-httpclient"
|
||||
rev="${commons-httpclient.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="commons-codec"
|
||||
name="commons-codec"
|
||||
rev="${commons-codec.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="commons-net"
|
||||
name="commons-net"
|
||||
rev="${commons-net.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="org.mortbay.jetty"
|
||||
name="jetty"
|
||||
rev="${jetty.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="org.mortbay.jetty"
|
||||
name="jetty-util"
|
||||
rev="${jetty-util.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="org.mortbay.jetty"
|
||||
name="jsp-api-2.1"
|
||||
rev="${jetty.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="org.mortbay.jetty"
|
||||
name="jsp-2.1"
|
||||
rev="${jetty.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="org.mortbay.jetty"
|
||||
name="servlet-api-2.5"
|
||||
rev="${servlet-api-2.5.version}"
|
||||
conf="common->master"/>
|
||||
<dependency org="commons-cli"
|
||||
name="commons-cli"
|
||||
rev="${commons-cli.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="org.apache.avro"
|
||||
name="avro"
|
||||
rev="${avro.version}"
|
||||
conf="common->default">
|
||||
<exclude module="ant"/>
|
||||
<exclude module="jetty"/>
|
||||
<exclude module="slf4j-simple"/>
|
||||
</dependency>
|
||||
<dependency org="org.codehaus.jackson"
|
||||
name="jackson-mapper-asl"
|
||||
rev="${jackson.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="org.codehaus.jackson"
|
||||
name="jackson-core-asl"
|
||||
rev="${jackson.version}"
|
||||
conf="common->default"/>
|
||||
<dependency org="com.thoughtworks.paranamer"
|
||||
name="paranamer"
|
||||
rev="${paranamer.version}"
|
||||
conf="common->default"/>
|
||||
|
||||
<!-- Exclusions for transitive dependencies pulled in by log4j -->
|
||||
<exclude org="com.sun.jdmk"/>
|
||||
<exclude org="com.sun.jmx"/>
|
||||
<exclude org="javax.jms"/>
|
||||
<exclude org="javax.mail"/>
|
||||
|
||||
</dependencies>
|
||||
</ivy-module>
|
|
@ -1,18 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#This properties file lists the versions of the various artifacts used by hadoop.
|
||||
#It drives ivy and the generation of a maven POM
|
||||
#These are the versions of our dependencies (in alphabetical order)
|
|
@ -257,11 +257,6 @@
|
|||
<artifactId>hadoop-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs-raid</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
|
|
Loading…
Reference in New Issue