Revert MAPREDUCE-3868. Reenable Raid.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1363572 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4c51dacd52
commit
370c65f282
|
@ -1,60 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!--
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
-->
|
|
||||||
<assembly>
|
|
||||||
<id>hadoop-raid-dist</id>
|
|
||||||
<formats>
|
|
||||||
<format>dir</format>
|
|
||||||
</formats>
|
|
||||||
<includeBaseDirectory>false</includeBaseDirectory>
|
|
||||||
<fileSets>
|
|
||||||
<!-- Configuration files -->
|
|
||||||
<fileSet>
|
|
||||||
<directory>${basedir}/src/main/conf</directory>
|
|
||||||
<outputDirectory>/etc/hadoop</outputDirectory>
|
|
||||||
<includes>
|
|
||||||
<include>*</include>
|
|
||||||
</includes>
|
|
||||||
</fileSet>
|
|
||||||
<fileSet>
|
|
||||||
<directory>${basedir}/src/main/sbin</directory>
|
|
||||||
<outputDirectory>/sbin</outputDirectory>
|
|
||||||
<includes>
|
|
||||||
<include>*</include>
|
|
||||||
</includes>
|
|
||||||
<fileMode>0755</fileMode>
|
|
||||||
</fileSet>
|
|
||||||
<fileSet>
|
|
||||||
<directory>${basedir}/src/main/libexec</directory>
|
|
||||||
<outputDirectory>/libexec</outputDirectory>
|
|
||||||
<includes>
|
|
||||||
<include>*</include>
|
|
||||||
</includes>
|
|
||||||
<fileMode>0755</fileMode>
|
|
||||||
</fileSet>
|
|
||||||
<!-- Documentation -->
|
|
||||||
<fileSet>
|
|
||||||
<directory>${project.build.directory}/site</directory>
|
|
||||||
<outputDirectory>/share/doc/hadoop/raid</outputDirectory>
|
|
||||||
</fileSet>
|
|
||||||
</fileSets>
|
|
||||||
<dependencySets>
|
|
||||||
<dependencySet>
|
|
||||||
<outputDirectory>/share/hadoop/${hadoop.component}/lib</outputDirectory>
|
|
||||||
<unpack>false</unpack>
|
|
||||||
<scope>runtime</scope>
|
|
||||||
<useProjectArtifact>true</useProjectArtifact>
|
|
||||||
</dependencySet>
|
|
||||||
</dependencySets>
|
|
||||||
</assembly>
|
|
|
@ -52,11 +52,6 @@
|
||||||
<artifactId>hadoop-yarn-api</artifactId>
|
<artifactId>hadoop-yarn-api</artifactId>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-hdfs-raid</artifactId>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
@ -125,7 +120,6 @@
|
||||||
run cp -r $ROOT/hadoop-common-project/hadoop-common/target/hadoop-common-${project.version}/* .
|
run cp -r $ROOT/hadoop-common-project/hadoop-common/target/hadoop-common-${project.version}/* .
|
||||||
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* .
|
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* .
|
||||||
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* .
|
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* .
|
||||||
run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-raid/target/hadoop-hdfs-raid-${project.version}/* .
|
|
||||||
run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* .
|
run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* .
|
||||||
run cp -r $ROOT/hadoop-tools/hadoop-tools-dist/target/hadoop-tools-dist-${project.version}/* .
|
run cp -r $ROOT/hadoop-tools/hadoop-tools-dist/target/hadoop-tools-dist-${project.version}/* .
|
||||||
echo
|
echo
|
||||||
|
|
|
@ -1,170 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!--
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
|
|
||||||
-->
|
|
||||||
<project>
|
|
||||||
<modelVersion>4.0.0</modelVersion>
|
|
||||||
<parent>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-project-dist</artifactId>
|
|
||||||
<version>3.0.0-SNAPSHOT</version>
|
|
||||||
<relativePath>../../hadoop-project-dist</relativePath>
|
|
||||||
</parent>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-hdfs-raid</artifactId>
|
|
||||||
<version>3.0.0-SNAPSHOT</version>
|
|
||||||
<packaging>jar</packaging>
|
|
||||||
|
|
||||||
<name>Apache Hadoop HDFS Raid</name>
|
|
||||||
<description>Apache Hadoop HDFS Raid</description>
|
|
||||||
|
|
||||||
|
|
||||||
<properties>
|
|
||||||
<hadoop.component>raid</hadoop.component>
|
|
||||||
<is.hadoop.component>false</is.hadoop.component>
|
|
||||||
</properties>
|
|
||||||
|
|
||||||
<dependencies>
|
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-annotations</artifactId>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-minicluster</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-archives</artifactId>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
|
||||||
|
|
||||||
<build>
|
|
||||||
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<artifactId>maven-dependency-plugin</artifactId>
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<id>create-mrapp-generated-classpath</id>
|
|
||||||
<phase>generate-test-resources</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>build-classpath</goal>
|
|
||||||
</goals>
|
|
||||||
<configuration>
|
|
||||||
<!--
|
|
||||||
This is needed to run the unit tests. It generates the required classpath
|
|
||||||
that is required in the env of the launch container in the mini mr/yarn cluster.
|
|
||||||
-->
|
|
||||||
<outputFile>${project.build.directory}/test-classes/mrapp-generated-classpath</outputFile>
|
|
||||||
</configuration>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.rat</groupId>
|
|
||||||
<artifactId>apache-rat-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<excludes>
|
|
||||||
</excludes>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.codehaus.mojo</groupId>
|
|
||||||
<artifactId>findbugs-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<excludeFilterFile combine.self="override"></excludeFilterFile>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
|
|
||||||
<profiles>
|
|
||||||
<profile>
|
|
||||||
<id>docs</id>
|
|
||||||
<activation>
|
|
||||||
<activeByDefault>false</activeByDefault>
|
|
||||||
</activation>
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-site-plugin</artifactId>
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<id>docs</id>
|
|
||||||
<phase>prepare-package</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>site</goal>
|
|
||||||
</goals>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
</profile>
|
|
||||||
|
|
||||||
<profile>
|
|
||||||
<id>dist</id>
|
|
||||||
<activation>
|
|
||||||
<activeByDefault>false</activeByDefault>
|
|
||||||
</activation>
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
|
||||||
<artifactId>maven-assembly-plugin</artifactId>
|
|
||||||
<dependencies>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-assemblies</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<id>dist</id>
|
|
||||||
<phase>prepare-package</phase>
|
|
||||||
<goals>
|
|
||||||
<goal>single</goal>
|
|
||||||
</goals>
|
|
||||||
<configuration>
|
|
||||||
<finalName>${project.artifactId}-${project.version}</finalName>
|
|
||||||
<appendAssemblyId>false</appendAssemblyId>
|
|
||||||
<attach>false</attach>
|
|
||||||
<descriptorRefs>
|
|
||||||
<descriptorRef>hadoop-raid-dist</descriptorRef>
|
|
||||||
</descriptorRefs>
|
|
||||||
</configuration>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
</profile>
|
|
||||||
</profiles>
|
|
||||||
</project>
|
|
|
@ -1,58 +0,0 @@
|
||||||
<configuration>
|
|
||||||
<srcPath prefix="hdfs://dfs1.xxx.com:8000/user/dhruba/">
|
|
||||||
<policy name = "dhruba">
|
|
||||||
<property>
|
|
||||||
<name>srcReplication</name>
|
|
||||||
<value>3</value>
|
|
||||||
<description> pick files for RAID only if their replication factor is
|
|
||||||
greater than or equal to this value.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>targetReplication</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description> after RAIDing, decrease the replication factor of a file to
|
|
||||||
this value.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>metaReplication</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description> the replication factor of the RAID meta file
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>modTimePeriod</name>
|
|
||||||
<value>3600000</value>
|
|
||||||
<description> time (milliseconds) after a file is modified to make it a
|
|
||||||
candidate for RAIDing
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
</policy>
|
|
||||||
</srcPath>
|
|
||||||
<srcPath prefix="hdfs://dfs1.xxx.com:9000/warehouse/table1">
|
|
||||||
<policy name = "table1">
|
|
||||||
<property>
|
|
||||||
<name>targetReplication</name>
|
|
||||||
<value>1</value>
|
|
||||||
<description> after RAIDing, decrease the replication factor of a file to
|
|
||||||
this value.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>metaReplication</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description> the replication factor of the RAID meta file
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>modTimePeriod</name>
|
|
||||||
<value>3600000</value>
|
|
||||||
<description> time (milliseconds) after a file is modified to make it a
|
|
||||||
candidate for RAIDing
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
</policy>
|
|
||||||
</srcPath>
|
|
||||||
</configuration>
|
|
||||||
|
|
|
@ -1,509 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.hdfs;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.text.DateFormat;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Random;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.ChecksumException;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FSInputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.FilterFileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.raid.Decoder;
|
|
||||||
import org.apache.hadoop.raid.RaidNode;
|
|
||||||
import org.apache.hadoop.raid.ReedSolomonDecoder;
|
|
||||||
import org.apache.hadoop.raid.XORDecoder;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
|
|
||||||
import org.apache.hadoop.util.ReflectionUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is an implementation of the Hadoop RAID Filesystem. This FileSystem
|
|
||||||
* wraps an instance of the DistributedFileSystem.
|
|
||||||
* If a file is corrupted, this FileSystem uses the parity blocks to
|
|
||||||
* regenerate the bad block.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class DistributedRaidFileSystem extends FilterFileSystem {
|
|
||||||
|
|
||||||
// these are alternate locations that can be used for read-only access
|
|
||||||
DecodeInfo[] alternates;
|
|
||||||
Configuration conf;
|
|
||||||
int stripeLength;
|
|
||||||
|
|
||||||
DistributedRaidFileSystem() throws IOException {
|
|
||||||
}
|
|
||||||
|
|
||||||
DistributedRaidFileSystem(FileSystem fs) throws IOException {
|
|
||||||
super(fs);
|
|
||||||
alternates = null;
|
|
||||||
stripeLength = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Information required for decoding a source file
|
|
||||||
static private class DecodeInfo {
|
|
||||||
final Path destPath;
|
|
||||||
final ErasureCodeType type;
|
|
||||||
final Configuration conf;
|
|
||||||
final int stripeLength;
|
|
||||||
private DecodeInfo(Configuration conf, ErasureCodeType type, Path destPath) {
|
|
||||||
this.conf = conf;
|
|
||||||
this.type = type;
|
|
||||||
this.destPath = destPath;
|
|
||||||
this.stripeLength = RaidNode.getStripeLength(conf);
|
|
||||||
}
|
|
||||||
|
|
||||||
Decoder createDecoder() {
|
|
||||||
if (this.type == ErasureCodeType.XOR) {
|
|
||||||
return new XORDecoder(conf, stripeLength);
|
|
||||||
} else if (this.type == ErasureCodeType.RS) {
|
|
||||||
return new ReedSolomonDecoder(conf, stripeLength,
|
|
||||||
RaidNode.rsParityLength(conf));
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize a Raid FileSystem
|
|
||||||
*/
|
|
||||||
public void initialize(URI name, Configuration conf) throws IOException {
|
|
||||||
this.conf = conf;
|
|
||||||
|
|
||||||
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl",
|
|
||||||
DistributedFileSystem.class);
|
|
||||||
if (clazz == null) {
|
|
||||||
throw new IOException("No FileSystem for fs.raid.underlyingfs.impl.");
|
|
||||||
}
|
|
||||||
|
|
||||||
this.fs = (FileSystem)ReflectionUtils.newInstance(clazz, null);
|
|
||||||
super.initialize(name, conf);
|
|
||||||
|
|
||||||
// find stripe length configured
|
|
||||||
stripeLength = RaidNode.getStripeLength(conf);
|
|
||||||
if (stripeLength == 0) {
|
|
||||||
LOG.info("dfs.raid.stripeLength is incorrectly defined to be " +
|
|
||||||
stripeLength + " Ignoring...");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Put XOR and RS in alternates
|
|
||||||
alternates= new DecodeInfo[2];
|
|
||||||
Path xorPath = RaidNode.xorDestinationPath(conf, fs);
|
|
||||||
alternates[0] = new DecodeInfo(conf, ErasureCodeType.XOR, xorPath);
|
|
||||||
Path rsPath = RaidNode.rsDestinationPath(conf, fs);
|
|
||||||
alternates[1] = new DecodeInfo(conf, ErasureCodeType.RS, rsPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns the underlying filesystem
|
|
||||||
*/
|
|
||||||
public FileSystem getFileSystem() throws IOException {
|
|
||||||
return fs;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
|
|
||||||
ExtFSDataInputStream fd = new ExtFSDataInputStream(conf, this, alternates, f,
|
|
||||||
stripeLength, bufferSize);
|
|
||||||
return fd;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
if (fs != null) {
|
|
||||||
try {
|
|
||||||
fs.close();
|
|
||||||
} catch(IOException ie) {
|
|
||||||
//this might already be closed, ignore
|
|
||||||
}
|
|
||||||
}
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Layered filesystem input stream. This input stream tries reading
|
|
||||||
* from alternate locations if it encoumters read errors in the primary location.
|
|
||||||
*/
|
|
||||||
private static class ExtFSDataInputStream extends FSDataInputStream {
|
|
||||||
|
|
||||||
private static class UnderlyingBlock {
|
|
||||||
// File that holds this block. Need not be the same as outer file.
|
|
||||||
public Path path;
|
|
||||||
// Offset within path where this block starts.
|
|
||||||
public long actualFileOffset;
|
|
||||||
// Offset within the outer file where this block starts.
|
|
||||||
public long originalFileOffset;
|
|
||||||
// Length of the block (length <= blk sz of outer file).
|
|
||||||
public long length;
|
|
||||||
public UnderlyingBlock(Path path, long actualFileOffset,
|
|
||||||
long originalFileOffset, long length) {
|
|
||||||
this.path = path;
|
|
||||||
this.actualFileOffset = actualFileOffset;
|
|
||||||
this.originalFileOffset = originalFileOffset;
|
|
||||||
this.length = length;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create an input stream that wraps all the reads/positions/seeking.
|
|
||||||
*/
|
|
||||||
private static class ExtFsInputStream extends FSInputStream {
|
|
||||||
|
|
||||||
// Extents of "good" underlying data that can be read.
|
|
||||||
private UnderlyingBlock[] underlyingBlocks;
|
|
||||||
private long currentOffset;
|
|
||||||
private FSDataInputStream currentStream;
|
|
||||||
private UnderlyingBlock currentBlock;
|
|
||||||
private byte[] oneBytebuff = new byte[1];
|
|
||||||
private int nextLocation;
|
|
||||||
private DistributedRaidFileSystem lfs;
|
|
||||||
private Path path;
|
|
||||||
private FileStatus stat;
|
|
||||||
private final DecodeInfo[] alternates;
|
|
||||||
private final int buffersize;
|
|
||||||
private final Configuration conf;
|
|
||||||
private final int stripeLength;
|
|
||||||
|
|
||||||
ExtFsInputStream(Configuration conf, DistributedRaidFileSystem lfs,
|
|
||||||
DecodeInfo[] alternates, Path path, int stripeLength, int buffersize)
|
|
||||||
throws IOException {
|
|
||||||
this.path = path;
|
|
||||||
this.nextLocation = 0;
|
|
||||||
// Construct array of blocks in file.
|
|
||||||
this.stat = lfs.getFileStatus(path);
|
|
||||||
long numBlocks = (this.stat.getLen() % this.stat.getBlockSize() == 0) ?
|
|
||||||
this.stat.getLen() / this.stat.getBlockSize() :
|
|
||||||
1 + this.stat.getLen() / this.stat.getBlockSize();
|
|
||||||
this.underlyingBlocks = new UnderlyingBlock[(int)numBlocks];
|
|
||||||
for (int i = 0; i < numBlocks; i++) {
|
|
||||||
long actualFileOffset = i * stat.getBlockSize();
|
|
||||||
long originalFileOffset = i * stat.getBlockSize();
|
|
||||||
long length = Math.min(
|
|
||||||
stat.getBlockSize(), stat.getLen() - originalFileOffset);
|
|
||||||
this.underlyingBlocks[i] = new UnderlyingBlock(
|
|
||||||
path, actualFileOffset, originalFileOffset, length);
|
|
||||||
}
|
|
||||||
this.currentOffset = 0;
|
|
||||||
this.currentBlock = null;
|
|
||||||
this.alternates = alternates;
|
|
||||||
this.buffersize = buffersize;
|
|
||||||
this.conf = conf;
|
|
||||||
this.lfs = lfs;
|
|
||||||
this.stripeLength = stripeLength;
|
|
||||||
// Open a stream to the first block.
|
|
||||||
openCurrentStream();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void closeCurrentStream() throws IOException {
|
|
||||||
if (currentStream != null) {
|
|
||||||
currentStream.close();
|
|
||||||
currentStream = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Open a stream to the file containing the current block
|
|
||||||
* and seek to the appropriate offset
|
|
||||||
*/
|
|
||||||
private void openCurrentStream() throws IOException {
|
|
||||||
int blockIdx = (int)(currentOffset/stat.getBlockSize());
|
|
||||||
UnderlyingBlock block = underlyingBlocks[blockIdx];
|
|
||||||
// If the current path is the same as we want.
|
|
||||||
if (currentBlock == block ||
|
|
||||||
currentBlock != null && currentBlock.path == block.path) {
|
|
||||||
// If we have a valid stream, nothing to do.
|
|
||||||
if (currentStream != null) {
|
|
||||||
currentBlock = block;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
closeCurrentStream();
|
|
||||||
}
|
|
||||||
currentBlock = block;
|
|
||||||
currentStream = lfs.fs.open(currentBlock.path, buffersize);
|
|
||||||
long offset = block.actualFileOffset +
|
|
||||||
(currentOffset - block.originalFileOffset);
|
|
||||||
currentStream.seek(offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the number of bytes available in the current block.
|
|
||||||
*/
|
|
||||||
private int blockAvailable() {
|
|
||||||
return (int) (currentBlock.length -
|
|
||||||
(currentOffset - currentBlock.originalFileOffset));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized int available() throws IOException {
|
|
||||||
// Application should not assume that any bytes are buffered here.
|
|
||||||
nextLocation = 0;
|
|
||||||
return Math.min(blockAvailable(), currentStream.available());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized void close() throws IOException {
|
|
||||||
closeCurrentStream();
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean markSupported() { return false; }
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void mark(int readLimit) {
|
|
||||||
// Mark and reset are not supported.
|
|
||||||
nextLocation = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void reset() throws IOException {
|
|
||||||
// Mark and reset are not supported.
|
|
||||||
nextLocation = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized int read() throws IOException {
|
|
||||||
int value = read(oneBytebuff);
|
|
||||||
if (value < 0) {
|
|
||||||
return value;
|
|
||||||
} else {
|
|
||||||
return oneBytebuff[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized int read(byte[] b) throws IOException {
|
|
||||||
int value = read(b, 0, b.length);
|
|
||||||
nextLocation = 0;
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized int read(byte[] b, int offset, int len)
|
|
||||||
throws IOException {
|
|
||||||
while (true) {
|
|
||||||
openCurrentStream();
|
|
||||||
try{
|
|
||||||
int limit = Math.min(blockAvailable(), len);
|
|
||||||
int value = currentStream.read(b, offset, limit);
|
|
||||||
currentOffset += value;
|
|
||||||
nextLocation = 0;
|
|
||||||
return value;
|
|
||||||
} catch (BlockMissingException e) {
|
|
||||||
setAlternateLocations(e, currentOffset);
|
|
||||||
} catch (ChecksumException e) {
|
|
||||||
setAlternateLocations(e, currentOffset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized int read(long position, byte[] b, int offset, int len)
|
|
||||||
throws IOException {
|
|
||||||
long oldPos = currentOffset;
|
|
||||||
seek(position);
|
|
||||||
try {
|
|
||||||
return read(b, offset, len);
|
|
||||||
} finally {
|
|
||||||
seek(oldPos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized long skip(long n) throws IOException {
|
|
||||||
long skipped = 0;
|
|
||||||
while (skipped < n) {
|
|
||||||
int val = read();
|
|
||||||
if (val < 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
skipped++;
|
|
||||||
}
|
|
||||||
nextLocation = 0;
|
|
||||||
return skipped;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized long getPos() throws IOException {
|
|
||||||
nextLocation = 0;
|
|
||||||
return currentOffset;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public synchronized void seek(long pos) throws IOException {
|
|
||||||
if (pos != currentOffset) {
|
|
||||||
closeCurrentStream();
|
|
||||||
currentOffset = pos;
|
|
||||||
openCurrentStream();
|
|
||||||
}
|
|
||||||
nextLocation = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean seekToNewSource(long targetPos) throws IOException {
|
|
||||||
seek(targetPos);
|
|
||||||
boolean value = currentStream.seekToNewSource(currentStream.getPos());
|
|
||||||
nextLocation = 0;
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* position readable again.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void readFully(long pos, byte[] b, int offset, int length)
|
|
||||||
throws IOException {
|
|
||||||
long oldPos = currentOffset;
|
|
||||||
seek(pos);
|
|
||||||
try {
|
|
||||||
while (true) {
|
|
||||||
// This loop retries reading until successful. Unrecoverable errors
|
|
||||||
// cause exceptions.
|
|
||||||
// currentOffset is changed by read().
|
|
||||||
try {
|
|
||||||
while (length > 0) {
|
|
||||||
int n = read(b, offset, length);
|
|
||||||
if (n < 0) {
|
|
||||||
throw new IOException("Premature EOF");
|
|
||||||
}
|
|
||||||
offset += n;
|
|
||||||
length -= n;
|
|
||||||
}
|
|
||||||
nextLocation = 0;
|
|
||||||
return;
|
|
||||||
} catch (BlockMissingException e) {
|
|
||||||
setAlternateLocations(e, currentOffset);
|
|
||||||
} catch (ChecksumException e) {
|
|
||||||
setAlternateLocations(e, currentOffset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
seek(oldPos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void readFully(long pos, byte[] b) throws IOException {
|
|
||||||
readFully(pos, b, 0, b.length);
|
|
||||||
nextLocation = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract good block from RAID
|
|
||||||
* @throws IOException if all alternate locations are exhausted
|
|
||||||
*/
|
|
||||||
private void setAlternateLocations(IOException curexp, long offset)
|
|
||||||
throws IOException {
|
|
||||||
while (alternates != null && nextLocation < alternates.length) {
|
|
||||||
try {
|
|
||||||
int idx = nextLocation++;
|
|
||||||
// Start offset of block.
|
|
||||||
long corruptOffset =
|
|
||||||
(offset / stat.getBlockSize()) * stat.getBlockSize();
|
|
||||||
// Make sure we use DFS and not DistributedRaidFileSystem for unRaid.
|
|
||||||
Configuration clientConf = new Configuration(conf);
|
|
||||||
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl",
|
|
||||||
DistributedFileSystem.class);
|
|
||||||
clientConf.set("fs.hdfs.impl", clazz.getName());
|
|
||||||
// Disable caching so that a previously cached RaidDfs is not used.
|
|
||||||
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
|
||||||
Path npath = RaidNode.unRaidCorruptBlock(clientConf, path,
|
|
||||||
alternates[idx].destPath,
|
|
||||||
alternates[idx].createDecoder(),
|
|
||||||
stripeLength, corruptOffset);
|
|
||||||
if (npath == null)
|
|
||||||
continue;
|
|
||||||
try {
|
|
||||||
String outdir = conf.get("fs.raid.recoverylogdir");
|
|
||||||
if (outdir != null) {
|
|
||||||
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
|
|
||||||
java.util.Date date = new java.util.Date();
|
|
||||||
String fname = path.getName() + dateFormat.format(date) +
|
|
||||||
(new Random()).nextInt() + ".txt";
|
|
||||||
Path outputunraid = new Path(outdir, fname);
|
|
||||||
FileSystem fs = outputunraid.getFileSystem(conf);
|
|
||||||
FSDataOutputStream dout = fs.create(outputunraid);
|
|
||||||
PrintStream ps = new PrintStream(dout);
|
|
||||||
ps.println("Recovery attempt log");
|
|
||||||
ps.println("Source path : " + path );
|
|
||||||
ps.println("Alternate path : " + alternates[idx].destPath);
|
|
||||||
ps.println("Stripe lentgh : " + stripeLength);
|
|
||||||
ps.println("Corrupt offset : " + corruptOffset);
|
|
||||||
String output = (npath==null) ? "UNSUCCESSFUL" : npath.toString();
|
|
||||||
ps.println("Output from unRaid : " + output);
|
|
||||||
ps.close();
|
|
||||||
}
|
|
||||||
} catch (Exception exc) {
|
|
||||||
LOG.info("Error while creating recovery log: " + exc);
|
|
||||||
}
|
|
||||||
|
|
||||||
closeCurrentStream();
|
|
||||||
LOG.info("Using block at offset " + corruptOffset + " from " +
|
|
||||||
npath);
|
|
||||||
currentBlock.path = npath;
|
|
||||||
currentBlock.actualFileOffset = 0; // Single block in file.
|
|
||||||
// Dont change currentOffset, in case the user had done a seek?
|
|
||||||
openCurrentStream();
|
|
||||||
|
|
||||||
return;
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Error in using alternate path " + path + ". " + e +
|
|
||||||
" Ignoring...");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw curexp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The name of the file system that is immediately below the
|
|
||||||
* DistributedRaidFileSystem. This is specified by the
|
|
||||||
* configuration parameter called fs.raid.underlyingfs.impl.
|
|
||||||
* If this parameter is not specified in the configuration, then
|
|
||||||
* the default class DistributedFileSystem is returned.
|
|
||||||
* @param conf the configuration object
|
|
||||||
* @return the filesystem object immediately below DistributedRaidFileSystem
|
|
||||||
* @throws IOException if all alternate locations are exhausted
|
|
||||||
*/
|
|
||||||
private FileSystem getUnderlyingFileSystem(Configuration conf) {
|
|
||||||
Class<?> clazz = conf.getClass("fs.raid.underlyingfs.impl", DistributedFileSystem.class);
|
|
||||||
FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
|
|
||||||
return fs;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* constructor for ext input stream.
|
|
||||||
* @param fs the underlying filesystem
|
|
||||||
* @param p the path in the underlying file system
|
|
||||||
* @param buffersize the size of IO
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public ExtFSDataInputStream(Configuration conf, DistributedRaidFileSystem lfs,
|
|
||||||
DecodeInfo[] alternates, Path p, int stripeLength, int buffersize) throws IOException {
|
|
||||||
super(new ExtFsInputStream(conf, lfs, alternates, p, stripeLength, buffersize));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,79 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.hdfs;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.RemoteIterator;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.hdfs.tools.DFSck;
|
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
|
||||||
|
|
||||||
public abstract class RaidDFSUtil {
|
|
||||||
/**
|
|
||||||
* Returns the corrupt blocks in a file.
|
|
||||||
*/
|
|
||||||
public static List<LocatedBlock> corruptBlocksInFile(
|
|
||||||
DistributedFileSystem dfs, String path, long offset, long length)
|
|
||||||
throws IOException {
|
|
||||||
List<LocatedBlock> corrupt = new LinkedList<LocatedBlock>();
|
|
||||||
LocatedBlocks locatedBlocks =
|
|
||||||
getBlockLocations(dfs, path, offset, length);
|
|
||||||
for (LocatedBlock b: locatedBlocks.getLocatedBlocks()) {
|
|
||||||
if (b.isCorrupt() ||
|
|
||||||
(b.getLocations().length == 0 && b.getBlockSize() > 0)) {
|
|
||||||
corrupt.add(b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return corrupt;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static LocatedBlocks getBlockLocations(
|
|
||||||
DistributedFileSystem dfs, String path, long offset, long length)
|
|
||||||
throws IOException {
|
|
||||||
return dfs.getClient().namenode.getBlockLocations(path, offset, length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Make successive calls to listCorruptFiles to obtain all
|
|
||||||
* corrupt files.
|
|
||||||
*/
|
|
||||||
public static String[] getCorruptFiles(DistributedFileSystem dfs)
|
|
||||||
throws IOException {
|
|
||||||
Set<String> corruptFiles = new HashSet<String>();
|
|
||||||
RemoteIterator<Path> cfb = dfs.listCorruptFileBlocks(new Path("/"));
|
|
||||||
while (cfb.hasNext()) {
|
|
||||||
corruptFiles.add(cfb.next().toUri().getPath());
|
|
||||||
}
|
|
||||||
|
|
||||||
return corruptFiles.toArray(new String[corruptFiles.size()]);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,632 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.hdfs.server.blockmanagement;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Comparator;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.*;
|
|
||||||
import org.apache.hadoop.net.NetworkTopology;
|
|
||||||
import org.apache.hadoop.net.Node;
|
|
||||||
import org.apache.hadoop.raid.RaidNode;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This BlockPlacementPolicy spreads out the group of blocks which used by RAID
|
|
||||||
* for recovering each other. This is important for the availability
|
|
||||||
* of the blocks. This class can be used by multiple threads. It has to be
|
|
||||||
* thread safe.
|
|
||||||
*/
|
|
||||||
public class BlockPlacementPolicyRaid extends BlockPlacementPolicy {
|
|
||||||
public static final Log LOG =
|
|
||||||
LogFactory.getLog(BlockPlacementPolicyRaid.class);
|
|
||||||
Configuration conf;
|
|
||||||
private int stripeLength;
|
|
||||||
private int xorParityLength;
|
|
||||||
private int rsParityLength;
|
|
||||||
private String xorPrefix = null;
|
|
||||||
private String rsPrefix = null;
|
|
||||||
private String raidTempPrefix = null;
|
|
||||||
private String raidrsTempPrefix = null;
|
|
||||||
private String raidHarTempPrefix = null;
|
|
||||||
private String raidrsHarTempPrefix = null;
|
|
||||||
private FSNamesystem namesystem = null;
|
|
||||||
private BlockPlacementPolicyDefault defaultPolicy;
|
|
||||||
|
|
||||||
CachedLocatedBlocks cachedLocatedBlocks;
|
|
||||||
CachedFullPathNames cachedFullPathNames;
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public void initialize(Configuration conf, FSClusterStats stats,
|
|
||||||
NetworkTopology clusterMap) {
|
|
||||||
this.conf = conf;
|
|
||||||
this.stripeLength = RaidNode.getStripeLength(conf);
|
|
||||||
this.rsParityLength = RaidNode.rsParityLength(conf);
|
|
||||||
this.xorParityLength = 1;
|
|
||||||
try {
|
|
||||||
this.xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
|
|
||||||
this.rsPrefix = RaidNode.rsDestinationPath(conf).toUri().getPath();
|
|
||||||
} catch (IOException e) {
|
|
||||||
}
|
|
||||||
if (this.xorPrefix == null) {
|
|
||||||
this.xorPrefix = RaidNode.DEFAULT_RAID_LOCATION;
|
|
||||||
}
|
|
||||||
if (this.rsPrefix == null) {
|
|
||||||
this.rsPrefix = RaidNode.DEFAULT_RAIDRS_LOCATION;
|
|
||||||
}
|
|
||||||
// Throws ClassCastException if we cannot cast here.
|
|
||||||
this.namesystem = (FSNamesystem) stats;
|
|
||||||
this.cachedLocatedBlocks = new CachedLocatedBlocks(namesystem);
|
|
||||||
this.cachedFullPathNames = new CachedFullPathNames(namesystem);
|
|
||||||
this.raidTempPrefix = RaidNode.xorTempPrefix(conf);
|
|
||||||
this.raidrsTempPrefix = RaidNode.rsTempPrefix(conf);
|
|
||||||
this.raidHarTempPrefix = RaidNode.xorHarTempPrefix(conf);
|
|
||||||
this.raidrsHarTempPrefix = RaidNode.rsHarTempPrefix(conf);
|
|
||||||
defaultPolicy = new BlockPlacementPolicyDefault(conf, stats, clusterMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
|
|
||||||
DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
|
|
||||||
long blocksize) {
|
|
||||||
return chooseTarget(srcPath, numOfReplicas, writer, chosenNodes,
|
|
||||||
null, blocksize);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DatanodeDescriptor[] chooseTarget(String srcPath, int numOfReplicas,
|
|
||||||
DatanodeDescriptor writer, List<DatanodeDescriptor> chosenNodes,
|
|
||||||
boolean returnChosenNodes,
|
|
||||||
HashMap<Node, Node> excludedNodes, long blocksize) {
|
|
||||||
try {
|
|
||||||
FileType type = getFileType(srcPath);
|
|
||||||
if (type == FileType.NOT_RAID) {
|
|
||||||
return defaultPolicy.chooseTarget(
|
|
||||||
srcPath, numOfReplicas, writer, chosenNodes, blocksize);
|
|
||||||
}
|
|
||||||
if (excludedNodes == null) {
|
|
||||||
excludedNodes = new HashMap<Node, Node>();
|
|
||||||
}
|
|
||||||
addExcludedNodes(srcPath, type, excludedNodes);
|
|
||||||
DatanodeDescriptor[] result =
|
|
||||||
defaultPolicy.chooseTarget(numOfReplicas, writer,
|
|
||||||
chosenNodes, returnChosenNodes, excludedNodes, blocksize);
|
|
||||||
// Add the added block locations in the block locations cache.
|
|
||||||
// So the rest of the blocks know about these locations.
|
|
||||||
cachedLocatedBlocks.get(srcPath).
|
|
||||||
add(new LocatedBlock(new ExtendedBlock(), result));
|
|
||||||
return result;
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.debug("Error happend when choosing datanode to write:" +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
return defaultPolicy.chooseTarget(srcPath, numOfReplicas, writer,
|
|
||||||
chosenNodes, blocksize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int verifyBlockPlacement(String srcPath, LocatedBlock lBlk,
|
|
||||||
int minRacks) {
|
|
||||||
return defaultPolicy.verifyBlockPlacement(srcPath, lBlk, minRacks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
|
||||||
public DatanodeDescriptor chooseReplicaToDelete(BlockCollection bc,
|
|
||||||
Block block, short replicationFactor,
|
|
||||||
Collection<DatanodeDescriptor> first,
|
|
||||||
Collection<DatanodeDescriptor> second) {
|
|
||||||
|
|
||||||
DatanodeDescriptor chosenNode = null;
|
|
||||||
try {
|
|
||||||
String path = cachedFullPathNames.get(bc);
|
|
||||||
FileType type = getFileType(path);
|
|
||||||
if (type == FileType.NOT_RAID) {
|
|
||||||
return defaultPolicy.chooseReplicaToDelete(
|
|
||||||
bc, block, replicationFactor, first, second);
|
|
||||||
}
|
|
||||||
List<LocatedBlock> companionBlocks =
|
|
||||||
getCompanionBlocks(path, type, block);
|
|
||||||
if (companionBlocks == null || companionBlocks.size() == 0) {
|
|
||||||
// Use the default method if it is not a valid raided or parity file
|
|
||||||
return defaultPolicy.chooseReplicaToDelete(
|
|
||||||
bc, block, replicationFactor, first, second);
|
|
||||||
}
|
|
||||||
// Delete from the first collection first
|
|
||||||
// This ensures the number of unique rack of this block is not reduced
|
|
||||||
Collection<DatanodeDescriptor> all = new HashSet<DatanodeDescriptor>();
|
|
||||||
all.addAll(first);
|
|
||||||
all.addAll(second);
|
|
||||||
chosenNode = chooseReplicaToDelete(companionBlocks, all);
|
|
||||||
if (chosenNode != null) {
|
|
||||||
return chosenNode;
|
|
||||||
}
|
|
||||||
return defaultPolicy.chooseReplicaToDelete(
|
|
||||||
bc, block, replicationFactor, first, second);
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.debug("Error happend when choosing replica to delete" +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
return defaultPolicy.chooseReplicaToDelete(
|
|
||||||
bc, block, replicationFactor, first, second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtain the excluded nodes for the current block that is being written
|
|
||||||
*/
|
|
||||||
void addExcludedNodes(String file, FileType type, HashMap<Node, Node> excluded)
|
|
||||||
throws IOException {
|
|
||||||
Collection<LocatedBlock> blocks = getCompanionBlocks(file, type, null);
|
|
||||||
if (blocks == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (LocatedBlock b : blocks) {
|
|
||||||
for (Node n : b.getLocations()) {
|
|
||||||
excluded.put(n, n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private DatanodeDescriptor chooseReplicaToDelete(
|
|
||||||
Collection<LocatedBlock> companionBlocks,
|
|
||||||
Collection<DatanodeDescriptor> dataNodes) throws IOException {
|
|
||||||
|
|
||||||
if (dataNodes.isEmpty()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
// Count the number of replicas on each node and rack
|
|
||||||
final Map<String, Integer> nodeCompanionBlockCount =
|
|
||||||
countCompanionBlocks(companionBlocks, false);
|
|
||||||
final Map<String, Integer> rackCompanionBlockCount =
|
|
||||||
countCompanionBlocks(companionBlocks, true);
|
|
||||||
|
|
||||||
NodeComparator comparator =
|
|
||||||
new NodeComparator(nodeCompanionBlockCount, rackCompanionBlockCount);
|
|
||||||
return Collections.max(dataNodes, comparator);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Count how many companion blocks are on each datanode or the each rack
|
|
||||||
* @param companionBlocks a collection of all the companion blocks
|
|
||||||
* @param doRackCount count the companion blocks on the racks of datanodes
|
|
||||||
* @param result the map from node name to the number of companion blocks
|
|
||||||
*/
|
|
||||||
static Map<String, Integer> countCompanionBlocks(
|
|
||||||
Collection<LocatedBlock> companionBlocks, boolean doRackCount) {
|
|
||||||
Map<String, Integer> result = new HashMap<String, Integer>();
|
|
||||||
for (LocatedBlock block : companionBlocks) {
|
|
||||||
for (DatanodeInfo d : block.getLocations()) {
|
|
||||||
String name = doRackCount ? d.getParent().getName() : d.getName();
|
|
||||||
if (result.containsKey(name)) {
|
|
||||||
int count = result.get(name) + 1;
|
|
||||||
result.put(name, count);
|
|
||||||
} else {
|
|
||||||
result.put(name, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compares the datanodes based on the number of companion blocks on the same
|
|
||||||
* node and rack. If even, compare the remaining space on the datanodes.
|
|
||||||
*/
|
|
||||||
class NodeComparator implements Comparator<DatanodeDescriptor> {
|
|
||||||
private Map<String, Integer> nodeBlockCount;
|
|
||||||
private Map<String, Integer> rackBlockCount;
|
|
||||||
private NodeComparator(Map<String, Integer> nodeBlockCount,
|
|
||||||
Map<String, Integer> rackBlockCount) {
|
|
||||||
this.nodeBlockCount = nodeBlockCount;
|
|
||||||
this.rackBlockCount = rackBlockCount;
|
|
||||||
}
|
|
||||||
@Override
|
|
||||||
public int compare(DatanodeDescriptor d1, DatanodeDescriptor d2) {
|
|
||||||
int res = compareBlockCount(d1, d2, nodeBlockCount);
|
|
||||||
if (res != 0) {
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
res = compareBlockCount(d1.getParent(), d2.getParent(), rackBlockCount);
|
|
||||||
if (res != 0) {
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
if (d1.getRemaining() > d2.getRemaining()) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (d1.getRemaining() < d2.getRemaining()) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
private int compareBlockCount(Node node1, Node node2,
|
|
||||||
Map<String, Integer> blockCount) {
|
|
||||||
Integer count1 = blockCount.get(node1.getName());
|
|
||||||
Integer count2 = blockCount.get(node2.getName());
|
|
||||||
count1 = count1 == null ? 0 : count1;
|
|
||||||
count2 = count2 == null ? 0 : count2;
|
|
||||||
if (count1 > count2) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (count1 < count2) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtain the companion blocks of the give block
|
|
||||||
* Companion blocks are defined as the blocks that can help recover each
|
|
||||||
* others by using raid decoder.
|
|
||||||
* @param path The path of the file contains the block
|
|
||||||
* @param type The type of this file
|
|
||||||
* @param block The given block
|
|
||||||
* null if it is the block which is currently being written to
|
|
||||||
* @return the block locations of companion blocks
|
|
||||||
*/
|
|
||||||
List<LocatedBlock> getCompanionBlocks(String path, FileType type,
|
|
||||||
Block block) throws IOException {
|
|
||||||
switch (type) {
|
|
||||||
case NOT_RAID:
|
|
||||||
return new ArrayList<LocatedBlock>();
|
|
||||||
case XOR_HAR_TEMP_PARITY:
|
|
||||||
return getCompanionBlocksForHarParityBlock(
|
|
||||||
path, xorParityLength, block);
|
|
||||||
case RS_HAR_TEMP_PARITY:
|
|
||||||
return getCompanionBlocksForHarParityBlock(
|
|
||||||
path, rsParityLength, block);
|
|
||||||
case XOR_TEMP_PARITY:
|
|
||||||
return getCompanionBlocksForParityBlock(
|
|
||||||
getSourceFile(path, raidTempPrefix), path, xorParityLength, block);
|
|
||||||
case RS_TEMP_PARITY:
|
|
||||||
return getCompanionBlocksForParityBlock(
|
|
||||||
getSourceFile(path, raidrsTempPrefix), path, rsParityLength, block);
|
|
||||||
case XOR_PARITY:
|
|
||||||
return getCompanionBlocksForParityBlock(getSourceFile(path, xorPrefix),
|
|
||||||
path, xorParityLength, block);
|
|
||||||
case RS_PARITY:
|
|
||||||
return getCompanionBlocksForParityBlock(getSourceFile(path, rsPrefix),
|
|
||||||
path, rsParityLength, block);
|
|
||||||
case XOR_SOURCE:
|
|
||||||
return getCompanionBlocksForSourceBlock(
|
|
||||||
path, getParityFile(path), xorParityLength, block);
|
|
||||||
case RS_SOURCE:
|
|
||||||
return getCompanionBlocksForSourceBlock(
|
|
||||||
path, getParityFile(path), xorParityLength, block);
|
|
||||||
}
|
|
||||||
return new ArrayList<LocatedBlock>();
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<LocatedBlock> getCompanionBlocksForHarParityBlock(
|
|
||||||
String parity, int parityLength, Block block)
|
|
||||||
throws IOException {
|
|
||||||
int blockIndex = getBlockIndex(parity, block);
|
|
||||||
// consider only parity file in this case because source file block
|
|
||||||
// location is not easy to obtain
|
|
||||||
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
|
|
||||||
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
|
|
||||||
synchronized (parityBlocks) {
|
|
||||||
int start = Math.max(0, blockIndex - parityLength + 1);
|
|
||||||
int end = Math.min(parityBlocks.size(), blockIndex + parityLength);
|
|
||||||
result.addAll(parityBlocks.subList(start, end));
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<LocatedBlock> getCompanionBlocksForParityBlock(
|
|
||||||
String src, String parity, int parityLength, Block block)
|
|
||||||
throws IOException {
|
|
||||||
int blockIndex = getBlockIndex(parity, block);
|
|
||||||
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
|
|
||||||
List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
|
|
||||||
int stripeIndex = blockIndex / parityLength;
|
|
||||||
synchronized (parityBlocks) {
|
|
||||||
int parityStart = stripeIndex * parityLength;
|
|
||||||
int parityEnd = Math.min(parityStart + parityLength,
|
|
||||||
parityBlocks.size());
|
|
||||||
// for parity, always consider the neighbor blocks as companion blocks
|
|
||||||
if (parityStart < parityBlocks.size()) {
|
|
||||||
result.addAll(parityBlocks.subList(parityStart, parityEnd));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (src == null) {
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
List<LocatedBlock> sourceBlocks = cachedLocatedBlocks.get(src);
|
|
||||||
synchronized (sourceBlocks) {
|
|
||||||
int sourceStart = stripeIndex * stripeLength;
|
|
||||||
int sourceEnd = Math.min(sourceStart + stripeLength,
|
|
||||||
sourceBlocks.size());
|
|
||||||
if (sourceStart < sourceBlocks.size()) {
|
|
||||||
result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<LocatedBlock> getCompanionBlocksForSourceBlock(
|
|
||||||
String src, String parity, int parityLength, Block block)
|
|
||||||
throws IOException {
|
|
||||||
int blockIndex = getBlockIndex(src, block);
|
|
||||||
List<LocatedBlock> result = new ArrayList<LocatedBlock>();
|
|
||||||
List<LocatedBlock> sourceBlocks = cachedLocatedBlocks.get(src);
|
|
||||||
int stripeIndex = blockIndex / stripeLength;
|
|
||||||
synchronized (sourceBlocks) {
|
|
||||||
int sourceStart = stripeIndex * stripeLength;
|
|
||||||
int sourceEnd = Math.min(sourceStart + stripeLength,
|
|
||||||
sourceBlocks.size());
|
|
||||||
if (sourceStart < sourceBlocks.size()) {
|
|
||||||
        result.addAll(sourceBlocks.subList(sourceStart, sourceEnd));
      }
    }
    if (parity == null) {
      return result;
    }
    List<LocatedBlock> parityBlocks = cachedLocatedBlocks.get(parity);
    synchronized (parityBlocks) {
      int parityStart = stripeIndex * parityLength;
      int parityEnd = Math.min(parityStart + parityLength,
          parityBlocks.size());
      if (parityStart < parityBlocks.size()) {
        result.addAll(parityBlocks.subList(parityStart, parityEnd));
      }
    }
    return result;
  }

  private int getBlockIndex(String file, Block block) throws IOException {
    List<LocatedBlock> blocks = cachedLocatedBlocks.get(file);
    synchronized (blocks) {
      // null indicates that this block is currently added. Return size()
      // as the index in this case
      if (block == null) {
        return blocks.size();
      }
      for (int i = 0; i < blocks.size(); i++) {
        if (blocks.get(i).getBlock().getLocalBlock().equals(block)) {
          return i;
        }
      }
    }
    throw new IOException("Cannot locate " + block + " in file " + file);
  }

  /**
   * Cache results for getFullPathName()
   */
  static class CachedFullPathNames {
    FSNamesystem namesystem;
    CachedFullPathNames(FSNamesystem namesystem) {
      this.namesystem = namesystem;
    }
    private Cache<INodeWithHashCode, String> cacheInternal =
      new Cache<INodeWithHashCode, String>() {
        @Override
        public String getDirectly(INodeWithHashCode inode) throws IOException {
          namesystem.readLock();
          try {
            return inode.getFullPathName();
          } finally {
            namesystem.readUnlock();
          }
        }
      };

    static private class INodeWithHashCode {
      BlockCollection bc;
      INodeWithHashCode(BlockCollection bc) {
        this.bc = bc;
      }
      @Override
      public boolean equals(Object obj) {
        return bc == obj;
      }
      @Override
      public int hashCode() {
        return System.identityHashCode(bc);
      }
      String getFullPathName() {
        return bc.getName();
      }
    }

    public String get(BlockCollection bc) throws IOException {
      return cacheInternal.get(new INodeWithHashCode(bc));
    }
  }

  /**
   * Cache results for FSNamesystem.getBlockLocations()
   */
  static class CachedLocatedBlocks extends Cache<String, List<LocatedBlock>> {
    FSNamesystem namesystem;
    CachedLocatedBlocks(FSNamesystem namesystem) {
      this.namesystem = namesystem;
    }
    @Override
    public List<LocatedBlock> getDirectly(String file) throws IOException {
      long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
      List<LocatedBlock> result = NameNodeRaidUtil.getBlockLocations(namesystem,
          file, 0L, len, false, false).getLocatedBlocks();
      if (result == null || result.isEmpty()) {
        result = new ArrayList<LocatedBlock>();
      }
      return Collections.synchronizedList(result);
    }
  }

  static abstract class Cache<K, V> {
    private Map<K, ValueWithTime> cache;
    private static final long CACHE_TIMEOUT = 300000L; // 5 minutes
    // The timeout is long but the consequence of a stale value is not serious
    Cache() {
      Map<K, ValueWithTime> map = new LinkedHashMap<K, ValueWithTime>() {
        private static final long serialVersionUID = 1L;
        final private int MAX_ENTRIES = 50000;
        @Override
        protected boolean removeEldestEntry(
            Map.Entry<K, ValueWithTime> eldest) {
          return size() > MAX_ENTRIES;
        }
      };
      this.cache = Collections.synchronizedMap(map);
    }

    // Note that this method may hold FSNamesystem.readLock() and it may
    // be called inside FSNamesystem.writeLock(). If we make this method
    // synchronized, it will deadlock.
    abstract protected V getDirectly(K key) throws IOException;

    public V get(K key) throws IOException {
      // The method is not synchronized so we may get some stale value here but
      // it's OK.
      ValueWithTime result = cache.get(key);
      long now = Time.now();
      if (result != null &&
          now - result.cachedTime < CACHE_TIMEOUT) {
        return result.value;
      }
      result = new ValueWithTime();
      result.value = getDirectly(key);
      result.cachedTime = now;
      cache.put(key, result);
      return result.value;
    }
    private class ValueWithTime {
      V value = null;
      long cachedTime = 0L;
    }
  }

  /**
   * Get path for the corresponding source file for a valid parity
   * file. Returns null if it does not exist.
   * @param parity the toUri path of the parity file
   * @return the toUri path of the source file
   */
  String getSourceFile(String parity, String prefix) throws IOException {
    if (isHarFile(parity)) {
      return null;
    }
    // remove the prefix
    String src = parity.substring(prefix.length());
    if (NameNodeRaidUtil.getFileInfo(namesystem, src, true) == null) {
      return null;
    }
    return src;
  }

  /**
   * Get path for the corresponding parity file for a source file.
   * Returns null if it does not exist.
   * @param src the toUri path of the source file
   * @return the toUri path of the parity file
   */
  String getParityFile(String src) throws IOException {
    String xorParity = getParityFile(xorPrefix, src);
    if (xorParity != null) {
      return xorParity;
    }
    String rsParity = getParityFile(rsPrefix, src);
    if (rsParity != null) {
      return rsParity;
    }
    return null;
  }

  /**
   * Get path for the parity file. Returns null if it does not exist.
   * @param parityPrefix usually "/raid/" or "/raidrs/"
   * @return the toUri path of the parity file
   */
  private String getParityFile(String parityPrefix, String src)
      throws IOException {
    String parity = parityPrefix + src;
    if (NameNodeRaidUtil.getFileInfo(namesystem, parity, true) == null) {
      return null;
    }
    return parity;
  }

  private boolean isHarFile(String path) {
    return path.lastIndexOf(RaidNode.HAR_SUFFIX) != -1;
  }

  enum FileType {
    NOT_RAID,
    XOR_HAR_TEMP_PARITY,
    XOR_TEMP_PARITY,
    XOR_PARITY,
    XOR_SOURCE,
    RS_HAR_TEMP_PARITY,
    RS_TEMP_PARITY,
    RS_PARITY,
    RS_SOURCE,
  }

  FileType getFileType(String path) throws IOException {
    if (path.startsWith(raidHarTempPrefix + Path.SEPARATOR)) {
      return FileType.XOR_HAR_TEMP_PARITY;
    }
    if (path.startsWith(raidrsHarTempPrefix + Path.SEPARATOR)) {
      return FileType.RS_HAR_TEMP_PARITY;
    }
    if (path.startsWith(raidTempPrefix + Path.SEPARATOR)) {
      return FileType.XOR_TEMP_PARITY;
    }
    if (path.startsWith(raidrsTempPrefix + Path.SEPARATOR)) {
      return FileType.RS_TEMP_PARITY;
    }
    if (path.startsWith(xorPrefix + Path.SEPARATOR)) {
      return FileType.XOR_PARITY;
    }
    if (path.startsWith(rsPrefix + Path.SEPARATOR)) {
      return FileType.RS_PARITY;
    }
    String parity = getParityFile(path);
    if (parity == null) {
      return FileType.NOT_RAID;
    }
    if (parity.startsWith(xorPrefix + Path.SEPARATOR)) {
      return FileType.XOR_SOURCE;
    }
    if (parity.startsWith(rsPrefix + Path.SEPARATOR)) {
      return FileType.RS_SOURCE;
    }
    return FileType.NOT_RAID;
  }
}
@@ -1,505 +0,0 @@
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.hdfs.server.datanode;
|
|
||||||
|
|
||||||
import java.io.DataInputStream;
|
|
||||||
import java.io.DataOutputStream;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.net.SocketException;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.hadoop.fs.ChecksumException;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
|
||||||
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
|
||||||
import org.apache.hadoop.io.IOUtils;
|
|
||||||
import org.apache.hadoop.io.nativeio.NativeIO;
|
|
||||||
import org.apache.hadoop.net.SocketOutputStream;
|
|
||||||
import org.apache.hadoop.util.DataChecksum;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a block from the disk and sends it to a recipient.
|
|
||||||
*/
|
|
||||||
public class RaidBlockSender implements java.io.Closeable {
|
|
||||||
public static final Log LOG = DataNode.LOG;
|
|
||||||
static final Log ClientTraceLog = DataNode.ClientTraceLog;
|
|
||||||
|
|
||||||
private ExtendedBlock block; // the block to read from
|
|
||||||
|
|
||||||
/** The visible length of a replica. */
|
|
||||||
private final long replicaVisibleLength;
|
|
||||||
|
|
||||||
private InputStream blockIn; // data stream
|
|
||||||
private long blockInPosition = -1; // updated while using transferTo().
|
|
||||||
private DataInputStream checksumIn; // checksum datastream
|
|
||||||
private DataChecksum checksum; // checksum stream
|
|
||||||
private long offset; // starting position to read
|
|
||||||
/** Initial position to read */
|
|
||||||
private long initialOffset;
|
|
||||||
private long endOffset; // ending position
|
|
||||||
private int chunkSize; // chunk size
|
|
||||||
private int checksumSize; // checksum size
|
|
||||||
private boolean corruptChecksumOk; // if need to verify checksum
|
|
||||||
private boolean chunkOffsetOK; // if need to send chunk offset
|
|
||||||
private long seqno; // sequence number of packet
|
|
||||||
|
|
||||||
private boolean transferToAllowed = true;
|
|
||||||
private boolean blockReadFully; //set when the whole block is read
|
|
||||||
private boolean verifyChecksum; //if true, check is verified while reading
|
|
||||||
private final String clientTraceFmt; // format of client trace log message
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Minimum buffer used while sending data to clients. Used only if
|
|
||||||
* transferTo() is enabled. 64KB is not that large. It could be larger, but
|
|
||||||
* not sure if there will be much more improvement.
|
|
||||||
*/
|
|
||||||
private static final int MIN_BUFFER_WITH_TRANSFERTO = 64*1024;
|
|
||||||
private static final int TRANSFERTO_BUFFER_SIZE = Math.max(
|
|
||||||
HdfsConstants.IO_FILE_BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO);
|
|
||||||
private volatile ChunkChecksum lastChunkChecksum = null;
|
|
||||||
|
|
||||||
|
|
||||||
public RaidBlockSender(ExtendedBlock block, long blockLength, long startOffset, long length,
|
|
||||||
boolean corruptChecksumOk, boolean chunkOffsetOK,
|
|
||||||
boolean verifyChecksum, boolean transferToAllowed,
|
|
||||||
DataInputStream metadataIn, InputStreamFactory streamFactory
|
|
||||||
) throws IOException {
|
|
||||||
this(block, blockLength, startOffset, length,
|
|
||||||
corruptChecksumOk, chunkOffsetOK,
|
|
||||||
verifyChecksum, transferToAllowed,
|
|
||||||
metadataIn, streamFactory, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public RaidBlockSender(ExtendedBlock block, long blockLength, long startOffset, long length,
|
|
||||||
boolean corruptChecksumOk, boolean chunkOffsetOK,
|
|
||||||
boolean verifyChecksum, boolean transferToAllowed,
|
|
||||||
DataInputStream metadataIn, InputStreamFactory streamFactory,
|
|
||||||
String clientTraceFmt) throws IOException {
|
|
||||||
try {
|
|
||||||
this.block = block;
|
|
||||||
this.chunkOffsetOK = chunkOffsetOK;
|
|
||||||
this.corruptChecksumOk = corruptChecksumOk;
|
|
||||||
this.verifyChecksum = verifyChecksum;
|
|
||||||
this.replicaVisibleLength = blockLength;
|
|
||||||
this.transferToAllowed = transferToAllowed;
|
|
||||||
this.clientTraceFmt = clientTraceFmt;
|
|
||||||
|
|
||||||
if ( !corruptChecksumOk || metadataIn != null) {
|
|
||||||
this.checksumIn = metadataIn;
|
|
||||||
|
|
||||||
// read and handle the common header here. For now just a version
|
|
||||||
BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
|
|
||||||
short version = header.getVersion();
|
|
||||||
|
|
||||||
if (version != BlockMetadataHeader.VERSION) {
|
|
||||||
LOG.warn("Wrong version (" + version + ") for metadata file for "
|
|
||||||
+ block + " ignoring ...");
|
|
||||||
}
|
|
||||||
checksum = header.getChecksum();
|
|
||||||
} else {
|
|
||||||
LOG.warn("Could not find metadata file for " + block);
|
|
||||||
// This only decides the buffer size. Use BUFFER_SIZE?
|
|
||||||
checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL,
|
|
||||||
16 * 1024);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If bytesPerChecksum is very large, then the metadata file
|
|
||||||
* is mostly corrupted. For now just truncate bytesPerChecksum to
|
|
||||||
* blockLength.
|
|
||||||
*/
|
|
||||||
int size = checksum.getBytesPerChecksum();
|
|
||||||
if (size > 10*1024*1024 && size > replicaVisibleLength) {
|
|
||||||
checksum = DataChecksum.newDataChecksum(checksum.getChecksumType(),
|
|
||||||
Math.max((int)replicaVisibleLength, 10*1024*1024));
|
|
||||||
size = checksum.getBytesPerChecksum();
|
|
||||||
}
|
|
||||||
chunkSize = size;
|
|
||||||
checksumSize = checksum.getChecksumSize();
|
|
||||||
|
|
||||||
if (length < 0) {
|
|
||||||
length = replicaVisibleLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
endOffset = blockLength;
|
|
||||||
|
|
||||||
if (startOffset < 0 || startOffset > endOffset
|
|
||||||
|| (length + startOffset) > endOffset) {
|
|
||||||
String msg = " Offset " + startOffset + " and length " + length
|
|
||||||
+ " don't match block " + block + " ( blockLen " + endOffset + " )";
|
|
||||||
LOG.warn("sendBlock() : " + msg);
|
|
||||||
throw new IOException(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
offset = (startOffset - (startOffset % chunkSize));
|
|
||||||
if (length >= 0) {
|
|
||||||
// Make sure endOffset points to end of a checksumed chunk.
|
|
||||||
long tmpLen = startOffset + length;
|
|
||||||
if (tmpLen % chunkSize != 0) {
|
|
||||||
tmpLen += (chunkSize - tmpLen % chunkSize);
|
|
||||||
}
|
|
||||||
if (tmpLen < endOffset) {
|
|
||||||
// will use on-disk checksum here since the end is a stable chunk
|
|
||||||
endOffset = tmpLen;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// seek to the right offsets
|
|
||||||
if (offset > 0) {
|
|
||||||
long checksumSkip = (offset / chunkSize) * checksumSize;
|
|
||||||
// note blockInStream is seeked when created below
|
|
||||||
if (checksumSkip > 0) {
|
|
||||||
// Should we use seek() for checksum file as well?
|
|
||||||
IOUtils.skipFully(checksumIn, checksumSkip);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
seqno = 0;
|
|
||||||
|
|
||||||
blockIn = streamFactory.createStream(offset);
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
IOUtils.closeStream(this);
|
|
||||||
IOUtils.closeStream(blockIn);
|
|
||||||
throw ioe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* close opened files.
|
|
||||||
*/
|
|
||||||
public void close() throws IOException {
|
|
||||||
IOException ioe = null;
|
|
||||||
// close checksum file
|
|
||||||
if(checksumIn!=null) {
|
|
||||||
try {
|
|
||||||
checksumIn.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
ioe = e;
|
|
||||||
}
|
|
||||||
checksumIn = null;
|
|
||||||
}
|
|
||||||
// close data file
|
|
||||||
if(blockIn!=null) {
|
|
||||||
try {
|
|
||||||
blockIn.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
ioe = e;
|
|
||||||
}
|
|
||||||
blockIn = null;
|
|
||||||
}
|
|
||||||
// throw IOException if there is any
|
|
||||||
if(ioe!= null) {
|
|
||||||
throw ioe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Converts an IOException (not subclasses) to SocketException.
|
|
||||||
* This is typically done to indicate to upper layers that the error
|
|
||||||
* was a socket error rather than often more serious exceptions like
|
|
||||||
* disk errors.
|
|
||||||
*/
|
|
||||||
private static IOException ioeToSocketException(IOException ioe) {
|
|
||||||
if (ioe.getClass().equals(IOException.class)) {
|
|
||||||
// "se" could be a new class in stead of SocketException.
|
|
||||||
IOException se = new SocketException("Original Exception : " + ioe);
|
|
||||||
se.initCause(ioe);
|
|
||||||
/* Change the stacktrace so that original trace is not truncated
|
|
||||||
* when printed.*/
|
|
||||||
se.setStackTrace(ioe.getStackTrace());
|
|
||||||
return se;
|
|
||||||
}
|
|
||||||
// otherwise just return the same exception.
|
|
||||||
return ioe;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param datalen Length of data
|
|
||||||
* @return number of chunks for data of given size
|
|
||||||
*/
|
|
||||||
private int numberOfChunks(long datalen) {
|
|
||||||
return (int) ((datalen + chunkSize - 1)/chunkSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Write packet header into {@code pkt}
|
|
||||||
*/
|
|
||||||
private void writePacketHeader(ByteBuffer pkt, int dataLen, int packetLen) {
|
|
||||||
pkt.clear();
|
|
||||||
PacketHeader header = new PacketHeader(packetLen, offset, seqno,
|
|
||||||
(dataLen == 0), dataLen, false);
|
|
||||||
header.putInBuffer(pkt);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Read checksum into given buffer
|
|
||||||
* @param buf buffer to read the checksum into
|
|
||||||
* @param checksumOffset offset at which to write the checksum into buf
|
|
||||||
* @param checksumLen length of checksum to write
|
|
||||||
* @throws IOException on error
|
|
||||||
*/
|
|
||||||
private void readChecksum(byte[] buf, final int checksumOffset,
|
|
||||||
final int checksumLen) throws IOException {
|
|
||||||
if (checksumSize <= 0 && checksumIn == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
checksumIn.readFully(buf, checksumOffset, checksumLen);
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOG.warn(" Could not read or failed to veirfy checksum for data"
|
|
||||||
+ " at offset " + offset + " for block " + block, e);
|
|
||||||
IOUtils.closeStream(checksumIn);
|
|
||||||
checksumIn = null;
|
|
||||||
if (corruptChecksumOk) {
|
|
||||||
if (checksumOffset < checksumLen) {
|
|
||||||
// Just fill the array with zeros.
|
|
||||||
Arrays.fill(buf, checksumOffset, checksumLen, (byte) 0);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sends a packet with up to maxChunks chunks of data.
|
|
||||||
*
|
|
||||||
* @param pkt buffer used for writing packet data
|
|
||||||
* @param maxChunks maximum number of chunks to send
|
|
||||||
* @param out stream to send data to
|
|
||||||
* @param transferTo use transferTo to send data
|
|
||||||
* @param throttler used for throttling data transfer bandwidth
|
|
||||||
*/
|
|
||||||
private int sendPacket(ByteBuffer pkt, int maxChunks, OutputStream out,
|
|
||||||
boolean transferTo, DataTransferThrottler throttler) throws IOException {
|
|
||||||
int dataLen = (int) Math.min(endOffset - offset,
|
|
||||||
(chunkSize * (long) maxChunks));
|
|
||||||
|
|
||||||
int numChunks = numberOfChunks(dataLen); // Number of chunks to be sent in the packet
|
|
||||||
int checksumDataLen = numChunks * checksumSize;
|
|
||||||
int packetLen = dataLen + checksumDataLen + 4;
|
|
||||||
boolean lastDataPacket = offset + dataLen == endOffset && dataLen > 0;
|
|
||||||
|
|
||||||
writePacketHeader(pkt, dataLen, packetLen);
|
|
||||||
|
|
||||||
int checksumOff = pkt.position();
|
|
||||||
byte[] buf = pkt.array();
|
|
||||||
|
|
||||||
if (checksumSize > 0 && checksumIn != null) {
|
|
||||||
readChecksum(buf, checksumOff, checksumDataLen);
|
|
||||||
|
|
||||||
// write in progress that we need to use to get last checksum
|
|
||||||
if (lastDataPacket && lastChunkChecksum != null) {
|
|
||||||
int start = checksumOff + checksumDataLen - checksumSize;
|
|
||||||
byte[] updatedChecksum = lastChunkChecksum.getChecksum();
|
|
||||||
|
|
||||||
if (updatedChecksum != null) {
|
|
||||||
System.arraycopy(updatedChecksum, 0, buf, start, checksumSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int dataOff = checksumOff + checksumDataLen;
|
|
||||||
if (!transferTo) { // normal transfer
|
|
||||||
IOUtils.readFully(blockIn, buf, dataOff, dataLen);
|
|
||||||
|
|
||||||
if (verifyChecksum) {
|
|
||||||
verifyChecksum(buf, dataOff, dataLen, numChunks, checksumOff);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (transferTo) {
|
|
||||||
SocketOutputStream sockOut = (SocketOutputStream)out;
|
|
||||||
sockOut.write(buf, 0, dataOff); // First write checksum
|
|
||||||
|
|
||||||
// no need to flush. since we know out is not a buffered stream.
|
|
||||||
sockOut.transferToFully(((FileInputStream)blockIn).getChannel(),
|
|
||||||
blockInPosition, dataLen);
|
|
||||||
blockInPosition += dataLen;
|
|
||||||
} else {
|
|
||||||
// normal transfer
|
|
||||||
out.write(buf, 0, dataOff + dataLen);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
/* Exception while writing to the client. Connection closure from
|
|
||||||
* the other end is mostly the case and we do not care much about
|
|
||||||
* it. But other things can go wrong, especially in transferTo(),
|
|
||||||
* which we do not want to ignore.
|
|
||||||
*
|
|
||||||
* The message parsing below should not be considered as a good
|
|
||||||
* coding example. NEVER do it to drive a program logic. NEVER.
|
|
||||||
* It was done here because the NIO throws an IOException for EPIPE.
|
|
||||||
*/
|
|
||||||
String ioem = e.getMessage();
|
|
||||||
if (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset")) {
|
|
||||||
LOG.error("BlockSender.sendChunks() exception: ", e);
|
|
||||||
}
|
|
||||||
throw ioeToSocketException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (throttler != null) { // rebalancing so throttle
|
|
||||||
throttler.throttle(packetLen);
|
|
||||||
}
|
|
||||||
|
|
||||||
return dataLen;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute checksum for chunks and verify the checksum that is read from
|
|
||||||
* the metadata file is correct.
|
|
||||||
*
|
|
||||||
* @param buf buffer that has checksum and data
|
|
||||||
* @param dataOffset position where data is written in the buf
|
|
||||||
* @param datalen length of data
|
|
||||||
* @param numChunks number of chunks corresponding to data
|
|
||||||
* @param checksumOffset offset where checksum is written in the buf
|
|
||||||
* @throws ChecksumException on failed checksum verification
|
|
||||||
*/
|
|
||||||
public void verifyChecksum(final byte[] buf, final int dataOffset,
|
|
||||||
final int datalen, final int numChunks, final int checksumOffset)
|
|
||||||
throws ChecksumException {
|
|
||||||
int dOff = dataOffset;
|
|
||||||
int cOff = checksumOffset;
|
|
||||||
int dLeft = datalen;
|
|
||||||
|
|
||||||
for (int i = 0; i < numChunks; i++) {
|
|
||||||
checksum.reset();
|
|
||||||
int dLen = Math.min(dLeft, chunkSize);
|
|
||||||
checksum.update(buf, dOff, dLen);
|
|
||||||
if (!checksum.compare(buf, cOff)) {
|
|
||||||
long failedPos = offset + datalen - dLeft;
|
|
||||||
throw new ChecksumException("Checksum failed at " + failedPos,
|
|
||||||
failedPos);
|
|
||||||
}
|
|
||||||
dLeft -= dLen;
|
|
||||||
dOff += dLen;
|
|
||||||
cOff += checksumSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* sendBlock() is used to read block and its metadata and stream the data to
|
|
||||||
* either a client or to another datanode.
|
|
||||||
*
|
|
||||||
* @param out stream to which the block is written to
|
|
||||||
* @param baseStream optional. if non-null, <code>out</code> is assumed to
|
|
||||||
* be a wrapper over this stream. This enables optimizations for
|
|
||||||
* sending the data, e.g.
|
|
||||||
* {@link SocketOutputStream#transferToFully(FileChannel,
|
|
||||||
* long, int)}.
|
|
||||||
* @return total bytes reads, including crc.
|
|
||||||
*/
|
|
||||||
public long sendBlock(DataOutputStream out, OutputStream baseStream)
|
|
||||||
throws IOException {
|
|
||||||
if (out == null) {
|
|
||||||
throw new IOException( "out stream is null" );
|
|
||||||
}
|
|
||||||
initialOffset = offset;
|
|
||||||
long totalRead = 0;
|
|
||||||
OutputStream streamForSendChunks = out;
|
|
||||||
|
|
||||||
final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
|
|
||||||
try {
|
|
||||||
int maxChunksPerPacket;
|
|
||||||
int pktSize = PacketHeader.PKT_HEADER_LEN;
|
|
||||||
boolean transferTo = transferToAllowed && !verifyChecksum
|
|
||||||
&& baseStream instanceof SocketOutputStream
|
|
||||||
&& blockIn instanceof FileInputStream;
|
|
||||||
if (transferTo) {
|
|
||||||
FileChannel fileChannel = ((FileInputStream)blockIn).getChannel();
|
|
||||||
blockInPosition = fileChannel.position();
|
|
||||||
streamForSendChunks = baseStream;
|
|
||||||
maxChunksPerPacket = numberOfChunks(TRANSFERTO_BUFFER_SIZE);
|
|
||||||
|
|
||||||
// Smaller packet size to only hold checksum when doing transferTo
|
|
||||||
pktSize += checksumSize * maxChunksPerPacket;
|
|
||||||
} else {
|
|
||||||
maxChunksPerPacket = Math.max(1,
|
|
||||||
numberOfChunks(HdfsConstants.IO_FILE_BUFFER_SIZE));
|
|
||||||
// Packet size includes both checksum and data
|
|
||||||
pktSize += (chunkSize + checksumSize) * maxChunksPerPacket;
|
|
||||||
}
|
|
||||||
|
|
||||||
ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);
|
|
||||||
|
|
||||||
while (endOffset > offset) {
|
|
||||||
long len = sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks,
|
|
||||||
transferTo, null);
|
|
||||||
offset += len;
|
|
||||||
totalRead += len + (numberOfChunks(len) * checksumSize);
|
|
||||||
seqno++;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
// send an empty packet to mark the end of the block
|
|
||||||
sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, transferTo,
|
|
||||||
null);
|
|
||||||
out.flush();
|
|
||||||
} catch (IOException e) { //socket error
|
|
||||||
throw ioeToSocketException(e);
|
|
||||||
}
|
|
||||||
blockReadFully = true;
|
|
||||||
} finally {
|
|
||||||
if (clientTraceFmt != null) {
|
|
||||||
final long endTime = System.nanoTime();
|
|
||||||
ClientTraceLog.info(String.format(clientTraceFmt, totalRead,
|
|
||||||
initialOffset, endTime - startTime));
|
|
||||||
}
|
|
||||||
close();
|
|
||||||
}
|
|
||||||
return totalRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean isBlockReadFully() {
|
|
||||||
return blockReadFully;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static interface InputStreamFactory {
|
|
||||||
public InputStream createStream(long offset) throws IOException;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the checksum type that will be used with this block transfer.
|
|
||||||
*/
|
|
||||||
public DataChecksum getChecksum() {
|
|
||||||
return checksum;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class BlockInputStreamFactory implements InputStreamFactory {
|
|
||||||
private final ExtendedBlock block;
|
|
||||||
private final FsDatasetSpi<?> data;
|
|
||||||
|
|
||||||
private BlockInputStreamFactory(ExtendedBlock block, FsDatasetSpi<?> data) {
|
|
||||||
this.block = block;
|
|
||||||
this.data = data;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public InputStream createStream(long offset) throws IOException {
|
|
||||||
return data.getBlockInputStream(block, offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
@@ -1,56 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.*;

import org.apache.hadoop.classification.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.protocol.*;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.security.AccessControlException;

/** Utilities used by RAID for accessing NameNode. */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class NameNodeRaidUtil {
  /** Accessing FSDirectory.getFileInfo(..) */
  public static HdfsFileStatus getFileInfo(final FSDirectory dir,
      final String src, final boolean resolveLink
      ) throws UnresolvedLinkException {
    return dir.getFileInfo(src, resolveLink);
  }

  /** Accessing FSNamesystem.getFileInfo(..)
   * @throws StandbyException */
  public static HdfsFileStatus getFileInfo(final FSNamesystem namesystem,
      final String src, final boolean resolveLink
      ) throws AccessControlException, UnresolvedLinkException, StandbyException {
    return namesystem.getFileInfo(src, resolveLink);
  }

  /** Accessing FSNamesystem.getBlockLocations(..) */
  public static LocatedBlocks getBlockLocations(final FSNamesystem namesystem,
      final String src, final long offset, final long length,
      final boolean doAccessTime, final boolean needBlockToken
      ) throws FileNotFoundException, UnresolvedLinkException, IOException {
    return namesystem.getBlockLocations(src, offset, length,
        doAccessTime, needBlockToken, true);
  }
}
@@ -1,840 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
|
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
|
|
||||||
|
|
||||||
import java.io.BufferedOutputStream;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.DataInputStream;
|
|
||||||
import java.io.DataOutputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.net.Socket;
|
|
||||||
import java.nio.channels.SocketChannel;
|
|
||||||
import java.lang.reflect.Constructor;
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
|
|
||||||
import org.apache.hadoop.util.DataChecksum;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.datatransfer.*;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.FSConstants;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
|
||||||
import org.apache.hadoop.hdfs.server.datanode.RaidBlockSender;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.conf.Configured;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.hadoop.util.Progressable;
|
|
||||||
import org.apache.hadoop.net.NetUtils;
|
|
||||||
|
|
||||||
import org.apache.hadoop.raid.RaidNode;
|
|
||||||
import org.apache.hadoop.raid.RaidUtils;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* contains the core functionality of the block fixer
|
|
||||||
*
|
|
||||||
* configuration options:
|
|
||||||
* raid.blockfix.classname - the class name of the block fixer
|
|
||||||
* implementation to use
|
|
||||||
*
|
|
||||||
* raid.blockfix.interval - interval between checks for corrupt files
|
|
||||||
*
|
|
||||||
* raid.blockfix.history.interval - interval before fixing same file again
|
|
||||||
*
|
|
||||||
* raid.blockfix.read.timeout - read time out
|
|
||||||
*
|
|
||||||
* raid.blockfix.write.timeout - write time out
|
|
||||||
*/
|
|
||||||
public abstract class BlockFixer extends Configured implements Runnable {
|
|
||||||
|
|
||||||
public static final String BLOCKFIX_CLASSNAME = "raid.blockfix.classname";
|
|
||||||
public static final String BLOCKFIX_INTERVAL = "raid.blockfix.interval";
|
|
||||||
public static final String BLOCKFIX_HISTORY_INTERVAL =
|
|
||||||
"raid.blockfix.history.interval";
|
|
||||||
public static final String BLOCKFIX_READ_TIMEOUT =
|
|
||||||
"raid.blockfix.read.timeout";
|
|
||||||
public static final String BLOCKFIX_WRITE_TIMEOUT =
|
|
||||||
"raid.blockfix.write.timeout";
|
|
||||||
|
|
||||||
public static final long DEFAULT_BLOCKFIX_INTERVAL = 60 * 1000; // 1 min
|
|
||||||
public static final long DEFAULT_BLOCKFIX_HISTORY_INTERVAL =
|
|
||||||
60 * 60 * 1000; // 60 mins
|
|
||||||
|
|
||||||
public static BlockFixer createBlockFixer(Configuration conf)
|
|
||||||
throws ClassNotFoundException {
|
|
||||||
try {
|
|
||||||
// default to distributed block fixer
|
|
||||||
Class<?> blockFixerClass =
|
|
||||||
conf.getClass(BLOCKFIX_CLASSNAME, DistBlockFixer.class);
|
|
||||||
if (!BlockFixer.class.isAssignableFrom(blockFixerClass)) {
|
|
||||||
throw new ClassNotFoundException("not an implementation of blockfixer");
|
|
||||||
}
|
|
||||||
Constructor<?> constructor =
|
|
||||||
blockFixerClass.getConstructor(new Class[] {Configuration.class} );
|
|
||||||
return (BlockFixer) constructor.newInstance(conf);
|
|
||||||
} catch (NoSuchMethodException e) {
|
|
||||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
|
||||||
} catch (InvocationTargetException e) {
|
|
||||||
throw new ClassNotFoundException("cannot construct blockfixer", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private long numFilesFixed = 0;
|
|
||||||
|
|
||||||
public volatile boolean running = true;
|
|
||||||
|
|
||||||
// interval between checks for corrupt files
|
|
||||||
protected long blockFixInterval;
|
|
||||||
|
|
||||||
// interval before fixing same file again
|
|
||||||
protected long historyInterval;
|
|
||||||
|
|
||||||
public BlockFixer(Configuration conf) {
|
|
||||||
super(conf);
|
|
||||||
blockFixInterval =
|
|
||||||
getConf().getLong(BLOCKFIX_INTERVAL, DEFAULT_BLOCKFIX_INTERVAL);
|
|
||||||
historyInterval =
|
|
||||||
getConf().getLong(BLOCKFIX_HISTORY_INTERVAL,
|
|
||||||
DEFAULT_BLOCKFIX_HISTORY_INTERVAL);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public abstract void run();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* returns the number of files that have been fixed by this block fixer
|
|
||||||
*/
|
|
||||||
public synchronized long filesFixed() {
|
|
||||||
return numFilesFixed;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* increments the number of files that have been fixed by this block fixer
|
|
||||||
*/
|
|
||||||
protected synchronized void incrFilesFixed() {
|
|
||||||
numFilesFixed++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* increments the number of files that have been fixed by this block fixer
|
|
||||||
*/
|
|
||||||
protected synchronized void incrFilesFixed(long incr) {
|
|
||||||
if (incr < 0) {
|
|
||||||
throw new IllegalArgumentException("cannot increment by negative value " +
|
|
||||||
incr);
|
|
||||||
}
|
|
||||||
|
|
||||||
numFilesFixed += incr;
|
|
||||||
}
|
|
||||||
|
|
||||||
static boolean isSourceFile(Path p, String[] destPrefixes) {
|
|
||||||
String pathStr = p.toUri().getPath();
|
|
||||||
for (String destPrefix: destPrefixes) {
|
|
||||||
if (pathStr.startsWith(destPrefix)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void filterUnfixableSourceFiles(Iterator<Path> it) throws IOException {
|
|
||||||
String xorPrefix = RaidNode.xorDestinationPath(getConf()).toUri().getPath();
|
|
||||||
if (!xorPrefix.endsWith(Path.SEPARATOR)) {
|
|
||||||
xorPrefix += Path.SEPARATOR;
|
|
||||||
}
|
|
||||||
String rsPrefix = RaidNode.rsDestinationPath(getConf()).toUri().getPath();
|
|
||||||
if (!rsPrefix.endsWith(Path.SEPARATOR)) {
|
|
||||||
rsPrefix += Path.SEPARATOR;
|
|
||||||
}
|
|
||||||
String[] destPrefixes = new String[]{xorPrefix, rsPrefix};
|
|
||||||
while (it.hasNext()) {
|
|
||||||
Path p = it.next();
|
|
||||||
if (isSourceFile(p, destPrefixes) &&
|
|
||||||
RaidNode.xorParityForSource(p, getConf()) == null &&
|
|
||||||
RaidNode.rsParityForSource(p, getConf()) == null) {
|
|
||||||
it.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* this class implements the actual fixing functionality
|
|
||||||
* we keep this in a separate class so that
|
|
||||||
* the distributed block fixer can use it
|
|
||||||
*/
|
|
||||||
static class BlockFixerHelper extends Configured {
|
|
||||||
|
|
||||||
public static final Log LOG = LogFactory.getLog(BlockFixer.
|
|
||||||
BlockFixerHelper.class);
|
|
||||||
|
|
||||||
private String xorPrefix;
|
|
||||||
private String rsPrefix;
|
|
||||||
private XOREncoder xorEncoder;
|
|
||||||
private XORDecoder xorDecoder;
|
|
||||||
private ReedSolomonEncoder rsEncoder;
|
|
||||||
private ReedSolomonDecoder rsDecoder;
|
|
||||||
|
|
||||||
public BlockFixerHelper(Configuration conf) throws IOException {
|
|
||||||
super(conf);
|
|
||||||
|
|
||||||
xorPrefix = RaidNode.xorDestinationPath(getConf()).toUri().getPath();
|
|
||||||
if (!xorPrefix.endsWith(Path.SEPARATOR)) {
|
|
||||||
xorPrefix += Path.SEPARATOR;
|
|
||||||
}
|
|
||||||
rsPrefix = RaidNode.rsDestinationPath(getConf()).toUri().getPath();
|
|
||||||
if (!rsPrefix.endsWith(Path.SEPARATOR)) {
|
|
||||||
rsPrefix += Path.SEPARATOR;
|
|
||||||
}
|
|
||||||
int stripeLength = RaidNode.getStripeLength(getConf());
|
|
||||||
xorEncoder = new XOREncoder(getConf(), stripeLength);
|
|
||||||
xorDecoder = new XORDecoder(getConf(), stripeLength);
|
|
||||||
int parityLength = RaidNode.rsParityLength(getConf());
|
|
||||||
rsEncoder = new ReedSolomonEncoder(getConf(), stripeLength, parityLength);
|
|
||||||
rsDecoder = new ReedSolomonDecoder(getConf(), stripeLength, parityLength);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks whether file is xor parity file
|
|
||||||
*/
|
|
||||||
boolean isXorParityFile(Path p) {
|
|
||||||
String pathStr = p.toUri().getPath();
|
|
||||||
if (pathStr.contains(RaidNode.HAR_SUFFIX)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return pathStr.startsWith(xorPrefix);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks whether file is rs parity file
|
|
||||||
*/
|
|
||||||
boolean isRsParityFile(Path p) {
|
|
||||||
String pathStr = p.toUri().getPath();
|
|
||||||
if (pathStr.contains(RaidNode.HAR_SUFFIX)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return pathStr.startsWith(rsPrefix);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fix a file, do not report progress.
|
|
||||||
*
|
|
||||||
* @return true if file has been fixed, false if no fixing
|
|
||||||
* was necessary or possible.
|
|
||||||
*/
|
|
||||||
boolean fixFile(Path srcPath) throws IOException {
|
|
||||||
return fixFile(srcPath, new RaidUtils.DummyProgressable());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fix a file, report progress.
|
|
||||||
*
|
|
||||||
* @return true if file has been fixed, false if no fixing
|
|
||||||
* was necessary or possible.
|
|
||||||
*/
|
|
||||||
boolean fixFile(Path srcPath, Progressable progress) throws IOException {
|
|
||||||
|
|
||||||
if (RaidNode.isParityHarPartFile(srcPath)) {
|
|
||||||
return processCorruptParityHarPartFile(srcPath, progress);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The corrupted file is a XOR parity file
|
|
||||||
if (isXorParityFile(srcPath)) {
|
|
||||||
return processCorruptParityFile(srcPath, xorEncoder, progress);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The corrupted file is a ReedSolomon parity file
|
|
||||||
if (isRsParityFile(srcPath)) {
|
|
||||||
return processCorruptParityFile(srcPath, rsEncoder, progress);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The corrupted file is a source file
|
|
||||||
RaidNode.ParityFilePair ppair =
|
|
||||||
RaidNode.xorParityForSource(srcPath, getConf());
|
|
||||||
Decoder decoder = null;
|
|
||||||
if (ppair != null) {
|
|
||||||
decoder = xorDecoder;
|
|
||||||
} else {
|
|
||||||
ppair = RaidNode.rsParityForSource(srcPath, getConf());
|
|
||||||
if (ppair != null) {
|
|
||||||
decoder = rsDecoder;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have a parity file, process the file and fix it.
|
|
||||||
if (ppair != null) {
|
|
||||||
return processCorruptFile(srcPath, ppair, decoder, progress);
|
|
||||||
}
|
|
||||||
|
|
||||||
// there was nothing to do
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sorts source files ahead of parity files.
|
|
||||||
*/
|
|
||||||
void sortCorruptFiles(List<Path> files) {
|
|
||||||
// TODO: We should first fix the files that lose more blocks
|
|
||||||
Comparator<Path> comp = new Comparator<Path>() {
|
|
||||||
public int compare(Path p1, Path p2) {
|
|
||||||
if (isXorParityFile(p2) || isRsParityFile(p2)) {
|
|
||||||
// If p2 is a parity file, p1 is smaller.
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (isXorParityFile(p1) || isRsParityFile(p1)) {
|
|
||||||
// If p1 is a parity file, p2 is smaller.
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
// If both are source files, they are equal.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
Collections.sort(files, comp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a DistributedFileSystem hosting the path supplied.
|
|
||||||
*/
|
|
||||||
protected DistributedFileSystem getDFS(Path p) throws IOException {
|
|
||||||
return (DistributedFileSystem) p.getFileSystem(getConf());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads through a corrupt source file fixing corrupt blocks on the way.
|
|
||||||
* @param srcPath Path identifying the corrupt file.
|
|
||||||
* @throws IOException
|
|
||||||
* @return true if file has been fixed, false if no fixing
|
|
||||||
* was necessary or possible.
|
|
||||||
*/
|
|
||||||
boolean processCorruptFile(Path srcPath, RaidNode.ParityFilePair parityPair,
|
|
||||||
Decoder decoder, Progressable progress)
|
|
||||||
throws IOException {
|
|
||||||
LOG.info("Processing corrupt file " + srcPath);
|
|
||||||
|
|
||||||
DistributedFileSystem srcFs = getDFS(srcPath);
|
|
||||||
FileStatus srcStat = srcFs.getFileStatus(srcPath);
|
|
||||||
long blockSize = srcStat.getBlockSize();
|
|
||||||
long srcFileSize = srcStat.getLen();
|
|
||||||
String uriPath = srcPath.toUri().getPath();
|
|
||||||
|
|
||||||
int numBlocksFixed = 0;
|
|
||||||
List<LocatedBlock> corrupt =
|
|
||||||
RaidDFSUtil.corruptBlocksInFile(srcFs, uriPath, 0, srcFileSize);
|
|
||||||
if (corrupt.size() == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (LocatedBlock lb: corrupt) {
|
|
||||||
ExtendedBlock corruptBlock = lb.getBlock();
|
|
||||||
long corruptOffset = lb.getStartOffset();
|
|
||||||
|
|
||||||
LOG.info("Found corrupt block " + corruptBlock +
|
|
||||||
", offset " + corruptOffset);
|
|
||||||
|
|
||||||
final long blockContentsSize =
|
|
||||||
Math.min(blockSize, srcFileSize - corruptOffset);
|
|
||||||
File localBlockFile =
|
|
||||||
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
|
|
||||||
localBlockFile.deleteOnExit();
|
|
||||||
|
|
||||||
try {
|
|
||||||
decoder.recoverBlockToFile(srcFs, srcPath, parityPair.getFileSystem(),
|
|
||||||
parityPair.getPath(), blockSize,
|
|
||||||
corruptOffset, localBlockFile,
|
|
||||||
blockContentsSize);
|
|
||||||
|
|
||||||
// We have the contents of the block, send them.
|
|
||||||
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
|
|
||||||
computeMetadataAndSendFixedBlock(datanode, localBlockFile,
|
|
||||||
lb, blockContentsSize);
|
|
||||||
numBlocksFixed++;
|
|
||||||
} finally {
|
|
||||||
localBlockFile.delete();
|
|
||||||
}
|
|
||||||
progress.progress();
|
|
||||||
}
|
|
||||||
LOG.info("Fixed " + numBlocksFixed + " blocks in " + srcPath);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fixes corrupt blocks in a parity file.
|
|
||||||
* This function uses the corresponding source file to regenerate parity
|
|
||||||
* file blocks.
|
|
||||||
* @return true if file has been fixed, false if no fixing
|
|
||||||
* was necessary or possible.
|
|
||||||
*/
|
|
||||||
boolean processCorruptParityFile(Path parityPath, Encoder encoder,
|
|
||||||
Progressable progress)
|
|
||||||
throws IOException {
|
|
||||||
LOG.info("Processing corrupt file " + parityPath);
|
|
||||||
Path srcPath = sourcePathFromParityPath(parityPath);
|
|
||||||
if (srcPath == null) {
|
|
||||||
LOG.warn("Unusable parity file " + parityPath);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
DistributedFileSystem parityFs = getDFS(parityPath);
|
|
||||||
FileStatus parityStat = parityFs.getFileStatus(parityPath);
|
|
||||||
long blockSize = parityStat.getBlockSize();
|
|
||||||
long parityFileSize = parityStat.getLen();
|
|
||||||
FileStatus srcStat = getDFS(srcPath).getFileStatus(srcPath);
|
|
||||||
long srcFileSize = srcStat.getLen();
|
|
||||||
|
|
||||||
// Check timestamp.
|
|
||||||
if (srcStat.getModificationTime() != parityStat.getModificationTime()) {
|
|
||||||
LOG.info("Mismatching timestamp for " + srcPath + " and " + parityPath +
|
|
||||||
", moving on...");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
String uriPath = parityPath.toUri().getPath();
|
|
||||||
int numBlocksFixed = 0;
|
|
||||||
List<LocatedBlock> corrupt =
|
|
||||||
RaidDFSUtil.corruptBlocksInFile(parityFs, uriPath, 0, parityFileSize);
|
|
||||||
if (corrupt.size() == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (LocatedBlock lb: corrupt) {
|
|
||||||
ExtendedBlock corruptBlock = lb.getBlock();
|
|
||||||
long corruptOffset = lb.getStartOffset();
|
|
||||||
|
|
||||||
LOG.info("Found corrupt block " + corruptBlock +
|
|
||||||
", offset " + corruptOffset);
|
|
||||||
|
|
||||||
File localBlockFile =
|
|
||||||
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
|
|
||||||
localBlockFile.deleteOnExit();
|
|
||||||
|
|
||||||
try {
|
|
||||||
encoder.recoverParityBlockToFile(parityFs, srcPath, srcFileSize,
|
|
||||||
blockSize, parityPath,
|
|
||||||
corruptOffset, localBlockFile);
|
|
||||||
// We have the contents of the block, send them.
|
|
||||||
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
|
|
||||||
computeMetadataAndSendFixedBlock(datanode, localBlockFile, lb,
|
|
||||||
blockSize);
|
|
||||||
|
|
||||||
numBlocksFixed++;
|
|
||||||
} finally {
|
|
||||||
localBlockFile.delete();
|
|
||||||
}
|
|
||||||
progress.progress();
|
|
||||||
}
|
|
||||||
LOG.info("Fixed " + numBlocksFixed + " blocks in " + parityPath);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads through a parity HAR part file, fixing corrupt blocks on the way.
|
|
||||||
* A HAR block can contain many file blocks, as long as the HAR part file
|
|
||||||
* block size is a multiple of the file block size.
|
|
||||||
* @return true if file has been fixed, false if no fixing
|
|
||||||
* was necessary or possible.
|
|
||||||
*/
|
|
||||||
boolean processCorruptParityHarPartFile(Path partFile,
|
|
||||||
Progressable progress)
|
|
||||||
throws IOException {
|
|
||||||
LOG.info("Processing corrupt file " + partFile);
|
|
||||||
// Get some basic information.
|
|
||||||
DistributedFileSystem dfs = getDFS(partFile);
|
|
||||||
FileStatus partFileStat = dfs.getFileStatus(partFile);
|
|
||||||
long partFileSize = partFileStat.getLen();
|
|
||||||
long partFileBlockSize = partFileStat.getBlockSize();
|
|
||||||
LOG.info(partFile + " has block size " + partFileBlockSize);
|
|
||||||
|
|
||||||
// Find the path to the index file.
|
|
||||||
// Parity file HARs are only one level deep, so the index files is at the
|
|
||||||
// same level as the part file.
|
|
||||||
String harDirectory = partFile.toUri().getPath(); // Temporarily.
|
|
||||||
harDirectory =
|
|
||||||
harDirectory.substring(0, harDirectory.lastIndexOf(Path.SEPARATOR));
|
|
||||||
Path indexFile = new Path(harDirectory + "/" + HarIndex.indexFileName);
|
|
||||||
FileStatus indexStat = dfs.getFileStatus(indexFile);
|
|
||||||
// Parses through the HAR index file.
|
|
||||||
HarIndex harIndex = new HarIndex(dfs.open(indexFile), indexStat.getLen());
|
|
||||||
|
|
||||||
String uriPath = partFile.toUri().getPath();
|
|
||||||
int numBlocksFixed = 0;
|
|
||||||
List<LocatedBlock> corrupt =
|
|
||||||
RaidDFSUtil.corruptBlocksInFile(dfs, uriPath, 0, partFileSize);
|
|
||||||
if (corrupt.size() == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (LocatedBlock lb: corrupt) {
|
|
||||||
ExtendedBlock corruptBlock = lb.getBlock();
|
|
||||||
long corruptOffset = lb.getStartOffset();
|
|
||||||
|
|
||||||
File localBlockFile =
|
|
||||||
File.createTempFile(corruptBlock.getBlockName(), ".tmp");
|
|
||||||
localBlockFile.deleteOnExit();
|
|
||||||
processCorruptParityHarPartBlock(dfs, partFile, corruptBlock,
|
|
||||||
corruptOffset, partFileStat, harIndex,
|
|
||||||
localBlockFile, progress);
|
|
||||||
// Now we have recovered the part file block locally, send it.
|
|
||||||
try {
|
|
||||||
DatanodeInfo datanode = chooseDatanode(lb.getLocations());
|
|
||||||
computeMetadataAndSendFixedBlock(datanode, localBlockFile,
|
|
||||||
lb, localBlockFile.length());
|
|
||||||
numBlocksFixed++;
|
|
||||||
} finally {
|
|
||||||
localBlockFile.delete();
|
|
||||||
}
|
|
||||||
progress.progress();
|
|
||||||
}
|
|
||||||
LOG.info("Fixed " + numBlocksFixed + " blocks in " + partFile);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This fixes a single part file block by recovering in sequence each
|
|
||||||
* parity block in the part file block.
|
|
||||||
*/
|
|
||||||
private void processCorruptParityHarPartBlock(FileSystem dfs, Path partFile,
|
|
||||||
ExtendedBlock corruptBlock,
|
|
||||||
long corruptOffset,
|
|
||||||
FileStatus partFileStat,
|
|
||||||
HarIndex harIndex,
|
|
||||||
File localBlockFile,
|
|
||||||
Progressable progress)
|
|
||||||
throws IOException {
|
|
||||||
String partName = partFile.toUri().getPath(); // Temporarily.
|
|
||||||
partName = partName.substring(1 + partName.lastIndexOf(Path.SEPARATOR));
|
|
||||||
|
|
||||||
OutputStream out = new FileOutputStream(localBlockFile);
|
|
||||||
|
|
||||||
try {
|
|
||||||
// A HAR part file block could map to several parity files. We need to
|
|
||||||
// use all of them to recover this block.
|
|
||||||
final long corruptEnd = Math.min(corruptOffset +
|
|
||||||
partFileStat.getBlockSize(),
|
|
||||||
partFileStat.getLen());
|
|
||||||
for (long offset = corruptOffset; offset < corruptEnd; ) {
|
|
||||||
HarIndex.IndexEntry entry = harIndex.findEntry(partName, offset);
|
|
||||||
if (entry == null) {
|
|
||||||
String msg = "Corrupt index file has no matching index entry for " +
|
|
||||||
partName + ":" + offset;
|
|
||||||
LOG.warn(msg);
|
|
||||||
throw new IOException(msg);
|
|
||||||
}
|
|
||||||
Path parityFile = new Path(entry.fileName);
|
|
||||||
Encoder encoder;
|
|
||||||
if (isXorParityFile(parityFile)) {
|
|
||||||
encoder = xorEncoder;
|
|
||||||
} else if (isRsParityFile(parityFile)) {
|
|
||||||
encoder = rsEncoder;
|
|
||||||
} else {
|
|
||||||
String msg = "Could not figure out parity file correctly";
|
|
||||||
LOG.warn(msg);
|
|
||||||
throw new IOException(msg);
|
|
||||||
}
|
|
||||||
Path srcFile = sourcePathFromParityPath(parityFile);
|
|
||||||
FileStatus srcStat = dfs.getFileStatus(srcFile);
|
|
||||||
if (srcStat.getModificationTime() != entry.mtime) {
|
|
||||||
String msg = "Modification times of " + parityFile + " and " +
|
|
||||||
srcFile + " do not match.";
|
|
||||||
LOG.warn(msg);
|
|
||||||
throw new IOException(msg);
|
|
||||||
}
|
|
||||||
long corruptOffsetInParity = offset - entry.startOffset;
|
|
||||||
LOG.info(partFile + ":" + offset + " maps to " +
|
|
||||||
parityFile + ":" + corruptOffsetInParity +
|
|
||||||
" and will be recovered from " + srcFile);
|
|
||||||
encoder.recoverParityBlockToStream(dfs, srcFile, srcStat.getLen(),
|
|
||||||
srcStat.getBlockSize(), parityFile,
|
|
||||||
corruptOffsetInParity, out);
|
|
||||||
// Finished recovery of one parity block. Since a parity block has the
|
|
||||||
// same size as a source block, we can move offset by source block size.
|
|
||||||
offset += srcStat.getBlockSize();
|
|
||||||
LOG.info("Recovered " + srcStat.getBlockSize() + " part file bytes ");
|
|
||||||
if (offset > corruptEnd) {
|
|
||||||
String msg =
|
|
||||||
"Recovered block spills across part file blocks. Cannot continue.";
|
|
||||||
throw new IOException(msg);
|
|
||||||
}
|
|
||||||
progress.progress();
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
out.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Choose a datanode (hostname:portnumber). The datanode is chosen at
|
|
||||||
* random from the live datanodes.
|
|
||||||
* @param locationsToAvoid locations to avoid.
|
|
||||||
* @return A datanode
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private DatanodeInfo chooseDatanode(DatanodeInfo[] locationsToAvoid)
|
|
||||||
throws IOException {
|
|
||||||
DistributedFileSystem dfs = getDFS(new Path("/"));
|
|
||||||
DatanodeInfo[] live =
|
|
||||||
dfs.getClient().datanodeReport(DatanodeReportType.LIVE);
|
|
||||||
LOG.info("Choosing a datanode from " + live.length +
|
|
||||||
" live nodes while avoiding " + locationsToAvoid.length);
|
|
||||||
Random rand = new Random();
|
|
||||||
DatanodeInfo chosen = null;
|
|
||||||
int maxAttempts = 1000;
|
|
||||||
for (int i = 0; i < maxAttempts && chosen == null; i++) {
|
|
||||||
int idx = rand.nextInt(live.length);
|
|
||||||
chosen = live[idx];
|
|
||||||
for (DatanodeInfo avoid: locationsToAvoid) {
|
|
||||||
if (chosen.getName().equals(avoid.getName())) {
|
|
||||||
LOG.info("Avoiding " + avoid.getName());
|
|
||||||
chosen = null;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (chosen == null) {
|
|
||||||
throw new IOException("Could not choose datanode");
|
|
||||||
}
|
|
||||||
LOG.info("Choosing datanode " + chosen.getName());
|
|
||||||
return chosen;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Reads data from the data stream provided and computes metadata.
 */
static DataInputStream computeMetadata(Configuration conf,
                                       InputStream dataStream)
  throws IOException {
  ByteArrayOutputStream mdOutBase = new ByteArrayOutputStream(1024*1024);
  DataOutputStream mdOut = new DataOutputStream(mdOutBase);

  // First, write out the version.
  mdOut.writeShort(BlockMetadataHeader.VERSION);

  // Create a summer and write out its header.
  int bytesPerChecksum = conf.getInt("dfs.bytes-per-checksum", 512);
  DataChecksum sum =
    DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
                                 bytesPerChecksum);
  sum.writeHeader(mdOut);

  // Buffer to read in a chunk of data.
  byte[] buf = new byte[bytesPerChecksum];
  // Buffer to store the checksum bytes.
  byte[] chk = new byte[sum.getChecksumSize()];

  // Read data till we reach the end of the input stream.
  int bytesSinceFlush = 0;
  while (true) {
    // Read some bytes.
    int bytesRead = dataStream.read(buf, bytesSinceFlush,
                                    bytesPerChecksum-bytesSinceFlush);
    if (bytesRead == -1) {
      if (bytesSinceFlush > 0) {
        boolean reset = true;
        sum.writeValue(chk, 0, reset); // This also resets the sum.
        // Write the checksum to the stream.
        mdOut.write(chk, 0, chk.length);
        bytesSinceFlush = 0;
      }
      break;
    }
    // Update the checksum.
    sum.update(buf, bytesSinceFlush, bytesRead);
    bytesSinceFlush += bytesRead;

    // Flush the checksum if necessary.
    if (bytesSinceFlush == bytesPerChecksum) {
      boolean reset = true;
      sum.writeValue(chk, 0, reset); // This also resets the sum.
      // Write the checksum to the stream.
      mdOut.write(chk, 0, chk.length);
      bytesSinceFlush = 0;
    }
  }

  byte[] mdBytes = mdOutBase.toByteArray();
  return new DataInputStream(new ByteArrayInputStream(mdBytes));
}
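
// Illustration only (not part of the original file): the length of the
// metadata produced by computeMetadata() depends only on the block length,
// dfs.bytes-per-checksum and the 4-byte CRC32 checksum width. The header
// size assumed below (2-byte version plus a 5-byte DataChecksum header) is
// inferred from the fields written above, not taken from the source.
static long estimatedMetadataLength(long blockLength, int bytesPerChecksum) {
  final int checksumSize = 4;                                       // CRC32
  final int headerSize = 2 /* version */ + 1 /* type */ + 4 /* bytesPerChecksum */;
  long chunks = (blockLength + bytesPerChecksum - 1) / bytesPerChecksum;
  return headerSize + chunks * checksumSize;   // one checksum per (partial) chunk
}
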
|
|
||||||
|
|
||||||
private void computeMetadataAndSendFixedBlock(DatanodeInfo datanode,
                                              File localBlockFile,
                                              LocatedBlock block,
                                              long blockSize)
  throws IOException {

  LOG.info("Computing metadata");
  InputStream blockContents = null;
  DataInputStream blockMetadata = null;
  try {
    blockContents = new FileInputStream(localBlockFile);
    blockMetadata = computeMetadata(getConf(), blockContents);
    blockContents.close();
    // Reopen
    blockContents = new FileInputStream(localBlockFile);
    sendFixedBlock(datanode, blockContents, blockMetadata, block,
                   blockSize);
  } finally {
    if (blockContents != null) {
      blockContents.close();
      blockContents = null;
    }
    if (blockMetadata != null) {
      blockMetadata.close();
      blockMetadata = null;
    }
  }
}

/**
|
|
||||||
* Send a generated block to a datanode.
|
|
||||||
* @param datanode Chosen datanode name in host:port form.
|
|
||||||
* @param blockContents Stream with the block contents.
|
|
||||||
* @param corruptBlock Block identifying the block to be sent.
|
|
||||||
* @param blockSize size of the block.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private void sendFixedBlock(DatanodeInfo datanode,
|
|
||||||
final InputStream blockContents,
|
|
||||||
DataInputStream metadataIn,
|
|
||||||
LocatedBlock block, long blockSize)
|
|
||||||
throws IOException {
|
|
||||||
InetSocketAddress target = NetUtils.createSocketAddr(datanode.getName());
|
|
||||||
Socket sock = SocketChannel.open().socket();
|
|
||||||
|
|
||||||
int readTimeout =
|
|
||||||
getConf().getInt(BLOCKFIX_READ_TIMEOUT,
|
|
||||||
HdfsServerConstants.READ_TIMEOUT);
|
|
||||||
NetUtils.connect(sock, target, readTimeout);
|
|
||||||
sock.setSoTimeout(readTimeout);
|
|
||||||
|
|
||||||
int writeTimeout = getConf().getInt(BLOCKFIX_WRITE_TIMEOUT,
|
|
||||||
HdfsServerConstants.WRITE_TIMEOUT);
|
|
||||||
|
|
||||||
OutputStream baseStream = NetUtils.getOutputStream(sock, writeTimeout);
|
|
||||||
DataOutputStream out =
|
|
||||||
new DataOutputStream(new BufferedOutputStream(baseStream,
|
|
||||||
HdfsConstants.
|
|
||||||
SMALL_BUFFER_SIZE));
|
|
||||||
|
|
||||||
boolean corruptChecksumOk = false;
|
|
||||||
boolean chunkOffsetOK = false;
|
|
||||||
boolean verifyChecksum = true;
|
|
||||||
boolean transferToAllowed = false;
|
|
||||||
|
|
||||||
try {
|
|
||||||
LOG.info("Sending block " + block.getBlock() +
|
|
||||||
" from " + sock.getLocalSocketAddress().toString() +
|
|
||||||
" to " + sock.getRemoteSocketAddress().toString() +
|
|
||||||
" " + blockSize + " bytes");
|
|
||||||
RaidBlockSender blockSender =
|
|
||||||
new RaidBlockSender(block.getBlock(), blockSize, 0, blockSize,
|
|
||||||
corruptChecksumOk, chunkOffsetOK, verifyChecksum,
|
|
||||||
transferToAllowed, metadataIn,
|
|
||||||
new RaidBlockSender.InputStreamFactory() {
|
|
||||||
@Override
|
|
||||||
public InputStream
|
|
||||||
createStream(long offset) throws IOException {
|
|
||||||
// we are passing 0 as the offset above,
|
|
||||||
// so we can safely ignore
|
|
||||||
// the offset passed
|
|
||||||
return blockContents;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
DatanodeInfo[] nodes = new DatanodeInfo[]{datanode};
|
|
||||||
DataChecksum checksum = blockSender.getChecksum();
|
|
||||||
new Sender(out).writeBlock(block.getBlock(), block.getBlockToken(), "",
|
|
||||||
nodes, null, BlockConstructionStage.PIPELINE_SETUP_CREATE,
|
|
||||||
1, 0L, blockSize, 0L, DataChecksum.newDataChecksum(
|
|
||||||
checksum.getChecksumType(), checksum.getBytesPerChecksum()));
|
|
||||||
blockSender.sendBlock(out, baseStream);
|
|
||||||
|
|
||||||
LOG.info("Sent block " + block.getBlock() + " to " + datanode.getName());
|
|
||||||
} finally {
|
|
||||||
out.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * returns the source file corresponding to a parity file
 */
Path sourcePathFromParityPath(Path parityPath) {
  String parityPathStr = parityPath.toUri().getPath();
  if (parityPathStr.startsWith(xorPrefix)) {
    // Remove the prefix to get the source file.
    String src = parityPathStr.replaceFirst(xorPrefix, "/");
    return new Path(src);
  } else if (parityPathStr.startsWith(rsPrefix)) {
    // Remove the prefix to get the source file.
    String src = parityPathStr.replaceFirst(rsPrefix, "/");
    return new Path(src);
  }
  return null;
}
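
// Illustration only, assuming hypothetical parity locations
// xorPrefix = "/raid/" and rsPrefix = "/raidrs/" (the real values come from
// the Raid configuration and are not shown in this file):
//   sourcePathFromParityPath(new Path("/raid/user/foo/part-00000"))
//     -> /user/foo/part-00000   (XOR parity)
//   sourcePathFromParityPath(new Path("/raidrs/user/foo/part-00000"))
//     -> /user/foo/part-00000   (Reed-Solomon parity)
//   sourcePathFromParityPath(new Path("/tmp/unrelated"))
//     -> null                   (not a parity path)
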
|
|
||||||
|
|
||||||
/**
 * Returns the corrupt blocks in a file.
 */
List<LocatedBlock> corruptBlocksInFile(DistributedFileSystem fs,
                                       String uriPath, FileStatus stat)
  throws IOException {
  List<LocatedBlock> corrupt = new LinkedList<LocatedBlock>();
  LocatedBlocks locatedBlocks =
    RaidDFSUtil.getBlockLocations(fs, uriPath, 0, stat.getLen());
  for (LocatedBlock b: locatedBlocks.getLocatedBlocks()) {
    if (b.isCorrupt() ||
        (b.getLocations().length == 0 && b.getBlockSize() > 0)) {
      corrupt.add(b);
    }
  }
  return corrupt;
}
}

}
|
|
||||||
|
|
|
@ -1,408 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import javax.xml.parsers.DocumentBuilder;
|
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.w3c.dom.Document;
|
|
||||||
import org.w3c.dom.Element;
|
|
||||||
import org.w3c.dom.Node;
|
|
||||||
import org.w3c.dom.NodeList;
|
|
||||||
import org.w3c.dom.Text;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyList;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Maintains the configuration xml file that is read into memory.
|
|
||||||
*/
|
|
||||||
class ConfigManager {
|
|
||||||
public static final Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.ConfigManager");
|
|
||||||
|
|
||||||
/** Time to wait between checks of the config file */
|
|
||||||
public static final long RELOAD_INTERVAL = 10 * 1000;
|
|
||||||
|
|
||||||
/** Time to wait between successive runs of all policies */
|
|
||||||
public static final long RESCAN_INTERVAL = 3600 * 1000;
|
|
||||||
|
|
||||||
public static final long HAR_PARTFILE_SIZE = 10 * 1024 * 1024 * 1024l;
|
|
||||||
|
|
||||||
public static final int DISTRAID_MAX_JOBS = 10;
|
|
||||||
|
|
||||||
public static final int DISTRAID_MAX_FILES = 10000;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Time to wait after the config file has been modified before reloading it
|
|
||||||
* (this is done to prevent loading a file that hasn't been fully written).
|
|
||||||
*/
|
|
||||||
public static final long RELOAD_WAIT = 5 * 1000;
|
|
||||||
|
|
||||||
private Configuration conf; // Hadoop configuration
|
|
||||||
private String configFileName; // Path to config XML file
|
|
||||||
|
|
||||||
private long lastReloadAttempt; // Last time we tried to reload the config file
|
|
||||||
private long lastSuccessfulReload; // Last time we successfully reloaded config
|
|
||||||
private boolean lastReloadAttemptFailed = false;
|
|
||||||
private long reloadInterval = RELOAD_INTERVAL;
|
|
||||||
private long periodicity; // time between runs of all policies
|
|
||||||
private long harPartfileSize;
|
|
||||||
private int maxJobsPerPolicy; // Max no. of jobs running simultaneously for
|
|
||||||
// a job.
|
|
||||||
private int maxFilesPerJob; // Max no. of files raided by a job.
|
|
||||||
|
|
||||||
// Reload the configuration
|
|
||||||
private boolean doReload;
|
|
||||||
private Thread reloadThread;
|
|
||||||
private volatile boolean running = false;
|
|
||||||
|
|
||||||
// Collection of all configured policies.
|
|
||||||
Collection<PolicyList> allPolicies = new ArrayList<PolicyList>();
|
|
||||||
|
|
||||||
public ConfigManager(Configuration conf) throws IOException, SAXException,
|
|
||||||
RaidConfigurationException, ClassNotFoundException, ParserConfigurationException {
|
|
||||||
this.conf = conf;
|
|
||||||
this.configFileName = conf.get("raid.config.file");
|
|
||||||
this.doReload = conf.getBoolean("raid.config.reload", true);
|
|
||||||
this.reloadInterval = conf.getLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
this.periodicity = conf.getLong("raid.policy.rescan.interval", RESCAN_INTERVAL);
|
|
||||||
this.harPartfileSize = conf.getLong("raid.har.partfile.size", HAR_PARTFILE_SIZE);
|
|
||||||
this.maxJobsPerPolicy = conf.getInt("raid.distraid.max.jobs",
|
|
||||||
DISTRAID_MAX_JOBS);
|
|
||||||
this.maxFilesPerJob = conf.getInt("raid.distraid.max.files",
|
|
||||||
DISTRAID_MAX_FILES);
|
|
||||||
if (configFileName == null) {
|
|
||||||
String msg = "No raid.config.file given in conf - " +
|
|
||||||
"the Hadoop Raid utility cannot run. Aborting....";
|
|
||||||
LOG.warn(msg);
|
|
||||||
throw new IOException(msg);
|
|
||||||
}
|
|
||||||
reloadConfigs();
|
|
||||||
lastSuccessfulReload = RaidNode.now();
|
|
||||||
lastReloadAttempt = RaidNode.now();
|
|
||||||
running = true;
|
|
||||||
}
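
// Usage sketch (illustration only, not part of the original file): the minimal
// configuration wiring needed before a ConfigManager can be constructed. The
// file path below is hypothetical; the property names match those read by the
// constructor above.
static ConfigManager createConfigManager() throws Exception {
  Configuration conf = new Configuration();
  conf.set("raid.config.file", "/etc/hadoop/raid.xml");     // hypothetical path
  conf.setBoolean("raid.config.reload", true);              // reload on change
  conf.setLong("raid.config.reload.interval", 10 * 1000L);  // check every 10 seconds
  ConfigManager configManager = new ConfigManager(conf);
  configManager.startReload();
  return configManager;
}
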
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reload config file if it hasn't been loaded in a while
|
|
||||||
* Returns true if the file was reloaded.
|
|
||||||
*/
|
|
||||||
public synchronized boolean reloadConfigsIfNecessary() {
|
|
||||||
long time = RaidNode.now();
|
|
||||||
if (time > lastReloadAttempt + reloadInterval) {
|
|
||||||
lastReloadAttempt = time;
|
|
||||||
try {
|
|
||||||
File file = new File(configFileName);
|
|
||||||
long lastModified = file.lastModified();
|
|
||||||
if (lastModified > lastSuccessfulReload &&
|
|
||||||
time > lastModified + RELOAD_WAIT) {
|
|
||||||
reloadConfigs();
|
|
||||||
lastSuccessfulReload = time;
|
|
||||||
lastReloadAttemptFailed = false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
if (!lastReloadAttemptFailed) {
|
|
||||||
LOG.error("Failed to reload config file - " +
|
|
||||||
"will use existing configuration.", e);
|
|
||||||
}
|
|
||||||
lastReloadAttemptFailed = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Updates the in-memory data structures from the config file. This file is
|
|
||||||
* expected to be in the following XML format:
|
|
||||||
*
|
|
||||||
<configuration>
|
|
||||||
<srcPath prefix="hdfs://hadoop.myhost.com:9000/user/warehouse/u_full/*">
|
|
||||||
<policy name = RaidScanWeekly>
|
|
||||||
<destPath> hdfs://dfsname.myhost.com:9000/archive/</destPath>
|
|
||||||
<parentPolicy> RaidScanMonthly</parentPolicy>
|
|
||||||
<property>
|
|
||||||
<name>targetReplication</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description> after RAIDing, decrease the replication factor of the file to
|
|
||||||
this value.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>metaReplication</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description> the replication factor of the RAID meta file
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>stripeLength</name>
|
|
||||||
<value>10</value>
|
|
||||||
<description> the number of blocks to RAID together
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
</policy>
|
|
||||||
</srcPath>
|
|
||||||
</configuration>
|
|
||||||
*
|
|
||||||
* XML comments in the file are ignored.
|
|
||||||
*
|
|
||||||
* @throws IOException if the config file cannot be read.
|
|
||||||
* @throws RaidConfigurationException if configuration entries are invalid.
|
|
||||||
* @throws ClassNotFoundException if user-defined policy classes cannot be loaded
|
|
||||||
* @throws ParserConfigurationException if XML parser is misconfigured.
|
|
||||||
* @throws SAXException if config file is malformed.
|
|
||||||
* @returns A new set of policy categories.
|
|
||||||
*/
|
|
||||||
void reloadConfigs() throws IOException, ParserConfigurationException,
|
|
||||||
SAXException, ClassNotFoundException, RaidConfigurationException {
|
|
||||||
|
|
||||||
if (configFileName == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
File file = new File(configFileName);
|
|
||||||
if (!file.exists()) {
|
|
||||||
throw new RaidConfigurationException("Configuration file " + configFileName +
|
|
||||||
" does not exist.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create some temporary hashmaps to hold the new allocs, and we only save
|
|
||||||
// them in our fields if we have parsed the entire allocs file successfully.
|
|
||||||
List<PolicyList> all = new ArrayList<PolicyList>();
|
|
||||||
long periodicityValue = periodicity;
|
|
||||||
|
|
||||||
|
|
||||||
// Read and parse the configuration file.
|
|
||||||
// allow include files in configuration file
|
|
||||||
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
|
|
||||||
docBuilderFactory.setIgnoringComments(true);
|
|
||||||
docBuilderFactory.setNamespaceAware(true);
|
|
||||||
try {
|
|
||||||
docBuilderFactory.setXIncludeAware(true);
|
|
||||||
} catch (UnsupportedOperationException e) {
|
|
||||||
LOG.error("Failed to set setXIncludeAware(true) for raid parser "
|
|
||||||
+ docBuilderFactory + ":" + e, e);
|
|
||||||
}
|
|
||||||
LOG.error("Reloading config file " + file);
|
|
||||||
|
|
||||||
DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
|
|
||||||
Document doc = builder.parse(file);
|
|
||||||
Element root = doc.getDocumentElement();
|
|
||||||
if (!"configuration".equalsIgnoreCase(root.getTagName()))
|
|
||||||
throw new RaidConfigurationException("Bad configuration file: " +
|
|
||||||
"top-level element not <configuration>");
|
|
||||||
NodeList elements = root.getChildNodes();
|
|
||||||
|
|
||||||
Map<String, PolicyInfo> existingPolicies =
|
|
||||||
new HashMap<String, PolicyInfo>();
|
|
||||||
// loop through all the configured source paths.
|
|
||||||
for (int i = 0; i < elements.getLength(); i++) {
|
|
||||||
Node node = elements.item(i);
|
|
||||||
if (!(node instanceof Element)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Element element = (Element)node;
|
|
||||||
String elementTagName = element.getTagName();
|
|
||||||
if ("srcPath".equalsIgnoreCase(elementTagName)) {
|
|
||||||
String srcPathPrefix = element.getAttribute("prefix");
|
|
||||||
|
|
||||||
PolicyList policyList = null;
|
|
||||||
if (srcPathPrefix != null && srcPathPrefix.length() != 0) {
|
|
||||||
// Empty srcPath will have no effect but policies will be processed
|
|
||||||
// This allow us to define some "abstract" policies
|
|
||||||
policyList = new PolicyList();
|
|
||||||
all.add(policyList);
|
|
||||||
policyList.setSrcPath(conf, srcPathPrefix);
|
|
||||||
}
|
|
||||||
|
|
||||||
// loop through all the policies for this source path
|
|
||||||
NodeList policies = element.getChildNodes();
|
|
||||||
for (int j = 0; j < policies.getLength(); j++) {
|
|
||||||
Node node1 = policies.item(j);
|
|
||||||
if (!(node1 instanceof Element)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Element policy = (Element)node1;
|
|
||||||
if (!"policy".equalsIgnoreCase(policy.getTagName())) {
|
|
||||||
throw new RaidConfigurationException("Bad configuration file: " +
|
|
||||||
"Expecting <policy> for srcPath " + srcPathPrefix);
|
|
||||||
}
|
|
||||||
String policyName = policy.getAttribute("name");
|
|
||||||
PolicyInfo curr = new PolicyInfo(policyName, conf);
|
|
||||||
if (srcPathPrefix != null && srcPathPrefix.length() > 0) {
|
|
||||||
curr.setSrcPath(srcPathPrefix);
|
|
||||||
}
|
|
||||||
// loop through all the properties of this policy
|
|
||||||
NodeList properties = policy.getChildNodes();
|
|
||||||
PolicyInfo parent = null;
|
|
||||||
for (int k = 0; k < properties.getLength(); k++) {
|
|
||||||
Node node2 = properties.item(k);
|
|
||||||
if (!(node2 instanceof Element)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Element property = (Element)node2;
|
|
||||||
String propertyName = property.getTagName();
|
|
||||||
if ("erasureCode".equalsIgnoreCase(propertyName)) {
|
|
||||||
String text = ((Text)property.getFirstChild()).getData().trim();
|
|
||||||
LOG.info(policyName + ".erasureCode = " + text);
|
|
||||||
curr.setErasureCode(text);
|
|
||||||
} else if ("description".equalsIgnoreCase(propertyName)) {
|
|
||||||
String text = ((Text)property.getFirstChild()).getData().trim();
|
|
||||||
curr.setDescription(text);
|
|
||||||
} else if ("parentPolicy".equalsIgnoreCase(propertyName)) {
|
|
||||||
String text = ((Text)property.getFirstChild()).getData().trim();
|
|
||||||
parent = existingPolicies.get(text);
|
|
||||||
} else if ("property".equalsIgnoreCase(propertyName)) {
|
|
||||||
NodeList nl = property.getChildNodes();
|
|
||||||
String pname=null,pvalue=null;
|
|
||||||
for (int l = 0; l < nl.getLength(); l++){
|
|
||||||
Node node3 = nl.item(l);
|
|
||||||
if (!(node3 instanceof Element)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Element item = (Element) node3;
|
|
||||||
String itemName = item.getTagName();
|
|
||||||
if ("name".equalsIgnoreCase(itemName)){
|
|
||||||
pname = ((Text)item.getFirstChild()).getData().trim();
|
|
||||||
} else if ("value".equalsIgnoreCase(itemName)){
|
|
||||||
pvalue = ((Text)item.getFirstChild()).getData().trim();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (pname != null && pvalue != null) {
|
|
||||||
LOG.info(policyName + "." + pname + " = " + pvalue);
|
|
||||||
curr.setProperty(pname,pvalue);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG.warn("Found bad property " + propertyName +
|
|
||||||
" for srcPath" + srcPathPrefix +
|
|
||||||
" policy name " + policyName +
|
|
||||||
". Ignoring.");
|
|
||||||
}
|
|
||||||
} // done with all properties of this policy
|
|
||||||
|
|
||||||
PolicyInfo pinfo;
|
|
||||||
if (parent != null) {
|
|
||||||
pinfo = new PolicyInfo(policyName, conf);
|
|
||||||
pinfo.copyFrom(parent);
|
|
||||||
pinfo.copyFrom(curr);
|
|
||||||
} else {
|
|
||||||
pinfo = curr;
|
|
||||||
}
|
|
||||||
if (policyList != null) {
|
|
||||||
policyList.add(pinfo);
|
|
||||||
}
|
|
||||||
existingPolicies.put(policyName, pinfo);
|
|
||||||
|
|
||||||
} // done with all policies for this srcpath
|
|
||||||
}
|
|
||||||
} // done with all srcPaths
|
|
||||||
setAllPolicies(all);
|
|
||||||
periodicity = periodicityValue;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public synchronized long getPeriodicity() {
|
|
||||||
return periodicity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized long getHarPartfileSize() {
|
|
||||||
return harPartfileSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized int getMaxJobsPerPolicy() {
|
|
||||||
return maxJobsPerPolicy;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized int getMaxFilesPerJob() {
|
|
||||||
return maxFilesPerJob;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a collection of all policies
|
|
||||||
*/
|
|
||||||
public synchronized Collection<PolicyList> getAllPolicies() {
|
|
||||||
return new ArrayList(allPolicies);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set a collection of all policies
|
|
||||||
*/
|
|
||||||
protected synchronized void setAllPolicies(Collection<PolicyList> value) {
|
|
||||||
this.allPolicies = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start a background thread to reload the config file
|
|
||||||
*/
|
|
||||||
void startReload() {
|
|
||||||
if (doReload) {
|
|
||||||
reloadThread = new UpdateThread();
|
|
||||||
reloadThread.start();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stop the background thread that reload the config file
|
|
||||||
*/
|
|
||||||
void stopReload() throws InterruptedException {
|
|
||||||
if (reloadThread != null) {
|
|
||||||
running = false;
|
|
||||||
reloadThread.interrupt();
|
|
||||||
reloadThread.join();
|
|
||||||
reloadThread = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A thread which reloads the config file.
|
|
||||||
*/
|
|
||||||
private class UpdateThread extends Thread {
|
|
||||||
private UpdateThread() {
|
|
||||||
super("Raid update thread");
|
|
||||||
}
|
|
||||||
|
|
||||||
public void run() {
|
|
||||||
while (running) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(reloadInterval);
|
|
||||||
reloadConfigsIfNecessary();
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
// do nothing
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.error("Failed to reload config file ", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,213 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.util.Random;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.hdfs.BlockMissingException;
|
|
||||||
import org.apache.hadoop.fs.ChecksumException;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents a generic decoder that can be used to read a file with
|
|
||||||
* corrupt blocks by using the parity file.
|
|
||||||
* This is an abstract class, concrete subclasses need to implement
|
|
||||||
* fixErasedBlock.
|
|
||||||
*/
|
|
||||||
public abstract class Decoder {
|
|
||||||
public static final Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.Decoder");
|
|
||||||
protected Configuration conf;
|
|
||||||
protected int stripeSize;
|
|
||||||
protected int paritySize;
|
|
||||||
protected Random rand;
|
|
||||||
protected int bufSize;
|
|
||||||
protected byte[][] readBufs;
|
|
||||||
protected byte[][] writeBufs;
|
|
||||||
|
|
||||||
Decoder(Configuration conf, int stripeSize, int paritySize) {
|
|
||||||
this.conf = conf;
|
|
||||||
this.stripeSize = stripeSize;
|
|
||||||
this.paritySize = paritySize;
|
|
||||||
this.rand = new Random();
|
|
||||||
this.bufSize = conf.getInt("raid.decoder.bufsize", 1024 * 1024);
|
|
||||||
this.readBufs = new byte[stripeSize + paritySize][];
|
|
||||||
this.writeBufs = new byte[paritySize][];
|
|
||||||
allocateBuffers();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void allocateBuffers() {
|
|
||||||
for (int i = 0; i < stripeSize + paritySize; i++) {
|
|
||||||
readBufs[i] = new byte[bufSize];
|
|
||||||
}
|
|
||||||
for (int i = 0; i < paritySize; i++) {
|
|
||||||
writeBufs[i] = new byte[bufSize];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void configureBuffers(long blockSize) {
  if ((long)bufSize > blockSize) {
    bufSize = (int)blockSize;
    allocateBuffers();
  } else if (blockSize % bufSize != 0) {
    bufSize = (int)(blockSize / 256L); // heuristic.
    if (bufSize == 0) {
      bufSize = 1024;
    }
    bufSize = Math.min(bufSize, 1024 * 1024);
    allocateBuffers();
  }
}
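
// Worked example (illustration only), assuming the initial bufSize of
// 1024 * 1024 bytes set in the constructor:
//   blockSize = 512 KB           -> bufSize shrinks to 512 KB (block smaller than buffer)
//   blockSize = 128 MB           -> bufSize stays 1 MB (1 MB evenly divides the block)
//   blockSize = 100 MB + 1 byte  -> not a multiple of 1 MB, so
//                                   bufSize = min(blockSize / 256, 1 MB) = 409600 bytes
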
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The interface to generate a decoded file using the good portion of the
|
|
||||||
* source file and the parity file.
|
|
||||||
* @param fs The filesystem containing the source file.
|
|
||||||
* @param srcFile The damaged source file.
|
|
||||||
* @param parityFs The filesystem containing the parity file. This could be
|
|
||||||
* different from fs in case the parity file is part of a HAR archive.
|
|
||||||
* @param parityFile The parity file.
|
|
||||||
* @param errorOffset Known location of error in the source file. There could
|
|
||||||
* be additional errors in the source file that are discovered during
|
|
||||||
* the decode process.
|
|
||||||
* @param decodedFile The decoded file. This will have the exact same contents
|
|
||||||
* as the source file on success.
|
|
||||||
*/
|
|
||||||
public void decodeFile(
|
|
||||||
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
|
|
||||||
long errorOffset, Path decodedFile) throws IOException {
|
|
||||||
|
|
||||||
LOG.info("Create " + decodedFile + " for error at " +
|
|
||||||
srcFile + ":" + errorOffset);
|
|
||||||
FileStatus srcStat = fs.getFileStatus(srcFile);
|
|
||||||
long blockSize = srcStat.getBlockSize();
|
|
||||||
configureBuffers(blockSize);
|
|
||||||
// Move the offset to the start of the block.
|
|
||||||
errorOffset = (errorOffset / blockSize) * blockSize;
|
|
||||||
|
|
||||||
// Create the decoded file.
|
|
||||||
FSDataOutputStream out = fs.create(
|
|
||||||
decodedFile, false, conf.getInt("io.file.buffer.size", 64 * 1024),
|
|
||||||
srcStat.getReplication(), srcStat.getBlockSize());
|
|
||||||
|
|
||||||
// Open the source file.
|
|
||||||
FSDataInputStream in = fs.open(
|
|
||||||
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
|
||||||
|
|
||||||
// Start copying data block-by-block.
|
|
||||||
for (long offset = 0; offset < srcStat.getLen(); offset += blockSize) {
|
|
||||||
long limit = Math.min(blockSize, srcStat.getLen() - offset);
|
|
||||||
long bytesAlreadyCopied = 0;
|
|
||||||
if (offset != errorOffset) {
|
|
||||||
try {
|
|
||||||
in = fs.open(
|
|
||||||
srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
|
|
||||||
in.seek(offset);
|
|
||||||
RaidUtils.copyBytes(in, out, readBufs[0], limit);
|
|
||||||
assert(out.getPos() == offset +limit);
|
|
||||||
LOG.info("Copied till " + out.getPos() + " from " + srcFile);
|
|
||||||
continue;
|
|
||||||
} catch (BlockMissingException e) {
|
|
||||||
LOG.warn("Encountered BME at " + srcFile + ":" + offset);
|
|
||||||
bytesAlreadyCopied = out.getPos() - offset;
|
|
||||||
} catch (ChecksumException e) {
|
|
||||||
LOG.warn("Encountered CE at " + srcFile + ":" + offset);
|
|
||||||
bytesAlreadyCopied = out.getPos() - offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If we are here offset == errorOffset or we got an exception.
|
|
||||||
// Recover the block starting at offset.
|
|
||||||
fixErasedBlock(fs, srcFile, parityFs, parityFile, blockSize, offset,
|
|
||||||
bytesAlreadyCopied, limit, out);
|
|
||||||
}
|
|
||||||
out.close();
|
|
||||||
|
|
||||||
try {
|
|
||||||
fs.setOwner(decodedFile, srcStat.getOwner(), srcStat.getGroup());
|
|
||||||
fs.setPermission(decodedFile, srcStat.getPermission());
|
|
||||||
fs.setTimes(decodedFile, srcStat.getModificationTime(),
|
|
||||||
srcStat.getAccessTime());
|
|
||||||
} catch (Exception exc) {
|
|
||||||
LOG.warn("Didn't manage to copy meta information because of " + exc +
|
|
||||||
" Ignoring...");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Recovers a corrupt block to local file.
|
|
||||||
*
|
|
||||||
* @param srcFs The filesystem containing the source file.
|
|
||||||
* @param srcPath The damaged source file.
|
|
||||||
* @param parityFs The filesystem containing the parity file. This could be
|
|
||||||
* different from fs in case the parity file is part of a HAR archive.
|
|
||||||
* @param parityPath The parity file.
|
|
||||||
* @param blockSize The block size of the file.
|
|
||||||
* @param blockOffset Known location of error in the source file. There could
|
|
||||||
* be additional errors in the source file that are discovered during
|
|
||||||
* the decode process.
|
|
||||||
* @param localBlockFile The file to write the block to.
|
|
||||||
* @param limit The maximum number of bytes to be written out.
|
|
||||||
* This is to prevent writing beyond the end of the file.
|
|
||||||
*/
|
|
||||||
public void recoverBlockToFile(
|
|
||||||
FileSystem srcFs, Path srcPath, FileSystem parityFs, Path parityPath,
|
|
||||||
long blockSize, long blockOffset, File localBlockFile, long limit)
|
|
||||||
throws IOException {
|
|
||||||
OutputStream out = new FileOutputStream(localBlockFile);
|
|
||||||
fixErasedBlock(srcFs, srcPath, parityFs, parityPath,
|
|
||||||
blockSize, blockOffset, 0, limit, out);
|
|
||||||
out.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implementation-specific mechanism of writing a fixed block.
|
|
||||||
* @param fs The filesystem containing the source file.
|
|
||||||
* @param srcFile The damaged source file.
|
|
||||||
* @param parityFs The filesystem containing the parity file. This could be
|
|
||||||
* different from fs in case the parity file is part of a HAR archive.
|
|
||||||
* @param parityFile The parity file.
|
|
||||||
* @param blockSize The maximum size of a block.
|
|
||||||
* @param errorOffset Known location of error in the source file. There could
|
|
||||||
* be additional errors in the source file that are discovered during
|
|
||||||
* the decode process.
|
|
||||||
* @param bytesToSkip After the block is generated, these many bytes should be
|
|
||||||
* skipped before writing to the output. This is needed because the
|
|
||||||
* output may have a portion of the block written from the source file
|
|
||||||
* before a new corruption is discovered in the block.
|
|
||||||
* @param limit The maximum number of bytes to be written out, including
|
|
||||||
* bytesToSkip. This is to prevent writing beyond the end of the file.
|
|
||||||
* @param out The output.
|
|
||||||
*/
|
|
||||||
protected abstract void fixErasedBlock(
|
|
||||||
FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
|
|
||||||
long blockSize, long errorOffset, long bytesToSkip, long limit,
|
|
||||||
OutputStream out) throws IOException;
|
|
||||||
}
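
// Illustration only (not part of the original file): the core recovery step a
// concrete Decoder subclass performs for the simple XOR code. With one parity
// block per stripe, an erased block is the byte-wise XOR of the parity block
// and the surviving source blocks of that stripe. The real subclasses stream
// this computation through fs/parityFs in bufSize-sized chunks inside
// fixErasedBlock, and the Reed-Solomon variant replaces the XOR with RS decoding.
static byte[] xorRecover(byte[][] survivingBlocks, byte[] parityBlock) {
  byte[] recovered = parityBlock.clone();
  for (byte[] block : survivingBlocks) {
    for (int i = 0; i < recovered.length && i < block.length; i++) {
      recovered[i] ^= block[i];
    }
  }
  return recovered;
}
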
|
|
|
@ -1,323 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Stack;
|
|
||||||
import java.util.concurrent.Executor;
|
|
||||||
import java.util.concurrent.ExecutorService;
|
|
||||||
import java.util.concurrent.Executors;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.Semaphore;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements depth-first traversal using a Stack object. The traversal
|
|
||||||
* can be stopped at any time and the state of traversal is saved.
|
|
||||||
*/
|
|
||||||
public class DirectoryTraversal {
|
|
||||||
public static final Log LOG =
|
|
||||||
LogFactory.getLog("org.apache.hadoop.raid.DirectoryTraversal");
|
|
||||||
|
|
||||||
private FileSystem fs;
|
|
||||||
private List<FileStatus> paths;
|
|
||||||
private int pathIdx = 0; // Next path to process.
|
|
||||||
private Stack<Node> stack = new Stack<Node>();
|
|
||||||
private ExecutorService executor;
|
|
||||||
|
|
||||||
private int numThreads;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A FileFilter object can be used to choose files during directory traversal.
|
|
||||||
*/
|
|
||||||
public interface FileFilter {
|
|
||||||
/**
|
|
||||||
* @return a boolean value indicating if the file passes the filter.
|
|
||||||
*/
|
|
||||||
boolean check(FileStatus f) throws IOException;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents a directory node in directory traversal.
|
|
||||||
*/
|
|
||||||
static class Node {
|
|
||||||
private FileStatus path; // Path that this node represents.
|
|
||||||
private FileStatus[] elements; // Elements in the node.
|
|
||||||
private int idx = 0;
|
|
||||||
|
|
||||||
public Node(FileStatus path, FileStatus[] elements) {
|
|
||||||
this.path = path;
|
|
||||||
this.elements = elements;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasNext() {
|
|
||||||
return idx < elements.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
public FileStatus next() {
|
|
||||||
return elements[idx++];
|
|
||||||
}
|
|
||||||
|
|
||||||
public FileStatus path() {
|
|
||||||
return this.path;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
* @param fs The filesystem to use.
|
|
||||||
* @param startPaths A list of paths that need to be traversed
|
|
||||||
*/
|
|
||||||
public DirectoryTraversal(FileSystem fs, List<FileStatus> startPaths) {
|
|
||||||
this(fs, startPaths, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public DirectoryTraversal(
|
|
||||||
FileSystem fs, List<FileStatus> startPaths, int numThreads) {
|
|
||||||
this.fs = fs;
|
|
||||||
paths = startPaths;
|
|
||||||
pathIdx = 0;
|
|
||||||
this.numThreads = numThreads;
|
|
||||||
executor = Executors.newFixedThreadPool(numThreads);
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<FileStatus> getFilteredFiles(FileFilter filter, int limit) {
|
|
||||||
List<FileStatus> filtered = new ArrayList<FileStatus>();
|
|
||||||
if (limit == 0)
|
|
||||||
return filtered;
|
|
||||||
|
|
||||||
// We need this semaphore to block when the number of running workitems
|
|
||||||
// is equal to the number of threads. FixedThreadPool limits the number
|
|
||||||
// of threads, but not the queue size. This way we will limit the memory
|
|
||||||
// usage.
|
|
||||||
Semaphore slots = new Semaphore(numThreads);
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
FilterFileWorkItem work = null;
|
|
||||||
try {
|
|
||||||
slots.acquire();
|
|
||||||
synchronized(filtered) {
|
|
||||||
if (filtered.size() >= limit) {
|
|
||||||
slots.release();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Node next = getNextDirectoryNode();
|
|
||||||
if (next == null) {
|
|
||||||
slots.release();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
work = new FilterFileWorkItem(filter, next, filtered, slots);
|
|
||||||
} catch (InterruptedException ie) {
|
|
||||||
slots.release();
|
|
||||||
break;
|
|
||||||
} catch (IOException e) {
|
|
||||||
slots.release();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
executor.execute(work);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Wait for all submitted items to finish.
|
|
||||||
slots.acquire(numThreads);
|
|
||||||
// If this traversal is finished, shutdown the executor.
|
|
||||||
if (doneTraversal()) {
|
|
||||||
executor.shutdown();
|
|
||||||
executor.awaitTermination(1, TimeUnit.HOURS);
|
|
||||||
}
|
|
||||||
} catch (InterruptedException ie) {
|
|
||||||
}
|
|
||||||
|
|
||||||
return filtered;
|
|
||||||
}
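
// Generic sketch of the submission pattern used above (illustration only,
// hypothetical names; relies on the java.util.concurrent imports already in
// this file). A fixed thread pool bounds concurrency but not its queue, so a
// counting semaphore caps the number of in-flight work items at numThreads,
// and the final acquire(numThreads) waits for everything submitted to finish.
static void runBounded(List<Runnable> work, int numThreads)
    throws InterruptedException {
  ExecutorService pool = Executors.newFixedThreadPool(numThreads);
  final Semaphore slots = new Semaphore(numThreads);
  for (final Runnable item : work) {
    slots.acquire();                 // blocks while numThreads items are running
    pool.execute(new Runnable() {
      public void run() {
        try {
          item.run();
        } finally {
          slots.release();           // free the slot when this item finishes
        }
      }
    });
  }
  slots.acquire(numThreads);         // wait for all submitted items to finish
  pool.shutdown();
}
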
|
|
||||||
|
|
||||||
class FilterFileWorkItem implements Runnable {
|
|
||||||
FileFilter filter;
|
|
||||||
Node dir;
|
|
||||||
List<FileStatus> filtered;
|
|
||||||
Semaphore slots;
|
|
||||||
|
|
||||||
FilterFileWorkItem(FileFilter filter, Node dir, List<FileStatus> filtered,
|
|
||||||
Semaphore slots) {
|
|
||||||
this.slots = slots;
|
|
||||||
this.filter = filter;
|
|
||||||
this.dir = dir;
|
|
||||||
this.filtered = filtered;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
|
||||||
public void run() {
|
|
||||||
try {
|
|
||||||
LOG.info("Initiating file filtering for " + dir.path.getPath());
|
|
||||||
for (FileStatus f: dir.elements) {
|
|
||||||
if (!f.isFile()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (filter.check(f)) {
|
|
||||||
synchronized(filtered) {
|
|
||||||
filtered.add(f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.error("Error in directory traversal: "
|
|
||||||
+ StringUtils.stringifyException(e));
|
|
||||||
} finally {
|
|
||||||
slots.release();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the next file.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public FileStatus getNextFile() throws IOException {
|
|
||||||
// Check if traversal is done.
|
|
||||||
while (!doneTraversal()) {
|
|
||||||
// If traversal is not done, check if the stack is not empty.
|
|
||||||
while (!stack.isEmpty()) {
|
|
||||||
// If the stack is not empty, look at the top node.
|
|
||||||
Node node = stack.peek();
|
|
||||||
// Check if the top node has an element.
|
|
||||||
if (node.hasNext()) {
|
|
||||||
FileStatus element = node.next();
|
|
||||||
// Is the next element a directory.
|
|
||||||
if (!element.isDir()) {
|
|
||||||
// It is a file, return it.
|
|
||||||
return element;
|
|
||||||
}
|
|
||||||
// Next element is a directory, push it on to the stack and
|
|
||||||
// continue
|
|
||||||
try {
|
|
||||||
pushNewNode(element);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
// Ignore and move to the next element.
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
// Top node has no next element, pop it and continue.
|
|
||||||
stack.pop();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If the stack is empty, do we have more paths?
|
|
||||||
while (!paths.isEmpty()) {
|
|
||||||
FileStatus next = paths.remove(0);
|
|
||||||
pathIdx++;
|
|
||||||
if (!next.isDir()) {
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
pushNewNode(next);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the next directory in the tree. The algorithm returns deeper directories
|
|
||||||
* first.
|
|
||||||
* @return A FileStatus representing the directory.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public FileStatus getNextDirectory() throws IOException {
|
|
||||||
Node dirNode = getNextDirectoryNode();
|
|
||||||
if (dirNode != null) {
|
|
||||||
return dirNode.path;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Node getNextDirectoryNode() throws IOException {
|
|
||||||
|
|
||||||
// Check if traversal is done.
|
|
||||||
while (!doneTraversal()) {
|
|
||||||
// If traversal is not done, check if the stack is not empty.
|
|
||||||
while (!stack.isEmpty()) {
|
|
||||||
// If the stack is not empty, look at the top node.
|
|
||||||
Node node = stack.peek();
|
|
||||||
// Check if the top node has an element.
|
|
||||||
if (node.hasNext()) {
|
|
||||||
FileStatus element = node.next();
|
|
||||||
// Is the next element a directory.
|
|
||||||
if (element.isDir()) {
|
|
||||||
// Next element is a directory, push it on to the stack and
|
|
||||||
// continue
|
|
||||||
try {
|
|
||||||
pushNewNode(element);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
// Ignore and move to the next element.
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
stack.pop();
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If the stack is empty, do we have more paths?
|
|
||||||
while (!paths.isEmpty()) {
|
|
||||||
FileStatus next = paths.remove(0);
|
|
||||||
pathIdx++;
|
|
||||||
if (next.isDir()) {
|
|
||||||
try {
|
|
||||||
pushNewNode(next);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void pushNewNode(FileStatus stat) throws IOException {
|
|
||||||
if (!stat.isDir()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Path p = stat.getPath();
|
|
||||||
FileStatus[] elements = fs.listStatus(p);
|
|
||||||
Node newNode = new Node(stat, (elements == null? new FileStatus[0]: elements));
|
|
||||||
stack.push(newNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean doneTraversal() {
|
|
||||||
return paths.isEmpty() && stack.isEmpty();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,660 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
|
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.hadoop.io.WritableComparable;
|
|
||||||
import org.apache.hadoop.io.LongWritable;
|
|
||||||
import org.apache.hadoop.io.Writable;
|
|
||||||
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
|
|
||||||
import org.apache.hadoop.mapreduce.Mapper;
|
|
||||||
import org.apache.hadoop.mapreduce.InputFormat;
|
|
||||||
import org.apache.hadoop.mapreduce.Job;
|
|
||||||
import org.apache.hadoop.mapreduce.JobContext;
|
|
||||||
import org.apache.hadoop.mapreduce.InputSplit;
|
|
||||||
|
|
||||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
|
||||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
|
||||||
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
|
|
||||||
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
|
|
||||||
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
|
|
||||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
|
||||||
|
|
||||||
/**
 * Distributed block fixer that uses MapReduce jobs to fix corrupt files.
 *
 * Configuration options:
 *
 * raid.blockfix.filespertask - number of corrupt files to fix in a single
 *                              map reduce task (i.e., at one mapper node)
 *
 * raid.blockfix.maxpendingfiles - maximum number of files to fix
 *                                 simultaneously
 */
|
|
||||||
public class DistBlockFixer extends BlockFixer {
|
|
||||||
// volatile should be sufficient since only the block fixer thread
|
|
||||||
// updates numJobsRunning (other threads may read)
|
|
||||||
private volatile int numJobsRunning = 0;
|
|
||||||
|
|
||||||
private static final String WORK_DIR_PREFIX = "blockfixer";
|
|
||||||
private static final String IN_FILE_SUFFIX = ".in";
|
|
||||||
private static final String PART_PREFIX = "part-";
|
|
||||||
|
|
||||||
private static final String BLOCKFIX_FILES_PER_TASK =
|
|
||||||
"raid.blockfix.filespertask";
|
|
||||||
private static final String BLOCKFIX_MAX_PENDING_FILES =
|
|
||||||
"raid.blockfix.maxpendingfiles";
|
|
||||||
|
|
||||||
// default number of files to fix in a task
|
|
||||||
private static final long DEFAULT_BLOCKFIX_FILES_PER_TASK = 10L;
|
|
||||||
|
|
||||||
// default number of files to fix simultaneously
|
|
||||||
private static final long DEFAULT_BLOCKFIX_MAX_PENDING_FILES = 1000L;
|
|
||||||
|
|
||||||
protected static final Log LOG = LogFactory.getLog(DistBlockFixer.class);
|
|
||||||
|
|
||||||
// number of files to fix in a task
|
|
||||||
private long filesPerTask;
|
|
||||||
|
|
||||||
// number of files to fix simultaneously
|
|
||||||
final private long maxPendingFiles;
|
|
||||||
|
|
||||||
// number of files being fixed right now
|
|
||||||
private long pendingFiles;
|
|
||||||
|
|
||||||
private long lastCheckTime;
|
|
||||||
|
|
||||||
private final SimpleDateFormat dateFormat =
|
|
||||||
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
|
||||||
|
|
||||||
private Map<String, CorruptFileInfo> fileIndex =
|
|
||||||
new HashMap<String, CorruptFileInfo>();
|
|
||||||
private Map<Job, List<CorruptFileInfo>> jobIndex =
|
|
||||||
new HashMap<Job, List<CorruptFileInfo>>();
|
|
||||||
|
|
||||||
static enum Counter {
|
|
||||||
FILES_SUCCEEDED, FILES_FAILED, FILES_NOACTION
|
|
||||||
}
|
|
||||||
|
|
||||||
public DistBlockFixer(Configuration conf) {
|
|
||||||
super(conf);
|
|
||||||
filesPerTask = DistBlockFixer.filesPerTask(getConf());
|
|
||||||
maxPendingFiles = DistBlockFixer.maxPendingFiles(getConf());
|
|
||||||
pendingFiles = 0L;
|
|
||||||
|
|
||||||
// start off due for the first iteration
|
|
||||||
lastCheckTime = Time.now() - blockFixInterval;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * determines how many files to fix in a single task
 */
protected static long filesPerTask(Configuration conf) {
  return conf.getLong(BLOCKFIX_FILES_PER_TASK,
                      DEFAULT_BLOCKFIX_FILES_PER_TASK);
}

/**
 * determines how many files to fix simultaneously
 */
protected static long maxPendingFiles(Configuration conf) {
  return conf.getLong(BLOCKFIX_MAX_PENDING_FILES,
                      DEFAULT_BLOCKFIX_MAX_PENDING_FILES);
}
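
// Usage sketch (illustration only, not part of the original file): overriding
// the two tuning knobs programmatically; the values chosen here are arbitrary,
// but the property names match the constants defined above.
static DistBlockFixer createTunedFixer() {
  Configuration conf = new Configuration();
  conf.setLong("raid.blockfix.filespertask", 20L);      // files fixed per map task
  conf.setLong("raid.blockfix.maxpendingfiles", 500L);  // cap on files being fixed at once
  return new DistBlockFixer(conf);
}
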
|
|
||||||
|
|
||||||
/**
|
|
||||||
* runs the block fixer periodically
|
|
||||||
*/
|
|
||||||
public void run() {
|
|
||||||
while (running) {
|
|
||||||
// check if it is time to run the block fixer
|
|
||||||
long now = Time.now();
|
|
||||||
if (now >= lastCheckTime + blockFixInterval) {
|
|
||||||
lastCheckTime = now;
|
|
||||||
try {
|
|
||||||
checkAndFixBlocks(now);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
LOG.info("interrupted");
|
|
||||||
} catch (Exception e) {
|
|
||||||
// log exceptions and keep running
|
|
||||||
LOG.error(StringUtils.stringifyException(e));
|
|
||||||
} catch (Error e) {
|
|
||||||
LOG.error(StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// try to sleep for the remainder of the interval
|
|
||||||
long sleepPeriod = (lastCheckTime - Time.now()) +
|
|
||||||
blockFixInterval;
|
|
||||||
|
|
||||||
if ((sleepPeriod > 0L) && running) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(sleepPeriod);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
LOG.info("interrupted");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks for corrupt blocks and fixes them (if any)
|
|
||||||
*/
|
|
||||||
private void checkAndFixBlocks(long startTime)
|
|
||||||
throws IOException, InterruptedException, ClassNotFoundException {
|
|
||||||
checkJobs();
|
|
||||||
|
|
||||||
if (pendingFiles >= maxPendingFiles) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<Path> corruptFiles = getCorruptFiles();
|
|
||||||
filterUnfixableSourceFiles(corruptFiles.iterator());
|
|
||||||
|
|
||||||
String startTimeStr = dateFormat.format(new Date(startTime));
|
|
||||||
|
|
||||||
LOG.info("found " + corruptFiles.size() + " corrupt files");
|
|
||||||
|
|
||||||
if (corruptFiles.size() > 0) {
|
|
||||||
String jobName = "blockfixer." + startTime;
|
|
||||||
startJob(jobName, corruptFiles);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Handle a failed job.
|
|
||||||
*/
|
|
||||||
private void failJob(Job job) throws IOException {
|
|
||||||
// assume no files have been fixed
|
|
||||||
LOG.error("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
|
|
||||||
") finished (failed)");
|
|
||||||
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
|
|
||||||
fileInfo.fail();
|
|
||||||
}
|
|
||||||
numJobsRunning--;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Handle a successful job.
|
|
||||||
*/
|
|
||||||
private void succeedJob(Job job, long filesSucceeded, long filesFailed)
|
|
||||||
throws IOException {
|
|
||||||
LOG.info("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
|
|
||||||
") finished (succeeded)");
|
|
||||||
|
|
||||||
if (filesFailed == 0) {
|
|
||||||
// no files have failed
|
|
||||||
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
|
|
||||||
fileInfo.succeed();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// we have to look at the output to check which files have failed
|
|
||||||
Set<String> failedFiles = getFailedFiles(job);
|
|
||||||
|
|
||||||
for (CorruptFileInfo fileInfo: jobIndex.get(job)) {
|
|
||||||
if (failedFiles.contains(fileInfo.getFile().toString())) {
|
|
||||||
fileInfo.fail();
|
|
||||||
} else {
|
|
||||||
// call succeed for files that have succeeded or for which no action
|
|
||||||
// was taken
|
|
||||||
fileInfo.succeed();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// report succeeded files to metrics
|
|
||||||
incrFilesFixed(filesSucceeded);
|
|
||||||
numJobsRunning--;
|
|
||||||
}
|
|
||||||
|
|
||||||
  /**
   * Checks if jobs have completed and updates the job and file index.
   */
  private void checkJobs() throws IOException {
    Iterator<Job> jobIter = jobIndex.keySet().iterator();
    while (jobIter.hasNext()) {
      Job job = jobIter.next();

      try {
        if (job.isComplete()) {
          long filesSucceeded =
            job.getCounters().findCounter(Counter.FILES_SUCCEEDED).getValue();
          long filesFailed =
            job.getCounters().findCounter(Counter.FILES_FAILED).getValue();
          long filesNoAction =
            job.getCounters().findCounter(Counter.FILES_NOACTION).getValue();
          int files = jobIndex.get(job).size();
          if (job.isSuccessful() &&
              (filesSucceeded + filesFailed + filesNoAction ==
               ((long) files))) {
            // job has processed all files
            succeedJob(job, filesSucceeded, filesFailed);
          } else {
            failJob(job);
          }
          jobIter.remove();
        } else {
          LOG.info("job " + job.getJobName() + " still running");
        }
      } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
        failJob(job);
        try {
          job.killJob();
        } catch (Exception ee) {
          LOG.error(StringUtils.stringifyException(ee));
        }
        jobIter.remove();
      }
    }
    purgeFileIndex();
  }

  /**
   * determines which files have failed for a given job
   */
  private Set<String> getFailedFiles(Job job) throws IOException {
    Set<String> failedFiles = new HashSet<String>();

    Path outDir = SequenceFileOutputFormat.getOutputPath(job);
    FileSystem fs = outDir.getFileSystem(getConf());
    if (!fs.getFileStatus(outDir).isDir()) {
      throw new IOException(outDir.toString() + " is not a directory");
    }

    FileStatus[] files = fs.listStatus(outDir);

    for (FileStatus f: files) {
      Path fPath = f.getPath();
      if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
        LOG.info("opening " + fPath.toString());
        SequenceFile.Reader reader =
          new SequenceFile.Reader(fs, fPath, getConf());

        Text key = new Text();
        Text value = new Text();
        while (reader.next(key, value)) {
          failedFiles.add(key.toString());
        }
        reader.close();
      }
    }
    return failedFiles;
  }

  /**
   * purge expired jobs from the file index
   */
  private void purgeFileIndex() {
    Iterator<String> fileIter = fileIndex.keySet().iterator();
    while (fileIter.hasNext()) {
      String file = fileIter.next();
      if (fileIndex.get(file).isExpired()) {
        fileIter.remove();
      }
    }
  }

  /**
   * creates and submits a job, updates file index and job index
   */
  private Job startJob(String jobName, List<Path> corruptFiles)
    throws IOException, InterruptedException, ClassNotFoundException {
    Path inDir = new Path(WORK_DIR_PREFIX + "/in/" + jobName);
    Path outDir = new Path(WORK_DIR_PREFIX + "/out/" + jobName);
    List<Path> filesInJob = createInputFile(jobName, inDir, corruptFiles);

    Configuration jobConf = new Configuration(getConf());
    Job job = new Job(jobConf, jobName);
    job.setJarByClass(getClass());
    job.setMapperClass(DistBlockFixerMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(DistBlockFixerInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    DistBlockFixerInputFormat.setInputPaths(job, inDir);
    SequenceFileOutputFormat.setOutputPath(job, outDir);

    job.submit();
    LOG.info("DistBlockFixer job " + job.getJobID() + "(" + job.getJobName() +
      ") started");

    // submit the job before inserting it into the index
    // this way, if submit fails, we won't have added anything to the index
    insertJob(job, filesInJob);
    return job;
  }

  /**
   * inserts new job into file index and job index
   */
  private void insertJob(Job job, List<Path> corruptFiles) {
    List<CorruptFileInfo> fileInfos = new LinkedList<CorruptFileInfo>();

    for (Path file: corruptFiles) {
      CorruptFileInfo fileInfo = new CorruptFileInfo(file, job);
      fileInfos.add(fileInfo);
      fileIndex.put(file.toString(), fileInfo);
    }

    jobIndex.put(job, fileInfos);
    numJobsRunning++;
  }

  /**
   * Creates the input file (containing the names of the files to be fixed).
   */
  private List<Path> createInputFile(String jobName, Path inDir,
                                     List<Path> corruptFiles)
    throws IOException {

    Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
    FileSystem fs = file.getFileSystem(getConf());
    SequenceFile.Writer fileOut = SequenceFile.createWriter(fs, getConf(), file,
                                                            LongWritable.class,
                                                            Text.class);
    long index = 0L;

    List<Path> filesAdded = new LinkedList<Path>();

    for (Path corruptFile: corruptFiles) {
      if (pendingFiles >= maxPendingFiles) {
        break;
      }

      String corruptFileName = corruptFile.toString();
      fileOut.append(new LongWritable(index++), new Text(corruptFileName));
      filesAdded.add(corruptFile);
      pendingFiles++;

      if (index % filesPerTask == 0) {
        fileOut.sync(); // create sync point to make sure we can split here
      }
    }

    fileOut.close();
    return filesAdded;
  }

  /**
   * gets a list of corrupt files from the name node
   * and filters out files that are currently being fixed or
   * that were recently fixed
   */
  private List<Path> getCorruptFiles() throws IOException {
    DistributedFileSystem dfs = (DistributedFileSystem)
      (new Path("/")).getFileSystem(getConf());

    String[] files = RaidDFSUtil.getCorruptFiles(dfs);
    List<Path> corruptFiles = new LinkedList<Path>();

    for (String f: files) {
      Path p = new Path(f);
      // filter out files that are being fixed or that were recently fixed
      if (!fileIndex.containsKey(p.toString())) {
        corruptFiles.add(p);
      }
    }
    RaidUtils.filterTrash(getConf(), corruptFiles);

    return corruptFiles;
  }

  /**
   * returns the number of map reduce jobs running
   */
  public int jobsRunning() {
    return numJobsRunning;
  }

  /**
   * hold information about a corrupt file that is being fixed
   */
  class CorruptFileInfo {

    private Path file;
    private Job job;
    private boolean done;
    private long time;

    public CorruptFileInfo(Path file, Job job) {
      this.file = file;
      this.job = job;
      this.done = false;
      this.time = 0;
    }

    public boolean isDone() {
      return done;
    }

    public boolean isExpired() {
      return done && ((Time.now() - time) > historyInterval);
    }

    public Path getFile() {
      return file;
    }

    /**
     * updates file index to record a failed attempt at fixing a file,
     * immediately removes the entry from the file index
     * (instead of letting it expire)
     * so that we can retry right away
     */
    public void fail() {
      // remove this file from the index
      CorruptFileInfo removed = fileIndex.remove(file.toString());
      if (removed == null) {
        LOG.error("trying to remove file not in file index: " +
          file.toString());
      } else {
        LOG.error("fixing " + file.toString() + " failed");
      }
      pendingFiles--;
    }

    /**
     * marks a file as fixed successfully
     * and sets time stamp for expiry after specified interval
     */
    public void succeed() {
      // leave the file in the index,
      // will be purged later
      job = null;
      done = true;
      time = Time.now();
      LOG.info("fixing " + file.toString() + " succeeded");
      pendingFiles--;
    }
  }

  static class DistBlockFixerInputFormat
    extends SequenceFileInputFormat<LongWritable, Text> {

    protected static final Log LOG =
      LogFactory.getLog(DistBlockFixerMapper.class);

    /**
     * splits the input files into tasks handled by a single node
     * we have to read the input files to do this based on a number of
     * items in a sequence
     */
    @Override
    public List<InputSplit> getSplits(JobContext job)
      throws IOException {
      long filesPerTask = DistBlockFixer.filesPerTask(job.getConfiguration());

      Path[] inPaths = getInputPaths(job);

      List<InputSplit> splits = new LinkedList<InputSplit>();

      long fileCounter = 0;

      for (Path inPath: inPaths) {

        FileSystem fs = inPath.getFileSystem(job.getConfiguration());

        if (!fs.getFileStatus(inPath).isDir()) {
          throw new IOException(inPath.toString() + " is not a directory");
        }

        FileStatus[] inFiles = fs.listStatus(inPath);

        for (FileStatus inFileStatus: inFiles) {
          Path inFile = inFileStatus.getPath();

          if (!inFileStatus.isDir() &&
              (inFile.getName().equals(job.getJobName() + IN_FILE_SUFFIX))) {

            fileCounter++;
            SequenceFile.Reader inFileReader =
              new SequenceFile.Reader(fs, inFile, job.getConfiguration());

            long startPos = inFileReader.getPosition();
            long counter = 0;

            // create an input split every filesPerTask items in the sequence
            LongWritable key = new LongWritable();
            Text value = new Text();
            try {
              while (inFileReader.next(key, value)) {
                if (counter % filesPerTask == filesPerTask - 1L) {
                  splits.add(new FileSplit(inFile, startPos,
                                           inFileReader.getPosition() -
                                           startPos,
                                           null));
                  startPos = inFileReader.getPosition();
                }
                counter++;
              }

              // create input split for remaining items if necessary
              // this includes the case where no splits were created by the loop
              if (startPos != inFileReader.getPosition()) {
                splits.add(new FileSplit(inFile, startPos,
                                         inFileReader.getPosition() - startPos,
                                         null));
              }
            } finally {
              inFileReader.close();
            }
          }
        }
      }

      LOG.info("created " + splits.size() + " input splits from " +
               fileCounter + " files");

      return splits;
    }

    /**
     * indicates that input file can be split
     */
    @Override
    public boolean isSplitable(JobContext job, Path file) {
      return true;
    }
  }

  /**
   * mapper for fixing stripes with corrupt blocks
   */
  static class DistBlockFixerMapper
      extends Mapper<LongWritable, Text, Text, Text> {

    protected static final Log LOG =
      LogFactory.getLog(DistBlockFixerMapper.class);

    /**
     * fix a stripe
     */
    @Override
    public void map(LongWritable key, Text fileText, Context context)
      throws IOException, InterruptedException {

      BlockFixerHelper helper =
        new BlockFixerHelper(context.getConfiguration());

      String fileStr = fileText.toString();
      LOG.info("fixing " + fileStr);

      Path file = new Path(fileStr);
      boolean success = false;

      try {
        boolean fixed = helper.fixFile(file, context);

        if (fixed) {
          context.getCounter(Counter.FILES_SUCCEEDED).increment(1L);
        } else {
          context.getCounter(Counter.FILES_NOACTION).increment(1L);
        }
      } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));

        // report file as failed
        context.getCounter(Counter.FILES_FAILED).increment(1L);
        String outkey = fileStr;
        String outval = "failed";
        context.write(new Text(outkey), new Text(outval));
      }

      context.progress();
    }
  }

}
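The split logic above works only because createInputFile() writes a sync point every filesPerTask records: getSplits() can then cut a FileSplit at any record boundary and each task's SequenceFile reader will resynchronize at the next sync marker. The standalone sketch below is not part of the original patch; the local path, class name, and record counts are invented for illustration. It writes a small SequenceFile with periodic sync points and derives byte ranges the same way DistBlockFixerInputFormat does.

// Illustrative sketch only (hypothetical class and path): pairs periodic
// Writer.sync() calls with record-count based byte ranges, mirroring the
// createInputFile()/getSplits() interplay shown above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SyncPointSplitSketch {
  private static final int RECORDS_PER_SPLIT = 3;   // plays the role of filesPerTask

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path in = new Path("/tmp/blockfixer-sketch.in");

    SequenceFile.Writer w =
        SequenceFile.createWriter(fs, conf, in, LongWritable.class, Text.class);
    for (long i = 0; i < 10; i++) {
      w.append(new LongWritable(i), new Text("/corrupt/file-" + i));
      if ((i + 1) % RECORDS_PER_SPLIT == 0) {
        w.sync();                                   // split boundaries may land here
      }
    }
    w.close();

    SequenceFile.Reader r = new SequenceFile.Reader(fs, in, conf);
    LongWritable key = new LongWritable();
    Text value = new Text();
    long start = r.getPosition(), count = 0;
    while (r.next(key, value)) {
      if (++count % RECORDS_PER_SPLIT == 0) {
        System.out.println("split: [" + start + ", " + r.getPosition() + ")");
        start = r.getPosition();
      }
    }
    if (start != r.getPosition()) {                 // remainder, as in getSplits()
      System.out.println("split: [" + start + ", " + r.getPosition() + ")");
    }
    r.close();
  }
}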
@ -1,374 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Date;
import java.text.SimpleDateFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.SequenceFile.Reader;

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.raid.RaidNode.Statistics;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.StringUtils;

public class DistRaid extends Configured {

  protected static final Log LOG = LogFactory.getLog(DistRaid.class);

  static final String NAME = "distRaid";
  static final String JOB_DIR_LABEL = NAME + ".job.dir";
  static final int OP_LIST_BLOCK_SIZE = 32 * 1024 * 1024; // block size of control file
  static final short OP_LIST_REPLICATION = 10; // replication factor of control file

  public static final String OPS_PER_TASK = "raid.distraid.opspertask";
  private static final int DEFAULT_OPS_PER_TASK = 100;
  private static final int SYNC_FILE_MAX = 10;
  private static final SimpleDateFormat dateForm = new SimpleDateFormat("yyyy-MM-dd HH:mm");

  static enum Counter {
    FILES_SUCCEEDED, FILES_FAILED, PROCESSED_BLOCKS, PROCESSED_SIZE, META_BLOCKS, META_SIZE
  }

  public DistRaid(Configuration conf) {
    super(conf);
  }

  private static final Random RANDOM = new Random();

  protected static String getRandomId() {
    return Integer.toString(RANDOM.nextInt(Integer.MAX_VALUE), 36);
  }

  /**
   * helper class which holds the policy and paths
   */
  public static class RaidPolicyPathPair {
    public PolicyInfo policy;
    public List<FileStatus> srcPaths;

    RaidPolicyPathPair(PolicyInfo policy, List<FileStatus> srcPaths) {
      this.policy = policy;
      this.srcPaths = srcPaths;
    }
  }

  List<RaidPolicyPathPair> raidPolicyPathPairList = new ArrayList<RaidPolicyPathPair>();

  private Job runningJob;
  private String lastReport = null;

  /** Responsible for generating splits of the src file list. */
  static class DistRaidInputFormat extends
      SequenceFileInputFormat<Text, PolicyInfo> {
    /**
     * Produce splits such that each contains at most
     * raid.distraid.opspertask operations from the single control file.
     *
     * @param job
     *          The job context, which provides the Configuration object
     */
    public List<InputSplit> getSplits(JobContext job) throws IOException {
      Configuration conf = job.getConfiguration();

      // We create only one input file. So just get the first file in the first
      // input directory.
      Path inDir = getInputPaths(job)[0];
      FileSystem fs = inDir.getFileSystem(conf);
      FileStatus[] inputFiles = fs.listStatus(inDir);
      Path inputFile = inputFiles[0].getPath();

      List<InputSplit> splits = new ArrayList<InputSplit>();
      SequenceFile.Reader in =
        new SequenceFile.Reader(conf, Reader.file(inputFile));
      long prev = 0L;
      final int opsPerTask = conf.getInt(OPS_PER_TASK, DEFAULT_OPS_PER_TASK);
      try {
        Text key = new Text();
        PolicyInfo value = new PolicyInfo();
        int count = 0; // count src
        while (in.next(key, value)) {
          long curr = in.getPosition();
          long delta = curr - prev;
          if (++count > opsPerTask) {
            count = 0;
            splits.add(new FileSplit(inputFile, prev, delta, (String[]) null));
            prev = curr;
          }
        }
      } finally {
        in.close();
      }
      long remaining = fs.getFileStatus(inputFile).getLen() - prev;
      if (remaining != 0) {
        splits.add(new FileSplit(inputFile, prev, remaining, (String[]) null));
      }
      return splits;
    }
  }

  /** The mapper for raiding files. */
  static class DistRaidMapper extends Mapper<Text, PolicyInfo, Text, Text> {
    private boolean ignoreFailures = false;

    private int failcount = 0;
    private int succeedcount = 0;
    private Statistics st = new Statistics();

    private String getCountString() {
      return "Succeeded: " + succeedcount + " Failed: " + failcount;
    }

    /** Run a FileOperation
     * @throws IOException
     * @throws InterruptedException */
    public void map(Text key, PolicyInfo policy, Context context)
        throws IOException, InterruptedException {
      try {
        Configuration jobConf = context.getConfiguration();
        LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
        Path p = new Path(key.toString());
        FileStatus fs = p.getFileSystem(jobConf).getFileStatus(p);
        st.clear();
        RaidNode.doRaid(jobConf, policy, fs, st, context);

        ++succeedcount;

        context.getCounter(Counter.PROCESSED_BLOCKS).increment(st.numProcessedBlocks);
        context.getCounter(Counter.PROCESSED_SIZE).increment(st.processedSize);
        context.getCounter(Counter.META_BLOCKS).increment(st.numMetaBlocks);
        context.getCounter(Counter.META_SIZE).increment(st.metaSize);
        context.getCounter(Counter.FILES_SUCCEEDED).increment(1);
      } catch (IOException e) {
        ++failcount;
        context.getCounter(Counter.FILES_FAILED).increment(1);

        String s = "FAIL: " + policy + ", " + key + " "
            + StringUtils.stringifyException(e);
        context.write(new Text(key), new Text(s));
        LOG.error(s);
      } finally {
        context.setStatus(getCountString());
      }
    }

    /** {@inheritDoc} */
    public void close() throws IOException {
      if (failcount == 0 || ignoreFailures) {
        return;
      }
      throw new IOException(getCountString());
    }
  }

  /**
   * Set options specified in raid.scheduleroption.
   * The string should be formatted as key:value[,key:value]*
   */
  static void setSchedulerOption(Configuration conf) {
    String schedulerOption = conf.get("raid.scheduleroption");
    if (schedulerOption != null) {
      // Parse the scheduler option to get key:value pairs.
      String[] keyValues = schedulerOption.trim().split(",");
      for (String keyValue: keyValues) {
        String[] fields = keyValue.trim().split(":");
        String key = fields[0].trim();
        String value = fields[1].trim();
        conf.set(key, value);
      }
    }
  }

  /**
   * Creates a new Job object.
   * @param jobConf the configuration for the new job
   * @return a Job object
   * @throws IOException
   */
  static Job createJob(Configuration jobConf) throws IOException {
    String jobName = NAME + " " + dateForm.format(new Date(RaidNode.now()));

    setSchedulerOption(jobConf);

    Job job = Job.getInstance(jobConf, jobName);
    job.setSpeculativeExecution(false);
    job.setJarByClass(DistRaid.class);
    job.setInputFormatClass(DistRaidInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(DistRaidMapper.class);
    job.setNumReduceTasks(0);

    return job;
  }

  /** Add paths to be raided */
  public void addRaidPaths(PolicyInfo info, List<FileStatus> paths) {
    raidPolicyPathPairList.add(new RaidPolicyPathPair(info, paths));
  }

  /** Invokes a map-reduce job to do parallel raiding.
   * @return true if the job was started, false otherwise
   */
  public boolean startDistRaid() throws IOException {
    assert(raidPolicyPathPairList.size() > 0);
    Job job = createJob(getConf());
    createInputFile(job);
    try {
      job.submit();
      this.runningJob = job;
      LOG.info("Job Started: " + runningJob.getJobID());
      return true;
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    } catch (InterruptedException e) {
      return false;
    }
  }

  /** Checks if the map-reduce job has completed.
   *
   * @return true if the job completed, false otherwise.
   * @throws IOException
   */
  public boolean checkComplete() throws IOException {
    JobID jobID = runningJob.getJobID();
    LOG.info("Checking job " + jobID);
    try {
      if (runningJob.isComplete()) {
        // delete job directory
        Configuration jobConf = runningJob.getConfiguration();
        final String jobdir = jobConf.get(JOB_DIR_LABEL);
        if (jobdir != null) {
          final Path jobpath = new Path(jobdir);
          jobpath.getFileSystem(jobConf).delete(jobpath, true);
        }
        if (runningJob.isSuccessful()) {
          LOG.info("Job Complete(Succeeded): " + jobID);
        } else {
          LOG.error("Job Complete(Failed): " + jobID);
        }
        raidPolicyPathPairList.clear();
        return true;
      } else {
        String report = (" job " + jobID +
          " map " + StringUtils.formatPercent(runningJob.mapProgress(), 0) +
          " reduce " + StringUtils.formatPercent(runningJob.reduceProgress(), 0));
        if (!report.equals(lastReport)) {
          LOG.info(report);
          lastReport = report;
        }
        return false;
      }
    } catch (InterruptedException e) {
      return false;
    }
  }

  public boolean successful() throws IOException {
    try {
      return runningJob.isSuccessful();
    } catch (InterruptedException e) {
      return false;
    }
  }

  /**
   * Sets up the input file which holds the list of files to be raided.
   *
   * @throws IOException
   */
  private void createInputFile(Job job) throws IOException {
    Configuration jobConf = job.getConfiguration();
    Path jobDir = new Path(JOB_DIR_LABEL + getRandomId());
    Path inDir = new Path(jobDir, "in");
    Path outDir = new Path(jobDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    Path opList = new Path(inDir, NAME);

    Configuration tmp = new Configuration(jobConf);
    // The control file should have small size blocks. This helps
    // in spreading out the load from mappers that will be spawned.
    tmp.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);
    FileSystem fs = opList.getFileSystem(tmp);

    int opCount = 0, synCount = 0;
    SequenceFile.Writer opWriter = null;
    try {
      opWriter = SequenceFile.createWriter(
          jobConf, Writer.file(opList), Writer.keyClass(Text.class),
          Writer.valueClass(PolicyInfo.class),
          Writer.compression(SequenceFile.CompressionType.NONE));
      for (RaidPolicyPathPair p : raidPolicyPathPairList) {
        // If a large set of files are Raided for the first time, files
        // in the same directory that tend to have the same size will end up
        // with the same map. This shuffle mixes things up, allowing a better
        // mix of files.
        java.util.Collections.shuffle(p.srcPaths);
        for (FileStatus st : p.srcPaths) {
          opWriter.append(new Text(st.getPath().toString()), p.policy);
          opCount++;
          if (++synCount > SYNC_FILE_MAX) {
            opWriter.sync();
            synCount = 0;
          }
        }
      }
    } finally {
      if (opWriter != null) {
        opWriter.close();
      }
      // increase replication for control file
      fs.setReplication(opList, OP_LIST_REPLICATION);
    }
    raidPolicyPathPairList.clear();
    LOG.info("Number of files=" + opCount);
  }
}
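Taken together, the methods above give DistRaid a small submit-and-poll surface. The minimal driver sketch below is not part of the patch; it assumes the caller, much like DistRaidNode, already has a PolicyInfo and the list of FileStatus objects to raid, and it simply polls checkComplete() on a timer. The class name and sleep interval are invented for illustration.

// Hypothetical driver sketch using only the DistRaid API shown above.
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.raid.DistRaid;
import org.apache.hadoop.raid.protocol.PolicyInfo;

public class DistRaidDriverSketch {
  public static void raidAndWait(Configuration conf, PolicyInfo info,
                                 List<FileStatus> paths) throws Exception {
    DistRaid dr = new DistRaid(conf);
    dr.addRaidPaths(info, paths);          // queue the files for this policy
    if (!dr.startDistRaid()) {             // submits the map-reduce job
      return;
    }
    while (!dr.checkComplete()) {          // poll until the job finishes
      Thread.sleep(10000);
    }
    System.out.println("raid job succeeded: " + dr.successful());
  }
}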
@ -1,106 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.util.Daemon;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;

import org.apache.hadoop.raid.protocol.PolicyInfo;

/**
 * Implementation of {@link RaidNode} that uses map reduce jobs to raid files.
 */
public class DistRaidNode extends RaidNode {

  public static final Log LOG = LogFactory.getLog(DistRaidNode.class);

  /** Daemon thread to monitor raid job progress */
  JobMonitor jobMonitor = null;
  Daemon jobMonitorThread = null;

  public DistRaidNode(Configuration conf) throws IOException {
    super(conf);
    this.jobMonitor = new JobMonitor(conf);
    this.jobMonitorThread = new Daemon(this.jobMonitor);
    this.jobMonitorThread.start();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void join() {
    super.join();
    try {
      if (jobMonitorThread != null) jobMonitorThread.join();
    } catch (InterruptedException ie) {
      // do nothing
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void stop() {
    if (stopRequested) {
      return;
    }
    super.stop();
    if (jobMonitor != null) jobMonitor.running = false;
    if (jobMonitorThread != null) jobMonitorThread.interrupt();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  void raidFiles(PolicyInfo info, List<FileStatus> paths) throws IOException {
    // We already checked that no job for this policy is running
    // So we can start a new job.
    DistRaid dr = new DistRaid(conf);
    // add paths for distributed raiding
    dr.addRaidPaths(info, paths);
    boolean started = dr.startDistRaid();
    if (started) {
      jobMonitor.monitorJob(info.getName(), dr);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  int getRunningJobsForPolicy(String policyName) {
    return jobMonitor.runningJobsCount(policyName);
  }

}
@ -1,350 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Progressable;

/**
 * Represents a generic encoder that can generate a parity file for a source
 * file.
 * This is an abstract class, concrete subclasses need to implement
 * encodeStripe.
 */
public abstract class Encoder {
  public static final Log LOG = LogFactory.getLog(
                                  "org.apache.hadoop.raid.Encoder");
  protected Configuration conf;
  protected int stripeSize;
  protected int paritySize;
  protected Random rand;
  protected int bufSize;
  protected byte[][] readBufs;
  protected byte[][] writeBufs;

  /**
   * A class that acts as a sink for data, similar to /dev/null.
   */
  static class NullOutputStream extends OutputStream {
    public void write(byte[] b) throws IOException {}
    public void write(int b) throws IOException {}
    public void write(byte[] b, int off, int len) throws IOException {}
  }

  Encoder(
    Configuration conf, int stripeSize, int paritySize) {
    this.conf = conf;
    this.stripeSize = stripeSize;
    this.paritySize = paritySize;
    this.rand = new Random();
    this.bufSize = conf.getInt("raid.encoder.bufsize", 1024 * 1024);
    this.readBufs = new byte[stripeSize][];
    this.writeBufs = new byte[paritySize][];
    allocateBuffers();
  }

  private void allocateBuffers() {
    for (int i = 0; i < stripeSize; i++) {
      readBufs[i] = new byte[bufSize];
    }
    for (int i = 0; i < paritySize; i++) {
      writeBufs[i] = new byte[bufSize];
    }
  }

  private void configureBuffers(long blockSize) {
    if ((long)bufSize > blockSize) {
      bufSize = (int)blockSize;
      allocateBuffers();
    } else if (blockSize % bufSize != 0) {
      bufSize = (int)(blockSize / 256L); // heuristic.
      if (bufSize == 0) {
        bufSize = 1024;
      }
      bufSize = Math.min(bufSize, 1024 * 1024);
      allocateBuffers();
    }
  }

  /**
   * The interface to use to generate a parity file.
   * This method can be called multiple times with the same Encoder object,
   * thus allowing reuse of the buffers allocated by the Encoder object.
   *
   * @param fs The filesystem containing the source file.
   * @param srcFile The source file.
   * @param parityFs The filesystem to write the parity file to.
   * @param parityFile The parity file to be generated.
   * @param parityRepl The replication factor of the parity file.
   * @param reporter Used to report progress.
   */
  public void encodeFile(
    FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
    short parityRepl, Progressable reporter) throws IOException {
    FileStatus srcStat = fs.getFileStatus(srcFile);
    long srcSize = srcStat.getLen();
    long blockSize = srcStat.getBlockSize();

    configureBuffers(blockSize);

    // Create a tmp file to which we will write first.
    Path tmpDir = getParityTempPath();
    if (!parityFs.mkdirs(tmpDir)) {
      throw new IOException("Could not create tmp dir " + tmpDir);
    }
    Path parityTmp = new Path(tmpDir,
                        parityFile.getName() + rand.nextLong());
    FSDataOutputStream out = parityFs.create(
                               parityTmp,
                               true,
                               conf.getInt("io.file.buffer.size", 64 * 1024),
                               parityRepl,
                               blockSize);

    try {
      encodeFileToStream(fs, srcFile, srcSize, blockSize, out, reporter);
      out.close();
      out = null;
      LOG.info("Wrote temp parity file " + parityTmp);

      // delete destination if exists
      if (parityFs.exists(parityFile)) {
        parityFs.delete(parityFile, false);
      }
      parityFs.mkdirs(parityFile.getParent());
      if (!parityFs.rename(parityTmp, parityFile)) {
        String msg = "Unable to rename file " + parityTmp + " to " + parityFile;
        throw new IOException(msg);
      }
      LOG.info("Wrote parity file " + parityFile);
    } finally {
      if (out != null) {
        out.close();
      }
      parityFs.delete(parityTmp, false);
    }
  }

  /**
   * Recovers a corrupt block in a parity file to a local file.
   *
   * The encoder generates paritySize parity blocks for a source file stripe.
   * Since we want only one of the parity blocks, this function creates
   * null outputs for the blocks to be discarded.
   *
   * @param fs The filesystem in which both srcFile and parityFile reside.
   * @param srcFile The source file.
   * @param srcSize The size of the source file.
   * @param blockSize The block size for the source/parity files.
   * @param corruptOffset The location of corruption in the parity file.
   * @param localBlockFile The destination for the recovered block.
   */
  public void recoverParityBlockToFile(
    FileSystem fs,
    Path srcFile, long srcSize, long blockSize,
    Path parityFile, long corruptOffset,
    File localBlockFile) throws IOException {
    OutputStream out = new FileOutputStream(localBlockFile);
    try {
      recoverParityBlockToStream(fs, srcFile, srcSize, blockSize, parityFile,
        corruptOffset, out);
    } finally {
      out.close();
    }
  }

  /**
   * Recovers a corrupt block in a parity file to an output stream.
   *
   * The encoder generates paritySize parity blocks for a source file stripe.
   * Since we want only one of the parity blocks, this function creates
   * null outputs for the blocks to be discarded.
   *
   * @param fs The filesystem in which both srcFile and parityFile reside.
   * @param srcFile The source file.
   * @param srcSize The size of the source file.
   * @param blockSize The block size for the source/parity files.
   * @param corruptOffset The location of corruption in the parity file.
   * @param out The destination for the recovered block.
   */
  public void recoverParityBlockToStream(
    FileSystem fs,
    Path srcFile, long srcSize, long blockSize,
    Path parityFile, long corruptOffset,
    OutputStream out) throws IOException {
    LOG.info("Recovering parity block " + parityFile + ":" + corruptOffset);
    // Get the start offset of the corrupt block.
    corruptOffset = (corruptOffset / blockSize) * blockSize;
    // Output streams to each block in the parity file stripe.
    OutputStream[] outs = new OutputStream[paritySize];
    long indexOfCorruptBlockInParityStripe =
      (corruptOffset / blockSize) % paritySize;
    LOG.info("Index of corrupt block in parity stripe: " +
              indexOfCorruptBlockInParityStripe);
    // Create a real output stream for the block we want to recover,
    // and create null streams for the rest.
    for (int i = 0; i < paritySize; i++) {
      if (indexOfCorruptBlockInParityStripe == i) {
        outs[i] = out;
      } else {
        outs[i] = new NullOutputStream();
      }
    }
    // Get the stripe index and start offset of stripe.
    long stripeIdx = corruptOffset / (paritySize * blockSize);
    long stripeStart = stripeIdx * blockSize * stripeSize;

    // Get input streams to each block in the source file stripe.
    InputStream[] blocks = stripeInputs(fs, srcFile, stripeStart,
                                        srcSize, blockSize);
    LOG.info("Starting recovery by using source stripe " +
              srcFile + ":" + stripeStart);
    // Read the data from the blocks and write to the parity file.
    encodeStripe(blocks, stripeStart, blockSize, outs,
                 new RaidUtils.DummyProgressable());
  }

  /**
   * Encodes a source file and writes the resulting parity data to an
   * output stream.
   *
   * The encoder generates paritySize parity blocks for a source file stripe.
   * Since there is only one output provided, all but the first parity block
   * of each stripe are written out to temp files before being copied to the
   * output.
   *
   * @param fs The filesystem in which the source file resides.
   * @param srcFile The source file.
   * @param srcSize The size of the source file.
   * @param blockSize The block size for the source/parity files.
   * @param out The destination for the parity data.
   */
  private void encodeFileToStream(FileSystem fs, Path srcFile, long srcSize,
    long blockSize, OutputStream out, Progressable reporter) throws IOException {
    OutputStream[] tmpOuts = new OutputStream[paritySize];
    // One parity block can be written directly to out, rest to local files.
    tmpOuts[0] = out;
    File[] tmpFiles = new File[paritySize - 1];
    for (int i = 0; i < paritySize - 1; i++) {
      tmpFiles[i] = File.createTempFile("parity", "_" + i);
      LOG.info("Created tmp file " + tmpFiles[i]);
      tmpFiles[i].deleteOnExit();
    }
    try {
      // Loop over stripes in the file.
      for (long stripeStart = 0; stripeStart < srcSize;
           stripeStart += blockSize * stripeSize) {
        reporter.progress();
        LOG.info("Starting encoding of stripe " + srcFile + ":" + stripeStart);
        // Create input streams for blocks in the stripe.
        InputStream[] blocks = stripeInputs(fs, srcFile, stripeStart,
                                            srcSize, blockSize);
        // Create output streams to the temp files.
        for (int i = 0; i < paritySize - 1; i++) {
          tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
        }
        // Call the implementation of encoding.
        encodeStripe(blocks, stripeStart, blockSize, tmpOuts, reporter);
        // Close output streams to the temp files and write the temp files
        // to the output provided.
        for (int i = 0; i < paritySize - 1; i++) {
          tmpOuts[i + 1].close();
          tmpOuts[i + 1] = null;
          InputStream in = new FileInputStream(tmpFiles[i]);
          RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
          reporter.progress();
        }
      }
    } finally {
      for (int i = 0; i < paritySize - 1; i++) {
        if (tmpOuts[i + 1] != null) {
          tmpOuts[i + 1].close();
        }
        tmpFiles[i].delete();
        LOG.info("Deleted tmp file " + tmpFiles[i]);
      }
    }
  }

  /**
   * Return input streams for each block in a source file's stripe.
   * @param fs The filesystem where the file resides.
   * @param srcFile The source file.
   * @param stripeStartOffset The start offset of the stripe.
   * @param srcSize The size of the source file.
   * @param blockSize The block size for the source file.
   */
  protected InputStream[] stripeInputs(
    FileSystem fs,
    Path srcFile,
    long stripeStartOffset,
    long srcSize,
    long blockSize
    ) throws IOException {
    InputStream[] blocks = new InputStream[stripeSize];
    for (int i = 0; i < stripeSize; i++) {
      long seekOffset = stripeStartOffset + i * blockSize;
      if (seekOffset < srcSize) {
        FSDataInputStream in = fs.open(
          srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
        in.seek(seekOffset);
        LOG.info("Opening stream at " + srcFile + ":" + seekOffset);
        blocks[i] = in;
      } else {
        LOG.info("Using zeros at offset " + seekOffset);
        // We have no src data at this offset.
        blocks[i] = new RaidUtils.ZeroInputStream(
                      seekOffset + blockSize);
      }
    }
    return blocks;
  }

  /**
   * The implementation of generating parity data for a stripe.
   *
   * @param blocks The streams to blocks in the stripe.
   * @param stripeStartOffset The start offset of the stripe
   * @param blockSize The maximum size of a block.
   * @param outs output streams to the parity blocks.
   * @param reporter progress indicator.
   */
  protected abstract void encodeStripe(
    InputStream[] blocks,
    long stripeStartOffset,
    long blockSize,
    OutputStream[] outs,
    Progressable reporter) throws IOException;

  /**
   * Return the temp path for the parity file
   */
  protected abstract Path getParityTempPath();
}
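The index arithmetic in recoverParityBlockToStream() is easiest to see with concrete numbers. The sketch below is illustration only and not part of the patch; all sizes are invented. It walks one corrupt parity offset through the same three formulas used above: the block-aligned offset, the block's position within its parity stripe, and the start of the matching source stripe.

// Hypothetical worked example of the parity offset arithmetic.
public class ParityOffsetSketch {
  public static void main(String[] args) {
    long blockSize = 64L * 1024 * 1024;            // invented block size
    int stripeSize = 10, paritySize = 4;           // invented geometry
    long corruptOffset = 6L * blockSize + 12345;   // somewhere in parity block #6

    long blockStart = (corruptOffset / blockSize) * blockSize;        // 6 * blockSize
    long indexInParityStripe = (blockStart / blockSize) % paritySize; // 6 % 4 = 2
    long stripeIdx = blockStart / (paritySize * blockSize);           // 6 / 4 = 1
    long sourceStripeStart = stripeIdx * blockSize * stripeSize;      // source block 10

    System.out.println("parity index in stripe = " + indexInParityStripe +
        ", stripe = " + stripeIdx + ", source stripe start = " + sourceStripeStart);
  }
}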
@ -1,60 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

public interface ErasureCode {
  /**
   * Encodes the given message.
   * @param message The data of the message. The data is present in the least
   *                significant bits of each int. The number of data bits is
   *                symbolSize(). The number of elements of message is
   *                stripeSize().
   * @param parity (out) The information is present in the least
   *               significant bits of each int. The number of parity bits is
   *               symbolSize(). The number of elements in the code is
   *               paritySize().
   */
  public void encode(int[] message, int[] parity);

  /**
   * Generates missing portions of data.
   * @param data The message and parity. The parity should be placed in the
   *             first part of the array. In each integer, the relevant portion
   *             is present in the least significant bits of each int.
   *             The number of elements in data is stripeSize() + paritySize().
   * @param erasedLocations The indexes in data which are not available.
   * @param erasedValues (out) The decoded values corresponding to erasedLocations.
   */
  public void decode(int[] data, int[] erasedLocations, int[] erasedValues);

  /**
   * The number of elements in the message.
   */
  public int stripeSize();

  /**
   * The number of elements in the code.
   */
  public int paritySize();

  /**
   * Number of bits for each symbol.
   */
  public int symbolSize();
}
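A short usage sketch for this interface follows; it is not part of the patch and is written purely against the methods declared above. The concrete ErasureCode implementation, for example the Reed-Solomon code elsewhere in this package, is assumed to be supplied by the caller, and the class name here is invented.

// Usage sketch only: encode a message, erase one symbol, and decode it back.
public class ErasureCodeSketch {
  static void roundTrip(org.apache.hadoop.raid.ErasureCode code, int[] message) {
    int[] parity = new int[code.paritySize()];
    code.encode(message, parity);                  // message.length == code.stripeSize()

    // decode() expects [parity | message]; erase the first message symbol.
    int[] data = new int[code.paritySize() + code.stripeSize()];
    System.arraycopy(parity, 0, data, 0, parity.length);
    System.arraycopy(message, 0, data, parity.length, message.length);
    int erased = code.paritySize();
    data[erased] = 0;

    int[] erasedValues = new int[1];
    code.decode(data, new int[] { erased }, erasedValues);
    System.out.println("recovered=" + erasedValues[0] + " original=" + message[0]);
  }
}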
@ -1,350 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.util.HashMap;
import java.util.Map;

/**
 * Implementation of Galois field arithmetic with 2^p elements.
 * The inputs must be unsigned integers.
 */
public class GaloisField {

  private final int[] logTable;
  private final int[] powTable;
  private final int[][] mulTable;
  private final int[][] divTable;
  private final int fieldSize;
  private final int primitivePeriod;
  private final int primitivePolynomial;

  // Field size 256 is good for byte-based systems
  private static final int DEFAULT_FIELD_SIZE = 256;
  // primitive polynomial 1 + X^2 + X^3 + X^4 + X^8
  private static final int DEFAULT_PRIMITIVE_POLYNOMIAL = 285;

  static private final Map<Integer, GaloisField> instances =
    new HashMap<Integer, GaloisField>();

  /**
   * Get the object that performs Galois field arithmetic
   * @param fieldSize size of the field
   * @param primitivePolynomial a primitive polynomial that corresponds to the field size
   */
  public static GaloisField getInstance(int fieldSize,
                                        int primitivePolynomial) {
    int key = ((fieldSize << 16) & 0xFFFF0000) + (primitivePolynomial & 0x0000FFFF);
    GaloisField gf;
    synchronized (instances) {
      gf = instances.get(key);
      if (gf == null) {
        gf = new GaloisField(fieldSize, primitivePolynomial);
        instances.put(key, gf);
      }
    }
    return gf;
  }

  /**
   * Get the object that performs Galois field arithmetic with the default settings
   */
  public static GaloisField getInstance() {
    return getInstance(DEFAULT_FIELD_SIZE, DEFAULT_PRIMITIVE_POLYNOMIAL);
  }

  private GaloisField(int fieldSize, int primitivePolynomial) {
    assert fieldSize > 0;
    assert primitivePolynomial > 0;

    this.fieldSize = fieldSize;
    this.primitivePeriod = fieldSize - 1;
    this.primitivePolynomial = primitivePolynomial;
    logTable = new int[fieldSize];
    powTable = new int[fieldSize];
    mulTable = new int[fieldSize][fieldSize];
    divTable = new int[fieldSize][fieldSize];
    int value = 1;
    for (int pow = 0; pow < fieldSize - 1; pow++) {
      powTable[pow] = value;
      logTable[value] = pow;
      value = value * 2;
      if (value >= fieldSize) {
        value = value ^ primitivePolynomial;
      }
    }
    // building multiplication table
    for (int i = 0; i < fieldSize; i++) {
      for (int j = 0; j < fieldSize; j++) {
        if (i == 0 || j == 0) {
          mulTable[i][j] = 0;
          continue;
        }
        int z = logTable[i] + logTable[j];
        z = z >= primitivePeriod ? z - primitivePeriod : z;
        z = powTable[z];
        mulTable[i][j] = z;
      }
    }
    // building division table
    for (int i = 0; i < fieldSize; i++) {
      for (int j = 1; j < fieldSize; j++) {
        if (i == 0) {
          divTable[i][j] = 0;
          continue;
        }
        int z = logTable[i] - logTable[j];
        z = z < 0 ? z + primitivePeriod : z;
        z = powTable[z];
        divTable[i][j] = z;
      }
    }
  }

  /**
   * Return number of elements in the field
   * @return number of elements in the field
   */
  public int getFieldSize() {
    return fieldSize;
  }

  /**
   * Return the primitive polynomial in GF(2)
   * @return primitive polynomial as an integer
   */
  public int getPrimitivePolynomial() {
    return primitivePolynomial;
  }

  /**
   * Compute the sum of two field elements
   * @param x input field element
   * @param y input field element
   * @return result of addition
   */
  public int add(int x, int y) {
    assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
    return x ^ y;
  }

  /**
   * Compute the multiplication of two field elements
   * @param x input field element
   * @param y input field element
   * @return result of multiplication
   */
  public int multiply(int x, int y) {
    assert(x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize());
    return mulTable[x][y];
  }

  /**
   * Compute the division of two field elements
   * @param x input field element
   * @param y input field element
   * @return x/y
   */
  public int divide(int x, int y) {
    assert(x >= 0 && x < getFieldSize() && y > 0 && y < getFieldSize());
    return divTable[x][y];
  }

  /**
   * Compute power n of a field element
   * @param x input field element
   * @param n power
   * @return x^n
   */
  public int power(int x, int n) {
    assert(x >= 0 && x < getFieldSize());
    if (n == 0) {
      return 1;
    }
    if (x == 0) {
      return 0;
    }
    x = logTable[x] * n;
    if (x < primitivePeriod) {
      return powTable[x];
    }
    x = x % primitivePeriod;
    return powTable[x];
  }

/**
|
|
||||||
* Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
|
|
||||||
* that Vz=y. The output z will be placed in y.
|
|
||||||
* @param x the vector which describes the Vandermonde matrix
* @param y right-hand side of the Vandermonde system equation;
*        the output will be placed in this vector
|
|
||||||
*/
|
|
||||||
public void solveVandermondeSystem(int[] x, int[] y) {
|
|
||||||
solveVandermondeSystem(x, y, x.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such
|
|
||||||
* that Vz=y. The output z will be placed in y.
|
|
||||||
* @param x the vector which describes the Vandermonde matrix
* @param y right-hand side of the Vandermonde system equation;
*        the output will be placed in this vector
|
|
||||||
* @param len consider x and y only from 0...len-1
|
|
||||||
*/
|
|
||||||
public void solveVandermondeSystem(int[] x, int[] y, int len) {
|
|
||||||
assert(y.length <= len);
|
|
||||||
for (int i = 0; i < len - 1; i++) {
|
|
||||||
for (int j = len - 1; j > i; j--) {
|
|
||||||
y[j] = y[j] ^ mulTable[x[i]][y[j - 1]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = len - 1; i >= 0; i--) {
|
|
||||||
for (int j = i + 1; j < len; j++) {
|
|
||||||
y[j] = divTable[y[j]][x[j] ^ x[j - i - 1]];
|
|
||||||
}
|
|
||||||
for (int j = i; j < len - 1; j++) {
|
|
||||||
y[j] = y[j] ^ y[j + 1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the multiplication of two polynomials. The index in the
|
|
||||||
* array corresponds to the power of the entry. For example p[0] is the
|
|
||||||
* constant term of the polynomial p.
|
|
||||||
* @param p input polynomial
|
|
||||||
* @param q input polynomial
|
|
||||||
* @return polynomial represents p*q
|
|
||||||
*/
|
|
||||||
public int[] multiply(int[] p, int[] q) {
|
|
||||||
int len = p.length + q.length - 1;
|
|
||||||
int[] result = new int[len];
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
result[i] = 0;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < p.length; i++) {
|
|
||||||
for (int j = 0; j < q.length; j++) {
|
|
||||||
result[i + j] = add(result[i + j], multiply(p[i], q[j]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the remainder of a dividend and divisor pair. The index in the
|
|
||||||
* array corresponds to the power of the entry. For example p[0] is the
|
|
||||||
* constant term of the polynomial p.
|
|
||||||
* @param dividend dividend polynomial, the remainder will be placed here when return
|
|
||||||
* @param divisor divisor polynomial
|
|
||||||
*/
|
|
||||||
public void remainder(int[] dividend, int[] divisor) {
|
|
||||||
for (int i = dividend.length - divisor.length; i >= 0; i--) {
|
|
||||||
int ratio =
|
|
||||||
divTable[dividend[i + divisor.length - 1]][divisor[divisor.length - 1]];
|
|
||||||
for (int j = 0; j < divisor.length; j++) {
|
|
||||||
int k = j + i;
|
|
||||||
dividend[k] = dividend[k] ^ mulTable[ratio][divisor[j]];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the sum of two polynomials. The index in the
|
|
||||||
* array corresponds to the power of the entry. For example p[0] is the
|
|
||||||
* constant term of the polynomial p.
|
|
||||||
* @param p input polynomial
|
|
||||||
* @param q input polynomial
|
|
||||||
* @return polynomial represents p+q
|
|
||||||
*/
|
|
||||||
public int[] add(int[] p, int[] q) {
|
|
||||||
int len = Math.max(p.length, q.length);
|
|
||||||
int[] result = new int[len];
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
if (i < p.length && i < q.length) {
|
|
||||||
result[i] = add(p[i], q[i]);
|
|
||||||
} else if (i < p.length) {
|
|
||||||
result[i] = p[i];
|
|
||||||
} else {
|
|
||||||
result[i] = q[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Substitute x into polynomial p(x).
|
|
||||||
* @param p input polynomial
|
|
||||||
* @param x input field
|
|
||||||
* @return p(x)
|
|
||||||
*/
|
|
||||||
public int substitute(int[] p, int x) {
|
|
||||||
int result = 0;
|
|
||||||
int y = 1;
|
|
||||||
for (int i = 0; i < p.length; i++) {
|
|
||||||
result = result ^ mulTable[p[i]][y];
|
|
||||||
y = mulTable[x][y];
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Perform Gaussian elimination on the given matrix. This matrix has to be a
|
|
||||||
* fat matrix (number of rows < number of columns), as the assertion below enforces.
|
|
||||||
*/
|
|
||||||
public void gaussianElimination(int[][] matrix) {
|
|
||||||
assert(matrix != null && matrix.length > 0 && matrix[0].length > 0
|
|
||||||
&& matrix.length < matrix[0].length);
|
|
||||||
int height = matrix.length;
|
|
||||||
int width = matrix[0].length;
|
|
||||||
for (int i = 0; i < height; i++) {
|
|
||||||
boolean pivotFound = false;
|
|
||||||
// scan the column for a nonzero pivot and swap it to the diagonal
|
|
||||||
for (int j = i; j < height; j++) {
|
|
||||||
if (matrix[j][i] != 0) {
|
|
||||||
int[] tmp = matrix[i];
|
|
||||||
matrix[i] = matrix[j];
|
|
||||||
matrix[j] = tmp;
|
|
||||||
pivotFound = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!pivotFound) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int pivot = matrix[i][i];
|
|
||||||
for (int j = i; j < width; j++) {
|
|
||||||
matrix[i][j] = divide(matrix[i][j], pivot);
|
|
||||||
}
|
|
||||||
for (int j = i + 1; j < height; j++) {
|
|
||||||
int lead = matrix[j][i];
|
|
||||||
for (int k = i; k < width; k++) {
|
|
||||||
matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = height - 1; i >= 0; i--) {
|
|
||||||
for (int j = 0; j < i; j++) {
|
|
||||||
int lead = matrix[j][i];
|
|
||||||
for (int k = i; k < width; k++) {
|
|
||||||
matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
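As a point of reference, a minimal usage sketch of the finite-field arithmetic above (illustrative only, not part of the removed sources): it assumes GaloisField and its two-argument constructor GaloisField(int fieldSize, int primitivePolynomial) are publicly accessible, and picks x^4 + x + 1 (decimal 19) as a primitive polynomial for GF(16).

import org.apache.hadoop.raid.GaloisField;

public class GaloisFieldSketch {
  public static void main(String[] args) {
    // Hypothetical construction; the field size and polynomial are assumptions.
    GaloisField gf = new GaloisField(16, 19);

    int a = 7, b = 9;
    int sum  = gf.add(a, b);        // addition is XOR: 7 ^ 9 = 14
    int prod = gf.multiply(a, b);   // table lookup
    int quot = gf.divide(prod, b);  // recovers a, since b != 0
    System.out.println(sum + " " + prod + " " + quot);

    // Solve a small Vandermonde system V z = y in place; y is overwritten with z.
    int[] x = {1, 2, 3};
    int[] y = {4, 5, 6};
    gf.solveVandermondeSystem(x, y);
    System.out.println(java.util.Arrays.toString(y));
  }
}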
|
|
|
@ -1,144 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.URLDecoder;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
|
|
||||||
import org.apache.hadoop.util.LineReader;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents the contents of a HAR Index file. The HAR is assumed to
* comprise RAID parity files only and no directories.
|
|
||||||
*/
|
|
||||||
public class HarIndex {
|
|
||||||
public static final String indexFileName = "_index";
|
|
||||||
private List<IndexEntry> entries = new LinkedList<IndexEntry>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents information in a single line of the HAR index file.
|
|
||||||
*/
|
|
||||||
public static class IndexEntry {
|
|
||||||
String fileName; // Name of the file in the part file.
|
|
||||||
long startOffset; // Start offset within the part file.
|
|
||||||
long length; // Length of this file within the part file.
|
|
||||||
long mtime; // Modification time of the file.
|
|
||||||
String partFileName; // Name of the part file.
|
|
||||||
|
|
||||||
IndexEntry(String fileName, long startOffset, long length,
|
|
||||||
long mtime, String partFileName) {
|
|
||||||
this.fileName = fileName;
|
|
||||||
this.startOffset = startOffset;
|
|
||||||
this.length = length;
|
|
||||||
this.mtime = mtime;
|
|
||||||
this.partFileName = partFileName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return "fileName=" + fileName +
|
|
||||||
", startOffset=" + startOffset +
|
|
||||||
", length=" + length +
|
|
||||||
", mtime=" + mtime +
|
|
||||||
", partFileName=" + partFileName;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor that reads the contents of the index file.
|
|
||||||
* @param in An input stream to the index file.
|
|
||||||
* @param max The size of the index file.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public HarIndex(InputStream in, long max) throws IOException {
|
|
||||||
LineReader lineReader = new LineReader(in);
|
|
||||||
Text text = new Text();
|
|
||||||
long nread = 0;
|
|
||||||
while (nread < max) {
|
|
||||||
int n = lineReader.readLine(text);
|
|
||||||
nread += n;
|
|
||||||
String line = text.toString();
|
|
||||||
try {
|
|
||||||
parseLine(line);
|
|
||||||
} catch (UnsupportedEncodingException e) {
|
|
||||||
throw new IOException("UnsupportedEncodingException after reading " +
|
|
||||||
nread + "bytes");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses each line and extracts relevant information.
|
|
||||||
* @param line
|
|
||||||
* @throws UnsupportedEncodingException
|
|
||||||
*/
|
|
||||||
void parseLine(String line) throws UnsupportedEncodingException {
|
|
||||||
String[] splits = line.split(" ");
|
|
||||||
|
|
||||||
boolean isDir = "dir".equals(splits[1]) ? true: false;
|
|
||||||
if (!isDir && splits.length >= 6) {
|
|
||||||
String name = URLDecoder.decode(splits[0], "UTF-8");
|
|
||||||
String partName = URLDecoder.decode(splits[2], "UTF-8");
|
|
||||||
long startIndex = Long.parseLong(splits[3]);
|
|
||||||
long length = Long.parseLong(splits[4]);
|
|
||||||
String[] newsplits = URLDecoder.decode(splits[5],"UTF-8").split(" ");
|
|
||||||
if (newsplits != null && newsplits.length >= 4) {
|
|
||||||
long mtime = Long.parseLong(newsplits[0]);
|
|
||||||
IndexEntry entry = new IndexEntry(
|
|
||||||
name, startIndex, length, mtime, partName);
|
|
||||||
entries.add(entry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds the index entry corresponding to a HAR partFile at an offset.
|
|
||||||
* @param partName The name of the part file (part-*).
|
|
||||||
* @param partFileOffset The offset into the part file.
|
|
||||||
* @return The entry corresponding to partName:partFileOffset.
|
|
||||||
*/
|
|
||||||
public IndexEntry findEntry(String partName, long partFileOffset) {
|
|
||||||
for (IndexEntry e: entries) {
|
|
||||||
boolean nameMatch = partName.equals(e.partFileName);
|
|
||||||
boolean inRange = (partFileOffset >= e.startOffset) &&
|
|
||||||
(partFileOffset < e.startOffset + e.length);
|
|
||||||
if (nameMatch && inRange) {
|
|
||||||
return e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds the index entry corresponding to a file in the archive
|
|
||||||
*/
|
|
||||||
public IndexEntry findEntryByFileName(String fileName) {
|
|
||||||
for (IndexEntry e: entries) {
|
|
||||||
if (fileName.equals(e.fileName)) {
|
|
||||||
return e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
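A small, hedged sketch of how the class above can be exercised in isolation; the single index line is handcrafted to match what parseLine() accepts (URL-encoded name, a "dir" flag, part file, start offset, length, and encoded metadata whose first token is the mtime) and is not real HAR output.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.hadoop.raid.HarIndex;

public class HarIndexSketch {
  public static void main(String[] args) throws IOException {
    // One made-up entry: /raid/f1 lives in part-0 at offset 0, length 1024.
    String line =
        "/raid/f1 file part-0 0 1024 1337000000000+420+hadoop+supergroup\n";
    byte[] bytes = line.getBytes("UTF-8");

    HarIndex index = new HarIndex(new ByteArrayInputStream(bytes), bytes.length);

    // Which archived file covers byte 100 of part-0?
    HarIndex.IndexEntry entry = index.findEntry("part-0", 100L);
    System.out.println(entry);
  }
}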
|
|
|
@ -1,211 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Periodically monitors the status of jobs registered with it.
|
|
||||||
*
|
|
||||||
* Jobs that are submitted for the same policy name are kept in the same list,
|
|
||||||
* and the list itself is kept in a map that has the policy name as the key and
|
|
||||||
* the list as value.
|
|
||||||
*/
|
|
||||||
class JobMonitor implements Runnable {
|
|
||||||
public static final Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.JobMonitor");
|
|
||||||
|
|
||||||
volatile boolean running = true;
|
|
||||||
|
|
||||||
private Map<String, List<DistRaid>> jobs;
|
|
||||||
public static final String JOBMONITOR_INTERVAL_KEY = "raid.jobmonitor.interval";
|
|
||||||
private long jobMonitorInterval;
|
|
||||||
private volatile long jobsMonitored = 0;
|
|
||||||
private volatile long jobsSucceeded = 0;
|
|
||||||
|
|
||||||
public JobMonitor(Configuration conf) {
|
|
||||||
jobMonitorInterval = conf.getLong(JOBMONITOR_INTERVAL_KEY, 60000);
|
|
||||||
jobs = new HashMap<String, List<DistRaid>>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void run() {
|
|
||||||
while (running) {
|
|
||||||
try {
|
|
||||||
LOG.info("JobMonitor thread continuing to run...");
|
|
||||||
doMonitor();
|
|
||||||
} catch (Throwable e) {
|
|
||||||
LOG.error("JobMonitor encountered exception " +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
// All expected exceptions are caught by doMonitor(). It is better
|
|
||||||
// to exit now, this will prevent RaidNode from submitting more jobs
|
|
||||||
// since the number of running jobs will never decrease.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Periodically checks status of running map-reduce jobs.
|
|
||||||
*/
|
|
||||||
public void doMonitor() {
|
|
||||||
while (running) {
|
|
||||||
String[] keys = null;
|
|
||||||
// Make a copy of the names of the current jobs.
|
|
||||||
synchronized(jobs) {
|
|
||||||
keys = jobs.keySet().toArray(new String[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check all the jobs. We do not want to block access to `jobs`
|
|
||||||
// because that will prevent new jobs from being added.
|
|
||||||
// This is safe because JobMonitor.run is the only code that can
|
|
||||||
// remove a job from `jobs`. Thus all elements in `keys` will have
|
|
||||||
// valid values.
|
|
||||||
Map<String, List<DistRaid>> finishedJobs =
|
|
||||||
new HashMap<String, List<DistRaid>>();
|
|
||||||
|
|
||||||
for (String key: keys) {
|
|
||||||
// For each policy being monitored, get the list of jobs running.
|
|
||||||
DistRaid[] jobListCopy = null;
|
|
||||||
synchronized(jobs) {
|
|
||||||
List<DistRaid> jobList = jobs.get(key);
|
|
||||||
synchronized(jobList) {
|
|
||||||
jobListCopy = jobList.toArray(new DistRaid[jobList.size()]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// The code that actually contacts the JobTracker is not synchronized,
|
|
||||||
// it uses copies of the list of jobs.
|
|
||||||
for (DistRaid job: jobListCopy) {
|
|
||||||
// Check each running job.
|
|
||||||
try {
|
|
||||||
boolean complete = job.checkComplete();
|
|
||||||
if (complete) {
|
|
||||||
addJob(finishedJobs, key, job);
|
|
||||||
if (job.successful()) {
|
|
||||||
jobsSucceeded++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
// If there was an error, consider the job finished.
|
|
||||||
addJob(finishedJobs, key, job);
|
|
||||||
LOG.error("JobMonitor exception", ioe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (finishedJobs.size() > 0) {
|
|
||||||
for (String key: finishedJobs.keySet()) {
|
|
||||||
List<DistRaid> finishedJobList = finishedJobs.get(key);
|
|
||||||
// Iterate through finished jobs and remove from jobs.
|
|
||||||
// removeJob takes care of locking.
|
|
||||||
for (DistRaid job: finishedJobList) {
|
|
||||||
removeJob(jobs, key, job);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
Thread.sleep(jobMonitorInterval);
|
|
||||||
} catch (InterruptedException ie) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public int runningJobsCount(String key) {
|
|
||||||
int count = 0;
|
|
||||||
synchronized(jobs) {
|
|
||||||
if (jobs.containsKey(key)) {
|
|
||||||
List<DistRaid> jobList = jobs.get(key);
|
|
||||||
synchronized(jobList) {
|
|
||||||
count = jobList.size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void monitorJob(String key, DistRaid job) {
|
|
||||||
addJob(jobs, key, job);
|
|
||||||
jobsMonitored++;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long jobsMonitored() {
|
|
||||||
return this.jobsMonitored;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long jobsSucceeded() {
|
|
||||||
return this.jobsSucceeded;
|
|
||||||
}
|
|
||||||
|
|
||||||
// For test code
|
|
||||||
int runningJobsCount() {
|
|
||||||
int total = 0;
|
|
||||||
synchronized(jobs) {
|
|
||||||
for (String key: jobs.keySet()) {
|
|
||||||
total += jobs.get(key).size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return total;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void addJob(Map<String, List<DistRaid>> jobsMap,
|
|
||||||
String jobName, DistRaid job) {
|
|
||||||
synchronized(jobsMap) {
|
|
||||||
List<DistRaid> list = null;
|
|
||||||
if (jobsMap.containsKey(jobName)) {
|
|
||||||
list = jobsMap.get(jobName);
|
|
||||||
} else {
|
|
||||||
list = new LinkedList<DistRaid>();
|
|
||||||
jobsMap.put(jobName, list);
|
|
||||||
}
|
|
||||||
synchronized(list) {
|
|
||||||
list.add(job);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void removeJob(Map<String, List<DistRaid>> jobsMap,
|
|
||||||
String jobName, DistRaid job) {
|
|
||||||
synchronized(jobsMap) {
|
|
||||||
if (jobsMap.containsKey(jobName)) {
|
|
||||||
List<DistRaid> list = jobsMap.get(jobName);
|
|
||||||
synchronized(list) {
|
|
||||||
for (Iterator<DistRaid> it = list.iterator(); it.hasNext(); ) {
|
|
||||||
DistRaid val = it.next();
|
|
||||||
if (val == job) {
|
|
||||||
it.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (list.size() == 0) {
|
|
||||||
jobsMap.remove(jobName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
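The comments in doMonitor() describe the central design choice: snapshot the map keys inside the lock, then do the slow JobTracker checks outside of it so new jobs can still be registered concurrently. A generic, standalone illustration of that pattern (not Raid code) is:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SnapshotThenProcess {
  private final Map<String, List<String>> jobs =
      new HashMap<String, List<String>>();

  public void register(String policy, String job) {
    synchronized (jobs) {
      List<String> list = jobs.get(policy);
      if (list == null) {
        list = new ArrayList<String>();
        jobs.put(policy, list);
      }
      list.add(job);
    }
  }

  public void monitorOnce() {
    String[] keys;
    synchronized (jobs) {              // short critical section: copy the keys
      keys = jobs.keySet().toArray(new String[0]);
    }
    for (String key : keys) {          // slow work runs without holding the lock
      List<String> copy;
      synchronized (jobs) {
        copy = new ArrayList<String>(jobs.get(key));
      }
      for (String job : copy) {
        System.out.println(key + " -> " + job);   // stand-in for checkComplete()
      }
    }
  }
}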
|
|
|
@ -1,171 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
|
|
||||||
import org.apache.hadoop.net.NetUtils;
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
|
|
||||||
import org.apache.hadoop.raid.RaidNode;
|
|
||||||
import org.apache.hadoop.raid.RaidUtils;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This class fixes source file blocks using the parity file,
|
|
||||||
* and parity file blocks using the source file.
|
|
||||||
* It periodically fetches the list of corrupt files from the namenode,
|
|
||||||
* and figures out the location of the bad block by reading through
|
|
||||||
* the corrupt file.
|
|
||||||
*/
|
|
||||||
public class LocalBlockFixer extends BlockFixer {
|
|
||||||
public static final Log LOG = LogFactory.getLog(LocalBlockFixer.class);
|
|
||||||
|
|
||||||
private java.util.HashMap<String, java.util.Date> history;
|
|
||||||
|
|
||||||
private BlockFixerHelper helper;
|
|
||||||
|
|
||||||
public LocalBlockFixer(Configuration conf) throws IOException {
|
|
||||||
super(conf);
|
|
||||||
history = new java.util.HashMap<String, java.util.Date>();
|
|
||||||
helper = new BlockFixerHelper(getConf());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void run() {
|
|
||||||
while (running) {
|
|
||||||
try {
|
|
||||||
LOG.info("LocalBlockFixer continuing to run...");
|
|
||||||
doFix();
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.error(StringUtils.stringifyException(e));
|
|
||||||
} catch (Error err) {
|
|
||||||
LOG.error("Exiting after encountering " +
|
|
||||||
StringUtils.stringifyException(err));
|
|
||||||
throw err;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void doFix() throws InterruptedException, IOException {
|
|
||||||
while (running) {
|
|
||||||
// Sleep before proceeding to fix files.
|
|
||||||
Thread.sleep(blockFixInterval);
|
|
||||||
|
|
||||||
// Purge history older than the history interval.
|
|
||||||
purgeHistory();
|
|
||||||
|
|
||||||
List<Path> corruptFiles = getCorruptFiles();
|
|
||||||
|
|
||||||
filterUnfixableSourceFiles(corruptFiles.iterator());
|
|
||||||
|
|
||||||
if (corruptFiles.isEmpty()) {
|
|
||||||
// If there are no corrupt files, retry after some time.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
LOG.info("Found " + corruptFiles.size() + " corrupt files.");
|
|
||||||
|
|
||||||
helper.sortCorruptFiles(corruptFiles);
|
|
||||||
|
|
||||||
for (Path srcPath: corruptFiles) {
|
|
||||||
if (!running) break;
|
|
||||||
try {
|
|
||||||
boolean fixed = helper.fixFile(srcPath);
|
|
||||||
LOG.info("Adding " + srcPath + " to history");
|
|
||||||
history.put(srcPath.toString(), new java.util.Date());
|
|
||||||
if (fixed) {
|
|
||||||
incrFilesFixed();
|
|
||||||
}
|
|
||||||
} catch (IOException ie) {
|
|
||||||
LOG.error("Hit error while processing " + srcPath +
|
|
||||||
": " + StringUtils.stringifyException(ie));
|
|
||||||
// Do nothing, move on to the next file.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* We maintain history of fixed files because a fixed file may appear in
|
|
||||||
* the list of corrupt files if we loop around too quickly.
|
|
||||||
* This function removes the old items in the history so that we can
|
|
||||||
* recognize files that have actually become corrupt since being fixed.
|
|
||||||
*/
|
|
||||||
void purgeHistory() {
|
|
||||||
java.util.Date cutOff = new java.util.Date(Time.now() -
|
|
||||||
historyInterval);
|
|
||||||
List<String> toRemove = new java.util.ArrayList<String>();
|
|
||||||
|
|
||||||
for (String key: history.keySet()) {
|
|
||||||
java.util.Date item = history.get(key);
|
|
||||||
if (item.before(cutOff)) {
|
|
||||||
toRemove.add(key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (String key: toRemove) {
|
|
||||||
LOG.info("Removing " + key + " from history");
|
|
||||||
history.remove(key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return A list of corrupt files as obtained from the namenode
|
|
||||||
*/
|
|
||||||
List<Path> getCorruptFiles() throws IOException {
|
|
||||||
DistributedFileSystem dfs = helper.getDFS(new Path("/"));
|
|
||||||
|
|
||||||
String[] files = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
List<Path> corruptFiles = new LinkedList<Path>();
|
|
||||||
for (String f: files) {
|
|
||||||
Path p = new Path(f);
|
|
||||||
if (!history.containsKey(p.toString())) {
|
|
||||||
corruptFiles.add(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
RaidUtils.filterTrash(getConf(), corruptFiles);
|
|
||||||
return corruptFiles;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
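The purgeHistory() javadoc above explains why fixed files are remembered only for a limited window. A standalone sketch of that cut-off rule (illustrative, not Raid code; the interval values are arbitrary):

import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class FixHistorySketch {
  // Drop entries older than historyIntervalMs so a file can be re-fixed later.
  static void purge(Map<String, Date> history, long historyIntervalMs) {
    Date cutOff = new Date(System.currentTimeMillis() - historyIntervalMs);
    for (Iterator<Map.Entry<String, Date>> it = history.entrySet().iterator();
         it.hasNext(); ) {
      if (it.next().getValue().before(cutOff)) {
        it.remove();
      }
    }
  }

  public static void main(String[] args) {
    Map<String, Date> history = new HashMap<String, Date>();
    history.put("/raid/old", new Date(System.currentTimeMillis() - 7200000L));
    history.put("/raid/new", new Date());
    purge(history, 3600000L);             // one-hour window
    System.out.println(history.keySet()); // only /raid/new remains
  }
}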
|
|
||||||
|
|
|
@ -1,60 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Implementation of {@link RaidNode} that performs raiding locally.
|
|
||||||
*/
|
|
||||||
public class LocalRaidNode extends RaidNode {
|
|
||||||
|
|
||||||
public static final Log LOG = LogFactory.getLog(LocalRaidNode.class);
|
|
||||||
|
|
||||||
public LocalRaidNode(Configuration conf) throws IOException {
|
|
||||||
super(conf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
void raidFiles(PolicyInfo info, List<FileStatus> paths) throws IOException {
|
|
||||||
doRaid(conf, info, paths);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* {@inheritDoc}
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
int getRunningJobsForPolicy(String policyName) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,151 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.hadoop.util.Progressable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wraps over multiple input streams and provides an input stream that is
|
|
||||||
* an XOR of the streams.
|
|
||||||
*/
|
|
||||||
class ParityInputStream extends InputStream {
|
|
||||||
private static final int DEFAULT_BUFSIZE = 5*1024*1024;
|
|
||||||
private InputStream[] streams;
|
|
||||||
private byte[] xor;
|
|
||||||
private byte[] buf;
|
|
||||||
private int bufSize;
|
|
||||||
private long remaining;
|
|
||||||
private int available = 0;
|
|
||||||
private int readPos = 0;
|
|
||||||
|
|
||||||
public ParityInputStream(
|
|
||||||
InputStream[] streams, long parityBlockSize, byte[] buf, byte[] xor) {
|
|
||||||
assert buf.length == xor.length;
|
|
||||||
bufSize = buf.length;
|
|
||||||
this.streams = streams;
|
|
||||||
remaining = parityBlockSize;
|
|
||||||
this.buf = buf;
|
|
||||||
this.xor = xor;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int read() throws IOException {
|
|
||||||
makeAvailable();
|
|
||||||
if (available == 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
int ret = xor[readPos];
|
|
||||||
readPos++;
|
|
||||||
available--;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int read(byte b[], int off, int len) throws IOException {
|
|
||||||
makeAvailable();
|
|
||||||
if (available == 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
int ret = Math.min(len, available);
|
|
||||||
for (int i = 0; i < ret; ++i) {
|
|
||||||
b[off+i] = xor[readPos+i];
|
|
||||||
}
|
|
||||||
readPos += ret;
|
|
||||||
available -= ret;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
for (InputStream i: streams) {
|
|
||||||
i.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Send the contents of the stream to the sink.
|
|
||||||
* @param sink
|
|
||||||
* @param reporter
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public void drain(OutputStream sink, Progressable reporter)
|
|
||||||
throws IOException {
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
makeAvailable();
|
|
||||||
if (available == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
sink.write(xor, readPos, available);
|
|
||||||
available = 0;
|
|
||||||
if (reporter != null) {
|
|
||||||
reporter.progress();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Make some bytes available for reading in the internal buffer.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private void makeAvailable() throws IOException {
|
|
||||||
if (available > 0 || remaining <= 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Read some bytes from the first stream.
|
|
||||||
int xorlen = (int)Math.min(remaining, bufSize);
|
|
||||||
readExact(streams[0], xor, xorlen);
|
|
||||||
|
|
||||||
// Read bytes from all the other streams and xor them.
|
|
||||||
for (int i = 1; i < streams.length; i++) {
|
|
||||||
readExact(streams[i], buf, xorlen);
|
|
||||||
|
|
||||||
for (int j = 0; j < xorlen; j++) {
|
|
||||||
xor[j] ^= buf[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
remaining -= xorlen;
|
|
||||||
available = xorlen;
|
|
||||||
readPos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void readExact(InputStream in, byte[] bufs, int toRead)
|
|
||||||
throws IOException {
|
|
||||||
int tread = 0;
|
|
||||||
while (tread < toRead) {
|
|
||||||
int read = in.read(bufs, tread, toRead - tread);
|
|
||||||
if (read == -1) {
|
|
||||||
// If the stream ends, fill in zeros.
|
|
||||||
Arrays.fill(bufs, tread, toRead, (byte)0);
|
|
||||||
tread = toRead;
|
|
||||||
} else {
|
|
||||||
tread += read;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert tread == toRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
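A minimal sketch of the XOR behaviour, runnable only from inside the org.apache.hadoop.raid package because the class is package-private; the byte arrays and buffer sizes are made up.

package org.apache.hadoop.raid;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

public class ParityInputStreamSketch {
  public static void main(String[] args) throws IOException {
    byte[] block1 = {1, 2, 3, 4};
    byte[] block2 = {5, 6, 7, 8};
    InputStream[] streams = {
        new ByteArrayInputStream(block1), new ByteArrayInputStream(block2)};

    byte[] buf = new byte[16];
    byte[] xor = new byte[16];
    ParityInputStream parity =
        new ParityInputStream(streams, block1.length, buf, xor);

    ByteArrayOutputStream parityBlock = new ByteArrayOutputStream();
    parity.drain(parityBlock, null);   // drain() tolerates a null reporter
    parity.close();

    // Each output byte is the XOR of the corresponding input bytes.
    System.out.println(Arrays.toString(parityBlock.toByteArray()));
  }
}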
|
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Thrown when the config file for {@link RaidNode} is malformed.
|
|
||||||
*/
|
|
||||||
public class RaidConfigurationException extends Exception {
|
|
||||||
private static final long serialVersionUID = 4046516718965587999L;
|
|
||||||
|
|
||||||
public RaidConfigurationException(String message) {
|
|
||||||
super(message);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,259 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.conf.Configured;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
|
|
||||||
public class RaidFilter {
|
|
||||||
static class Statistics {
|
|
||||||
long numRaided = 0;
|
|
||||||
long numTooNew = 0;
|
|
||||||
long sizeTooNew = 0;
|
|
||||||
long numTooSmall = 0;
|
|
||||||
long sizeTooSmall = 0;
|
|
||||||
|
|
||||||
public void aggregate(Statistics other) {
|
|
||||||
this.numRaided += other.numRaided;
|
|
||||||
this.numTooNew += other.numTooNew;
|
|
||||||
this.sizeTooNew += other.sizeTooNew;
|
|
||||||
this.numTooSmall += other.numTooSmall;
|
|
||||||
this.sizeTooSmall += other.sizeTooSmall;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return "numRaided = " + numRaided +
|
|
||||||
", numTooNew = " + numTooNew +
|
|
||||||
", sizeTooNew = " + sizeTooNew +
|
|
||||||
", numTooSmall = " + numTooSmall +
|
|
||||||
", sizeTooSmall = " + sizeTooSmall;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static class TimeBasedFilter extends Configured
|
|
||||||
implements DirectoryTraversal.FileFilter {
|
|
||||||
int targetRepl;
|
|
||||||
Path raidDestPrefix;
|
|
||||||
long modTimePeriod;
|
|
||||||
long startTime;
|
|
||||||
Statistics stats = new Statistics();
|
|
||||||
String currentSrcPath = null;
|
|
||||||
long[] modTimePeriods = new long[0];
|
|
||||||
String[] otherSrcPaths = new String[0];
|
|
||||||
|
|
||||||
TimeBasedFilter(Configuration conf, Path destPrefix, int targetRepl,
|
|
||||||
long startTime, long modTimePeriod) {
|
|
||||||
super(conf);
|
|
||||||
this.raidDestPrefix = destPrefix;
|
|
||||||
this.targetRepl = targetRepl;
|
|
||||||
this.startTime = startTime;
|
|
||||||
this.modTimePeriod = modTimePeriod;
|
|
||||||
}
|
|
||||||
|
|
||||||
TimeBasedFilter(Configuration conf,
|
|
||||||
Path destPrefix, PolicyInfo info,
|
|
||||||
List<PolicyInfo> allPolicies, long startTime, Statistics stats) {
|
|
||||||
super(conf);
|
|
||||||
this.raidDestPrefix = destPrefix;
|
|
||||||
this.targetRepl = Integer.parseInt(info.getProperty("targetReplication"));
|
|
||||||
this.modTimePeriod = Long.parseLong(info.getProperty("modTimePeriod"));
|
|
||||||
this.startTime = startTime;
|
|
||||||
this.stats = stats;
|
|
||||||
this.currentSrcPath = info.getSrcPath().toUri().getPath();
|
|
||||||
initializeOtherPaths(allPolicies);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void initializeOtherPaths(List<PolicyInfo> allPolicies) {
|
|
||||||
ArrayList<PolicyInfo> tmp = new ArrayList<PolicyInfo>(allPolicies);
|
|
||||||
// Remove all policies where srcPath <= currentSrcPath or
|
|
||||||
// matchingPrefixLength is < length(currentSrcPath)
|
|
||||||
// The policies remaining are the only ones that could better
|
|
||||||
// select a file chosen by the current policy.
|
|
||||||
for (Iterator<PolicyInfo> it = tmp.iterator(); it.hasNext(); ) {
|
|
||||||
String src = it.next().getSrcPath().toUri().getPath();
|
|
||||||
if (src.compareTo(currentSrcPath) <= 0) {
|
|
||||||
it.remove();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int matchLen = matchingPrefixLength(src, currentSrcPath);
|
|
||||||
if (matchLen < currentSrcPath.length()) {
|
|
||||||
it.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Sort in reverse lexicographic order.
|
|
||||||
Collections.sort(tmp, new Comparator() {
|
|
||||||
public int compare(Object o1, Object o2) {
|
|
||||||
return 0 -
|
|
||||||
((PolicyInfo)o1).getSrcPath().toUri().getPath().compareTo(
|
|
||||||
((PolicyInfo)o2).getSrcPath().toUri().getPath());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
otherSrcPaths = new String[tmp.size()];
|
|
||||||
modTimePeriods = new long[otherSrcPaths.length];
|
|
||||||
for (int i = 0; i < otherSrcPaths.length; i++) {
|
|
||||||
otherSrcPaths[i] = tmp.get(i).getSrcPath().toUri().getPath();
|
|
||||||
modTimePeriods[i] = Long.parseLong(
|
|
||||||
tmp.get(i).getProperty("modTimePeriod"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean check(FileStatus f) throws IOException {
|
|
||||||
if (!canChooseForCurrentPolicy(f)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the source file has two blocks or fewer, skip it.
|
|
||||||
long blockSize = f.getBlockSize();
|
|
||||||
if (2 * blockSize >= f.getLen()) {
|
|
||||||
stats.numTooSmall++;
|
|
||||||
stats.sizeTooSmall += f.getLen();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean select = false;
|
|
||||||
try {
|
|
||||||
Object ppair = RaidNode.getParityFile(
|
|
||||||
raidDestPrefix, f.getPath(), getConf());
|
|
||||||
// Is there a valid parity file?
|
|
||||||
if (ppair != null) {
|
|
||||||
// Is the source at the target replication?
|
|
||||||
if (f.getReplication() != targetRepl) {
|
|
||||||
// Select the file so that its replication can be set.
|
|
||||||
select = true;
|
|
||||||
} else {
|
|
||||||
stats.numRaided++;
|
|
||||||
// Nothing to do, don't select the file.
|
|
||||||
select = false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// No parity file.
|
|
||||||
if (f.getModificationTime() + modTimePeriod < startTime) {
|
|
||||||
// If the file is not too new, choose it for raiding.
|
|
||||||
select = true;
|
|
||||||
} else {
|
|
||||||
select = false;
|
|
||||||
stats.numTooNew++;
|
|
||||||
stats.sizeTooNew += f.getLen();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (java.io.FileNotFoundException e) {
|
|
||||||
select = true; // destination file does not exist
|
|
||||||
} catch (java.io.IOException e) {
|
|
||||||
// If there is a problem with the har path, this will let us continue.
|
|
||||||
DirectoryTraversal.LOG.error(
|
|
||||||
"Error while selecting " + StringUtils.stringifyException(e));
|
|
||||||
}
|
|
||||||
return select;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if a file can be chosen for the current policy.
|
|
||||||
*/
|
|
||||||
boolean canChooseForCurrentPolicy(FileStatus stat) {
|
|
||||||
boolean choose = true;
|
|
||||||
if (otherSrcPaths.length > 0) {
|
|
||||||
String fileStr = stat.getPath().toUri().getPath();
|
|
||||||
|
|
||||||
// For a given string, find the best matching srcPath.
|
|
||||||
int matchWithCurrent = matchingPrefixLength(fileStr, currentSrcPath);
|
|
||||||
for (int i = 0; i < otherSrcPaths.length; i++) {
|
|
||||||
// If the file is too new, move to the next.
|
|
||||||
if (stat.getModificationTime() > startTime - modTimePeriods[i]) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int matchLen = matchingPrefixLength(fileStr, otherSrcPaths[i]);
|
|
||||||
if (matchLen > 0 &&
|
|
||||||
fileStr.charAt(matchLen - 1) == Path.SEPARATOR_CHAR) {
|
|
||||||
matchLen--;
|
|
||||||
}
|
|
||||||
if (matchLen > matchWithCurrent) {
|
|
||||||
choose = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return choose;
|
|
||||||
}
|
|
||||||
|
|
||||||
int matchingPrefixLength(final String s1, final String s2) {
|
|
||||||
int len = 0;
|
|
||||||
for (int j = 0; j < s1.length() && j < s2.length(); j++) {
|
|
||||||
if (s1.charAt(j) == s2.charAt(j)) {
|
|
||||||
len++;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return len;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static class PreferenceFilter extends Configured
|
|
||||||
implements DirectoryTraversal.FileFilter {
|
|
||||||
Path firstChoicePrefix;
|
|
||||||
DirectoryTraversal.FileFilter secondChoiceFilter;
|
|
||||||
|
|
||||||
PreferenceFilter(Configuration conf,
|
|
||||||
Path firstChoicePrefix, Path secondChoicePrefix,
|
|
||||||
int targetRepl, long startTime, long modTimePeriod) {
|
|
||||||
super(conf);
|
|
||||||
this.firstChoicePrefix = firstChoicePrefix;
|
|
||||||
this.secondChoiceFilter = new TimeBasedFilter(conf,
|
|
||||||
secondChoicePrefix, targetRepl, startTime, modTimePeriod);
|
|
||||||
}
|
|
||||||
|
|
||||||
PreferenceFilter(Configuration conf,
|
|
||||||
Path firstChoicePrefix, Path secondChoicePrefix,
|
|
||||||
PolicyInfo info, List<PolicyInfo> allPolicies, long startTime,
|
|
||||||
Statistics stats) {
|
|
||||||
super(conf);
|
|
||||||
this.firstChoicePrefix = firstChoicePrefix;
|
|
||||||
this.secondChoiceFilter = new TimeBasedFilter(
|
|
||||||
conf, secondChoicePrefix, info, allPolicies, startTime, stats);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean check(FileStatus f) throws IOException {
|
|
||||||
Object firstChoicePPair =
|
|
||||||
RaidNode.getParityFile(firstChoicePrefix, f.getPath(), getConf());
|
|
||||||
if (firstChoicePPair == null) {
|
|
||||||
// The decision is up to the second choice filter.
|
|
||||||
return secondChoiceFilter.check(f);
|
|
||||||
} else {
|
|
||||||
// There is already a parity file under the first choice path.
|
|
||||||
// We don't want to choose this file.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
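The policy-precedence logic in canChooseForCurrentPolicy() boils down to "the policy whose srcPath shares the longest prefix with the file owns it". A standalone illustration of that comparison (not Raid code; the paths are invented):

public class PrefixMatchSketch {
  static int matchingPrefixLength(String s1, String s2) {
    int len = 0;
    while (len < s1.length() && len < s2.length()
        && s1.charAt(len) == s2.charAt(len)) {
      len++;
    }
    return len;
  }

  public static void main(String[] args) {
    String file = "/user/raid/logs/part-00000";
    int withBroad    = matchingPrefixLength(file, "/user/raid");      // 10
    int withSpecific = matchingPrefixLength(file, "/user/raid/logs"); // 15
    // The more specific policy matches more characters, so the broader
    // policy should skip the file and leave it to the other one.
    System.out.println(withSpecific > withBroad);                     // true
  }
}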
|
|
File diff suppressed because it is too large
@ -1,682 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import javax.security.auth.login.LoginException;
|
|
||||||
|
|
||||||
import org.apache.hadoop.ipc.*;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.net.NetUtils;
|
|
||||||
import org.apache.hadoop.conf.Configured;
|
|
||||||
import org.apache.hadoop.util.Tool;
|
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
|
||||||
import org.apache.hadoop.io.retry.RetryPolicies;
|
|
||||||
import org.apache.hadoop.io.retry.RetryProxy;
|
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.BlockLocation;
|
|
||||||
import org.apache.hadoop.fs.HarFileSystem;
|
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedRaidFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyList;
|
|
||||||
import org.apache.hadoop.raid.protocol.RaidProtocol;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A {@link RaidShell} that allows browsing configured raid policies.
|
|
||||||
*/
|
|
||||||
public class RaidShell extends Configured implements Tool {
|
|
||||||
static {
|
|
||||||
Configuration.addDefaultResource("hdfs-default.xml");
|
|
||||||
Configuration.addDefaultResource("hdfs-site.xml");
|
|
||||||
}
|
|
||||||
public static final Log LOG = LogFactory.getLog("org.apache.hadoop.RaidShell");
|
|
||||||
public RaidProtocol raidnode;
|
|
||||||
RaidProtocol rpcRaidnode;
|
|
||||||
private UserGroupInformation ugi;
|
|
||||||
volatile boolean clientRunning = true;
|
|
||||||
private Configuration conf;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start RaidShell.
|
|
||||||
* <p>
|
|
||||||
* The RaidShell connects to the specified RaidNode and performs basic
|
|
||||||
* configuration operations.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public RaidShell(Configuration conf) throws IOException {
|
|
||||||
super(conf);
|
|
||||||
this.conf = conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
void initializeRpc(Configuration conf, InetSocketAddress address) throws IOException {
|
|
||||||
this.ugi = UserGroupInformation.getCurrentUser();
|
|
||||||
this.rpcRaidnode = createRPCRaidnode(address, conf, ugi);
|
|
||||||
this.raidnode = createRaidnode(rpcRaidnode);
|
|
||||||
}
|
|
||||||
|
|
||||||
void initializeLocal(Configuration conf) throws IOException {
|
|
||||||
this.ugi = UserGroupInformation.getCurrentUser();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static RaidProtocol createRaidnode(Configuration conf) throws IOException {
|
|
||||||
return createRaidnode(RaidNode.getAddress(conf), conf);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static RaidProtocol createRaidnode(InetSocketAddress raidNodeAddr,
|
|
||||||
Configuration conf) throws IOException {
|
|
||||||
return createRaidnode(createRPCRaidnode(raidNodeAddr, conf,
|
|
||||||
UserGroupInformation.getCurrentUser()));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static RaidProtocol createRPCRaidnode(InetSocketAddress raidNodeAddr,
|
|
||||||
Configuration conf, UserGroupInformation ugi)
|
|
||||||
throws IOException {
|
|
||||||
LOG.debug("RaidShell connecting to " + raidNodeAddr);
|
|
||||||
return (RaidProtocol)RPC.getProxy(RaidProtocol.class,
|
|
||||||
RaidProtocol.versionID, raidNodeAddr, ugi, conf,
|
|
||||||
NetUtils.getSocketFactory(conf, RaidProtocol.class));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static RaidProtocol createRaidnode(RaidProtocol rpcRaidnode)
|
|
||||||
throws IOException {
|
|
||||||
RetryPolicy createPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
|
|
||||||
5, 5000, TimeUnit.MILLISECONDS);
|
|
||||||
|
|
||||||
Map<Class<? extends Exception>,RetryPolicy> remoteExceptionToPolicyMap =
|
|
||||||
new HashMap<Class<? extends Exception>, RetryPolicy>();
|
|
||||||
|
|
||||||
Map<Class<? extends Exception>,RetryPolicy> exceptionToPolicyMap =
|
|
||||||
new HashMap<Class<? extends Exception>, RetryPolicy>();
|
|
||||||
exceptionToPolicyMap.put(RemoteException.class,
|
|
||||||
RetryPolicies.retryByRemoteException(
|
|
||||||
RetryPolicies.TRY_ONCE_THEN_FAIL, remoteExceptionToPolicyMap));
|
|
||||||
RetryPolicy methodPolicy = RetryPolicies.retryByException(
|
|
||||||
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
|
|
||||||
Map<String,RetryPolicy> methodNameToPolicyMap = new HashMap<String,RetryPolicy>();
|
|
||||||
|
|
||||||
methodNameToPolicyMap.put("create", methodPolicy);
|
|
||||||
|
|
||||||
return (RaidProtocol) RetryProxy.create(RaidProtocol.class,
|
|
||||||
rpcRaidnode, methodNameToPolicyMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void checkOpen() throws IOException {
|
|
||||||
if (!clientRunning) {
|
|
||||||
IOException result = new IOException("RaidNode closed");
|
|
||||||
throw result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Close the connection to the raidNode.
|
|
||||||
*/
|
|
||||||
public synchronized void close() throws IOException {
|
|
||||||
if(clientRunning) {
|
|
||||||
clientRunning = false;
|
|
||||||
RPC.stopProxy(rpcRaidnode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Displays format of commands.
|
|
||||||
*/
|
|
||||||
private static void printUsage(String cmd) {
|
|
||||||
String prefix = "Usage: java " + RaidShell.class.getSimpleName();
|
|
||||||
if ("-showConfig".equals(cmd)) {
|
|
||||||
System.err.println("Usage: java RaidShell" +
|
|
||||||
" [-showConfig]");
|
|
||||||
} else if ("-recover".equals(cmd)) {
|
|
||||||
System.err.println("Usage: java RaidShell" +
|
|
||||||
" [-recover srcPath1 corruptOffset]");
|
|
||||||
} else if ("-recoverBlocks".equals(cmd)) {
|
|
||||||
System.err.println("Usage: java RaidShell" +
|
|
||||||
" [-recoverBlocks path1 path2...]");
|
|
||||||
} else {
|
|
||||||
System.err.println("Usage: java RaidShell");
|
|
||||||
System.err.println(" [-showConfig ]");
|
|
||||||
System.err.println(" [-help [cmd]]");
|
|
||||||
System.err.println(" [-recover srcPath1 corruptOffset]");
|
|
||||||
System.err.println(" [-recoverBlocks path1 path2...]");
|
|
||||||
System.err.println(" [-fsck [path]]");
|
|
||||||
System.err.println();
|
|
||||||
ToolRunner.printGenericCommandUsage(System.err);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* run
|
|
||||||
*/
|
|
||||||
public int run(String argv[]) throws Exception {
|
|
||||||
|
|
||||||
if (argv.length < 1) {
|
|
||||||
printUsage("");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int exitCode = -1;
|
|
||||||
int i = 0;
|
|
||||||
String cmd = argv[i++];
|
|
||||||
//
|
|
||||||
// verify that we have enough command line parameters
|
|
||||||
//
|
|
||||||
if ("-showConfig".equals(cmd)) {
|
|
||||||
if (argv.length < 1) {
|
|
||||||
printUsage(cmd);
|
|
||||||
return exitCode;
|
|
||||||
}
|
|
||||||
} else if ("-recover".equals(cmd)) {
|
|
||||||
if (argv.length < 3) {
|
|
||||||
printUsage(cmd);
|
|
||||||
return exitCode;
|
|
||||||
}
|
|
||||||
} else if ("-fsck".equals(cmd)) {
|
|
||||||
if ((argv.length < 1) || (argv.length > 2)) {
|
|
||||||
printUsage(cmd);
|
|
||||||
return exitCode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if ("-showConfig".equals(cmd)) {
|
|
||||||
initializeRpc(conf, RaidNode.getAddress(conf));
|
|
||||||
exitCode = showConfig(cmd, argv, i);
|
|
||||||
} else if ("-recover".equals(cmd)) {
|
|
||||||
initializeRpc(conf, RaidNode.getAddress(conf));
|
|
||||||
exitCode = recoverAndPrint(cmd, argv, i);
|
|
||||||
} else if ("-recoverBlocks".equals(cmd)) {
|
|
||||||
initializeLocal(conf);
|
|
||||||
recoverBlocks(argv, i);
|
|
||||||
exitCode = 0;
|
|
||||||
} else if ("-fsck".equals(cmd)) {
|
|
||||||
if (argv.length == 1) {
|
|
||||||
// if there are no args, check the whole file system
|
|
||||||
exitCode = fsck("/");
|
|
||||||
} else {
|
|
||||||
// argv.length == 2
|
|
||||||
// otherwise, check the path passed
|
|
||||||
exitCode = fsck(argv[1]);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
exitCode = -1;
|
|
||||||
System.err.println(cmd.substring(1) + ": Unknown command");
|
|
||||||
printUsage("");
|
|
||||||
}
|
|
||||||
} catch (IllegalArgumentException arge) {
|
|
||||||
exitCode = -1;
|
|
||||||
System.err.println(cmd.substring(1) + ": " + arge.getLocalizedMessage());
|
|
||||||
printUsage(cmd);
|
|
||||||
} catch (RemoteException e) {
|
|
||||||
//
|
|
||||||
// This is an error returned by the raidnode server. Print
// out the first line of the error message; ignore the stack trace.
|
|
||||||
exitCode = -1;
|
|
||||||
try {
|
|
||||||
String[] content;
|
|
||||||
content = e.getLocalizedMessage().split("\n");
|
|
||||||
System.err.println(cmd.substring(1) + ": " +
|
|
||||||
content[0]);
|
|
||||||
} catch (Exception ex) {
|
|
||||||
System.err.println(cmd.substring(1) + ": " +
|
|
||||||
ex.getLocalizedMessage());
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
//
|
|
||||||
// IO exception encountered locally.
|
|
||||||
//
|
|
||||||
exitCode = -1;
|
|
||||||
System.err.println(cmd.substring(1) + ": " +
|
|
||||||
e.getLocalizedMessage());
|
|
||||||
} catch (Exception re) {
|
|
||||||
exitCode = -1;
|
|
||||||
System.err.println(cmd.substring(1) + ": " + re.getLocalizedMessage());
|
|
||||||
} finally {
|
|
||||||
}
|
|
||||||
return exitCode;
|
|
||||||
}
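
  /**
   * Illustrative sketch, not part of the original class: how a caller might
   * drive the shell programmatically through ToolRunner, mirroring what
   * main() does. The "-fsck /" arguments are only an example invocation.
   */
  static int runFsckSketch() throws Exception {
    RaidShell shell = new RaidShell(new Configuration());
    try {
      // Equivalent to running "RaidShell -fsck /" from the command line:
      // prints irrecoverably corrupt files and returns their count.
      return ToolRunner.run(shell, new String[] { "-fsck", "/" });
    } finally {
      shell.close();
    }
  }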

  /**
   * Apply operation specified by 'cmd' on all parameters
   * starting from argv[startindex].
   */
  private int showConfig(String cmd, String argv[], int startindex) throws IOException {
    int exitCode = 0;
    int i = startindex;
    PolicyList[] all = raidnode.getAllPolicies();
    for (PolicyList list: all) {
      for (PolicyInfo p : list.getAll()) {
        System.out.println(p);
      }
    }
    return exitCode;
  }

  /**
   * Recovers the specified path from the parity file
   */
  public Path[] recover(String cmd, String argv[], int startindex)
    throws IOException {
    Path[] paths = new Path[(argv.length - startindex) / 2];
    int j = 0;
    for (int i = startindex; i < argv.length; i = i + 2) {
      String path = argv[i];
      long corruptOffset = Long.parseLong(argv[i+1]);
      LOG.info("RaidShell recoverFile for " + path + " corruptOffset " + corruptOffset);
      Path recovered = new Path("/tmp/recovered." + Time.now());
      FileSystem fs = recovered.getFileSystem(conf);
      DistributedFileSystem dfs = (DistributedFileSystem)fs;
      Configuration raidConf = new Configuration(conf);
      raidConf.set("fs.hdfs.impl",
                   "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
      raidConf.set("fs.raid.underlyingfs.impl",
                   "org.apache.hadoop.hdfs.DistributedFileSystem");
      raidConf.setBoolean("fs.hdfs.impl.disable.cache", true);
      java.net.URI dfsUri = dfs.getUri();
      FileSystem raidFs = FileSystem.get(dfsUri, raidConf);
      FileUtil.copy(raidFs, new Path(path), fs, recovered, false, conf);

      paths[j] = recovered;
      LOG.info("Raidshell created recovery file " + paths[j]);
      j++;
    }
    return paths;
  }
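
  /**
   * Illustrative sketch, not part of the original class: the configuration
   * trick recover() relies on, pulled out on its own. Pointing fs.hdfs.impl
   * at DistributedRaidFileSystem (with HDFS as the underlying fs and the
   * cache disabled) yields a FileSystem whose reads transparently fall back
   * to parity data. Which URI to open is whatever the cluster defaults to.
   */
  static FileSystem openRaidAwareFs(Configuration conf) throws IOException {
    Configuration raidConf = new Configuration(conf);
    raidConf.set("fs.hdfs.impl",
                 "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
    raidConf.set("fs.raid.underlyingfs.impl",
                 "org.apache.hadoop.hdfs.DistributedFileSystem");
    raidConf.setBoolean("fs.hdfs.impl.disable.cache", true);
    return FileSystem.get(FileSystem.getDefaultUri(raidConf), raidConf);
  }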

  public int recoverAndPrint(String cmd, String argv[], int startindex)
    throws IOException {
    int exitCode = 0;
    for (Path p : recover(cmd,argv,startindex)) {
      System.out.println(p);
    }
    return exitCode;
  }

  public void recoverBlocks(String[] args, int startIndex)
    throws IOException {
    LOG.debug("Recovering blocks for " + (args.length - startIndex) + " files");
    BlockFixer.BlockFixerHelper fixer = new BlockFixer.BlockFixerHelper(conf);
    for (int i = startIndex; i < args.length; i++) {
      String path = args[i];
      fixer.fixFile(new Path(path));
    }
  }

  /**
   * checks whether a file has more than the allowable number of
   * corrupt blocks and must therefore be considered corrupt
   */
  private boolean isFileCorrupt(final DistributedFileSystem dfs,
                                final Path filePath)
    throws IOException {
    // corruptBlocksPerStripe:
    // map stripe # -> # of corrupt blocks in that stripe (data + parity)
    HashMap<Integer, Integer> corruptBlocksPerStripe =
      new LinkedHashMap<Integer, Integer>();

    // read conf
    final int stripeBlocks = RaidNode.getStripeLength(conf);

    // figure out which blocks are missing/corrupted
    final FileStatus fileStatus = dfs.getFileStatus(filePath);
    final long blockSize = fileStatus.getBlockSize();
    final long fileLength = fileStatus.getLen();
    final long fileLengthInBlocks = (fileLength / blockSize) +
      (((fileLength % blockSize) == 0) ? 0L : 1L);
    final long fileStripes = (fileLengthInBlocks / stripeBlocks) +
      (((fileLengthInBlocks % stripeBlocks) == 0) ? 0L : 1L);
    final BlockLocation[] fileBlocks =
      dfs.getFileBlockLocations(fileStatus, 0, fileLength);

    // figure out which stripes these corrupted blocks belong to
    for (BlockLocation fileBlock: fileBlocks) {
      int blockNo = (int) (fileBlock.getOffset() / blockSize);
      final int stripe = (int) (blockNo / stripeBlocks);
      if (fileBlock.isCorrupt() ||
          (fileBlock.getNames().length == 0 && fileBlock.getLength() > 0)) {
        if (corruptBlocksPerStripe.get(stripe) == null) {
          corruptBlocksPerStripe.put(stripe, 1);
        } else {
          corruptBlocksPerStripe.put(stripe, corruptBlocksPerStripe.
                                     get(stripe) + 1);
        }
        LOG.debug("file " + filePath.toString() + " corrupt in block " +
                  blockNo + "/" + fileLengthInBlocks + ", stripe " + stripe +
                  "/" + fileStripes);
      } else {
        LOG.debug("file " + filePath.toString() + " OK in block " + blockNo +
                  "/" + fileLengthInBlocks + ", stripe " + stripe + "/" +
                  fileStripes);
      }
    }

    RaidInfo raidInfo = getFileRaidInfo(dfs, filePath);

    // now check parity blocks
    if (raidInfo.raidType != RaidType.NONE) {
      checkParityBlocks(filePath, corruptBlocksPerStripe, blockSize,
                        fileStripes, raidInfo);
    }

    final int maxCorruptBlocksPerStripe = raidInfo.parityBlocksPerStripe;

    for (int corruptBlocksInStripe: corruptBlocksPerStripe.values()) {
      if (corruptBlocksInStripe > maxCorruptBlocksPerStripe) {
        return true;
      }
    }
    return false;
  }
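
  /**
   * Illustrative sketch, not part of the original class: the stripe
   * arithmetic used above, on its own. A block at byte offset 'offset'
   * belongs to stripe (offset / blockSize) / stripeLength; a file is
   * declared corrupt only when some stripe has lost more blocks (data
   * plus parity) than that stripe has parity blocks.
   */
  static int stripeForOffset(long offset, long blockSize, int stripeLength) {
    int blockNo = (int) (offset / blockSize);   // block index within the file
    return blockNo / stripeLength;              // stripe index of that block
  }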

  /**
   * holds the type of raid used for a particular file
   */
  private enum RaidType {
    XOR,
    RS,
    NONE
  }

  /**
   * holds raid type and parity file pair
   */
  private class RaidInfo {
    public RaidInfo(final RaidType raidType,
                    final RaidNode.ParityFilePair parityPair,
                    final int parityBlocksPerStripe) {
      this.raidType = raidType;
      this.parityPair = parityPair;
      this.parityBlocksPerStripe = parityBlocksPerStripe;
    }
    public final RaidType raidType;
    public final RaidNode.ParityFilePair parityPair;
    public final int parityBlocksPerStripe;
  }

  /**
   * returns the raid for a given file
   */
  private RaidInfo getFileRaidInfo(final DistributedFileSystem dfs,
                                   final Path filePath)
    throws IOException {
    // now look for the parity file
    Path destPath = null;
    RaidNode.ParityFilePair ppair = null;
    try {
      // look for xor parity file first
      destPath = RaidNode.xorDestinationPath(conf);
      ppair = RaidNode.getParityFile(destPath, filePath, conf);
    } catch (FileNotFoundException ignore) {
    }
    if (ppair != null) {
      return new RaidInfo(RaidType.XOR, ppair, 1);
    } else {
      // failing that, look for rs parity file
      try {
        destPath = RaidNode.rsDestinationPath(conf);
        ppair = RaidNode.getParityFile(destPath, filePath, conf);
      } catch (FileNotFoundException ignore) {
      }
      if (ppair != null) {
        return new RaidInfo(RaidType.RS, ppair, RaidNode.rsParityLength(conf));
      } else {
        return new RaidInfo(RaidType.NONE, null, 0);
      }
    }
  }

  /**
   * gets the parity blocks corresponding to file
   * returns the parity blocks in case of DFS
   * and the part blocks containing parity blocks
   * in case of HAR FS
   */
  private BlockLocation[] getParityBlocks(final Path filePath,
                                          final long blockSize,
                                          final long fileStripes,
                                          final RaidInfo raidInfo)
    throws IOException {

    final String parityPathStr = raidInfo.parityPair.getPath().toUri().
      getPath();
    FileSystem parityFS = raidInfo.parityPair.getFileSystem();

    // get parity file metadata
    FileStatus parityFileStatus = parityFS.
      getFileStatus(new Path(parityPathStr));
    long parityFileLength = parityFileStatus.getLen();

    if (parityFileLength != fileStripes * raidInfo.parityBlocksPerStripe *
        blockSize) {
      throw new IOException("expected parity file of length " +
                            (fileStripes * raidInfo.parityBlocksPerStripe *
                             blockSize) +
                            " but got parity file of length " +
                            parityFileLength);
    }

    BlockLocation[] parityBlocks =
      parityFS.getFileBlockLocations(parityFileStatus, 0L, parityFileLength);

    if (parityFS instanceof DistributedFileSystem ||
        parityFS instanceof DistributedRaidFileSystem) {
      long parityBlockSize = parityFileStatus.getBlockSize();
      if (parityBlockSize != blockSize) {
        throw new IOException("file block size is " + blockSize +
                              " but parity file block size is " +
                              parityBlockSize);
      }
    } else if (parityFS instanceof HarFileSystem) {
      LOG.debug("HAR FS found");
    } else {
      LOG.warn("parity file system is not of a supported type");
    }

    return parityBlocks;
  }

  /**
   * checks the parity blocks for a given file and modifies
   * corruptBlocksPerStripe accordingly
   */
  private void checkParityBlocks(final Path filePath,
                                 final HashMap<Integer, Integer>
                                 corruptBlocksPerStripe,
                                 final long blockSize,
                                 final long fileStripes,
                                 final RaidInfo raidInfo)
    throws IOException {

    // get the blocks of the parity file
    // because of har, multiple blocks may be returned as one container block
    BlockLocation[] containerBlocks = getParityBlocks(filePath, blockSize,
                                                      fileStripes, raidInfo);

    long parityStripeLength = blockSize *
      ((long) raidInfo.parityBlocksPerStripe);

    long parityFileLength = parityStripeLength * fileStripes;

    long parityBlocksFound = 0L;

    for (BlockLocation cb: containerBlocks) {
      if (cb.getLength() % blockSize != 0) {
        throw new IOException("container block size is not " +
                              "multiple of parity block size");
      }
      int blocksInContainer = (int) (cb.getLength() / blockSize);
      LOG.debug("found container with offset " + cb.getOffset() +
                ", length " + cb.getLength());

      for (long offset = cb.getOffset();
           offset < cb.getOffset() + cb.getLength();
           offset += blockSize) {
        long block = offset / blockSize;

        int stripe = (int) (offset / parityStripeLength);

        if (stripe < 0) {
          // before the beginning of the parity file
          continue;
        }
        if (stripe >= fileStripes) {
          // past the end of the parity file
          break;
        }

        parityBlocksFound++;

        if (cb.isCorrupt() ||
            (cb.getNames().length == 0 && cb.getLength() > 0)) {
          LOG.debug("parity file for " + filePath.toString() +
                    " corrupt in block " + block +
                    ", stripe " + stripe + "/" + fileStripes);

          if (corruptBlocksPerStripe.get(stripe) == null) {
            corruptBlocksPerStripe.put(stripe, 1);
          } else {
            corruptBlocksPerStripe.put(stripe,
                                       corruptBlocksPerStripe.get(stripe) +
                                       1);
          }
        } else {
          LOG.debug("parity file for " + filePath.toString() +
                    " OK in block " + block +
                    ", stripe " + stripe + "/" + fileStripes);
        }
      }
    }

    long parityBlocksExpected = raidInfo.parityBlocksPerStripe * fileStripes;
    if (parityBlocksFound != parityBlocksExpected) {
      throw new IOException("expected " + parityBlocksExpected +
                            " parity blocks but got " + parityBlocksFound);
    }
  }


  /**
   * checks the raided file system, prints a list of corrupt files to
   * System.out and returns the number of corrupt files
   */
  public int fsck(final String path) throws IOException {

    FileSystem fs = (new Path(path)).getFileSystem(conf);

    // if we got a raid fs, get the underlying fs
    if (fs instanceof DistributedRaidFileSystem) {
      fs = ((DistributedRaidFileSystem) fs).getFileSystem();
    }

    // check that we have a distributed fs
    if (!(fs instanceof DistributedFileSystem)) {
      throw new IOException("expected DistributedFileSystem but got " +
                            fs.getClass().getName());
    }
    final DistributedFileSystem dfs = (DistributedFileSystem) fs;

    // get conf settings
    String xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
    String rsPrefix = RaidNode.rsDestinationPath(conf).toUri().getPath();
    if (!xorPrefix.endsWith("/")) {
      xorPrefix = xorPrefix + "/";
    }
    if (!rsPrefix.endsWith("/")) {
      rsPrefix = rsPrefix + "/";
    }
    LOG.debug("prefixes: " + xorPrefix + ", " + rsPrefix);

    // get a list of corrupted files (not considering parity blocks just yet)
    // from the name node
    // these are the only files we need to consider:
    // if a file has no corrupted data blocks, it is OK even if some
    // of its parity blocks are corrupted, so no further checking is
    // necessary
    final String[] files = RaidDFSUtil.getCorruptFiles(dfs);
    final List<Path> corruptFileCandidates = new LinkedList<Path>();
    for (final String f: files) {
      final Path p = new Path(f);
      // if this file is a parity file
      // or if it does not start with the specified path,
      // ignore it
      if (!p.toString().startsWith(xorPrefix) &&
          !p.toString().startsWith(rsPrefix) &&
          p.toString().startsWith(path)) {
        corruptFileCandidates.add(p);
      }
    }
    // filter files marked for deletion
    RaidUtils.filterTrash(conf, corruptFileCandidates);

    int numberOfCorruptFiles = 0;

    for (final Path corruptFileCandidate: corruptFileCandidates) {
      if (isFileCorrupt(dfs, corruptFileCandidate)) {
        System.out.println(corruptFileCandidate.toString());
        numberOfCorruptFiles++;
      }
    }

    return numberOfCorruptFiles;
  }
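
  /**
   * Illustrative sketch, not part of the original class: calling fsck()
   * directly and acting on the count it returns. The path is hypothetical,
   * and the shell is assumed to be usable for a local fsck without any
   * further setup beyond the constructor.
   */
  static void printRaidFsckSummary(Configuration conf) throws Exception {
    RaidShell shell = new RaidShell(conf);
    try {
      int corrupt = shell.fsck("/user/data");
      System.out.println(corrupt + " file(s) are irrecoverably corrupt");
    } finally {
      shell.close();
    }
  }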

  /**
   * main() has some simple utility methods
   */
  public static void main(String argv[]) throws Exception {
    RaidShell shell = null;
    try {
      shell = new RaidShell(new Configuration());
      int res = ToolRunner.run(shell, argv);
      System.exit(res);
    } catch (RPC.VersionMismatch v) {
      System.err.println("Version Mismatch between client and server" +
                         "... command aborted.");
      System.exit(-1);
    } catch (IOException e) {
      System.err.
        println("Bad connection to RaidNode or NameNode. command aborted.");
      System.err.println(e.getMessage());
      System.exit(-1);
    } finally {
      shell.close();
    }
  }
}
@@ -1,171 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

public class RaidUtils {
  /**
   * A {@link Progressable} that does nothing.
   *
   * We could have used Reporter.NULL here but that would introduce
   * a dependency on mapreduce.
   */
  public static class DummyProgressable implements Progressable {
    /**
     * Do nothing.
     */
    @Override
    public void progress() {
    }
  }

  /**
   * Removes files matching the trash file pattern.
   */
  public static void filterTrash(Configuration conf, List<Path> files) {
    // Remove files under Trash.
    String trashPattern = conf.get("raid.blockfixer.trash.pattern",
                                   "^/user/.*/\\.Trash.*");
    for (Iterator<Path> it = files.iterator(); it.hasNext(); ) {
      String pathStr = it.next().toString();
      if (Pattern.matches(trashPattern, pathStr)) {
        it.remove();
      }
    }
  }

  public static void readTillEnd(InputStream in, byte[] buf, boolean eofOK)
    throws IOException {
    int toRead = buf.length;
    int numRead = 0;
    while (numRead < toRead) {
      int nread = in.read(buf, numRead, toRead - numRead);
      if (nread < 0) {
        if (eofOK) {
          // EOF hit, fill with zeros
          Arrays.fill(buf, numRead, toRead, (byte)0);
          numRead = toRead;
        } else {
          // EOF hit, throw.
          throw new IOException("Premature EOF");
        }
      } else {
        numRead += nread;
      }
    }
  }

  public static void copyBytes(
    InputStream in, OutputStream out, byte[] buf, long count)
    throws IOException {
    for (long bytesRead = 0; bytesRead < count; ) {
      int toRead = Math.min(buf.length, (int)(count - bytesRead));
      IOUtils.readFully(in, buf, 0, toRead);
      bytesRead += toRead;
      out.write(buf, 0, toRead);
    }
  }

  public static class ZeroInputStream extends InputStream
      implements Seekable, PositionedReadable {
    private long endOffset;
    private long pos;

    public ZeroInputStream(long endOffset) {
      this.endOffset = endOffset;
      this.pos = 0;
    }

    @Override
    public int read() throws IOException {
      if (pos < endOffset) {
        pos++;
        return 0;
      }
      return -1;
    }

    @Override
    public int available() throws IOException {
      return (int)(endOffset - pos);
    }

    @Override
    public long getPos() throws IOException {
      return pos;
    }

    @Override
    public void seek(long seekOffset) throws IOException {
      if (seekOffset < endOffset) {
        pos = seekOffset;
      } else {
        throw new IOException("Illegal Offset " + seekOffset);
      }
    }

    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
      return false;
    }

    @Override
    public int read(long position, byte[] buffer, int offset, int length)
      throws IOException {
      int count = 0;
      for (; position < endOffset && count < length; position++) {
        buffer[offset + count] = 0;
        count++;
      }
      return count;
    }

    @Override
    public void readFully(long position, byte[] buffer, int offset, int length)
      throws IOException {
      int count = 0;
      for (; position < endOffset && count < length; position++) {
        buffer[offset + count] = 0;
        count++;
      }
      if (count < length) {
        throw new IOException("Premature EOF");
      }
    }

    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
      readFully(position, buffer, 0, buffer.length);
    }
  }
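
  /**
   * Illustrative sketch, not part of the original class: readTillEnd()
   * paired with ZeroInputStream, which is how the decoders substitute a
   * stream of zeros for a missing block while still filling whole buffers.
   * bufSize and streamLength are the caller's choice.
   */
  public static byte[] readZerosSketch(int bufSize, long streamLength)
      throws IOException {
    InputStream in = new ZeroInputStream(streamLength);
    byte[] buf = new byte[bufSize];
    // eofOK = true: if the stream ends early, the remainder is zero-filled.
    readTillEnd(in, buf, true);
    return buf;
  }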
}
@@ -1,183 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;
import java.util.Set;


public class ReedSolomonCode implements ErasureCode {

  private final int stripeSize;
  private final int paritySize;
  private final int[] generatingPolynomial;
  private final int PRIMITIVE_ROOT = 2;
  private final int[] primitivePower;
  private final GaloisField GF = GaloisField.getInstance();
  private int[] errSignature;
  private final int[] paritySymbolLocations;
  private final int[] dataBuff;

  public ReedSolomonCode(int stripeSize, int paritySize) {
    assert(stripeSize + paritySize < GF.getFieldSize());
    this.stripeSize = stripeSize;
    this.paritySize = paritySize;
    this.errSignature = new int[paritySize];
    this.paritySymbolLocations = new int[paritySize];
    this.dataBuff = new int[paritySize + stripeSize];
    for (int i = 0; i < paritySize; i++) {
      paritySymbolLocations[i] = i;
    }

    this.primitivePower = new int[stripeSize + paritySize];
    // compute powers of the primitive root
    for (int i = 0; i < stripeSize + paritySize; i++) {
      primitivePower[i] = GF.power(PRIMITIVE_ROOT, i);
    }
    // compute generating polynomial
    int[] gen = {1};
    int[] poly = new int[2];
    for (int i = 0; i < paritySize; i++) {
      poly[0] = primitivePower[i];
      poly[1] = 1;
      gen = GF.multiply(gen, poly);
    }
    // generating polynomial has all generating roots
    generatingPolynomial = gen;
  }

  @Override
  public void encode(int[] message, int[] parity) {
    assert(message.length == stripeSize && parity.length == paritySize);
    for (int i = 0; i < paritySize; i++) {
      dataBuff[i] = 0;
    }
    for (int i = 0; i < stripeSize; i++) {
      dataBuff[i + paritySize] = message[i];
    }
    GF.remainder(dataBuff, generatingPolynomial);
    for (int i = 0; i < paritySize; i++) {
      parity[i] = dataBuff[i];
    }
  }

  @Override
  public void decode(int[] data, int[] erasedLocation, int[] erasedValue) {
    if (erasedLocation.length == 0) {
      return;
    }
    assert(erasedLocation.length == erasedValue.length);
    for (int i = 0; i < erasedLocation.length; i++) {
      data[erasedLocation[i]] = 0;
    }
    for (int i = 0; i < erasedLocation.length; i++) {
      errSignature[i] = primitivePower[erasedLocation[i]];
      erasedValue[i] = GF.substitute(data, primitivePower[i]);
    }
    GF.solveVandermondeSystem(errSignature, erasedValue, erasedLocation.length);
  }

  @Override
  public int stripeSize() {
    return this.stripeSize;
  }

  @Override
  public int paritySize() {
    return this.paritySize;
  }

  @Override
  public int symbolSize() {
    return (int) Math.round(Math.log(GF.getFieldSize()) / Math.log(2));
  }

  /**
   * Given parity symbols followed by message symbols, return the locations of
   * symbols that are corrupted. Can resolve up to (parity length / 2) error
   * locations.
   * @param data The message and parity. The parity should be placed in the
   *             first part of the array. In each integer, the relevant portion
   *             is present in the least significant bits of each int.
   *             The number of elements in data is stripeSize() + paritySize().
   *             <b>Note that data may be changed after calling this method.</b>
   * @param errorLocations The set to put the error location results
   * @return true if the locations can be resolved.
   */
  public boolean computeErrorLocations(int[] data,
                                       Set<Integer> errorLocations) {
    assert(data.length == paritySize + stripeSize && errorLocations != null);
    errorLocations.clear();
    int maxError = paritySize / 2;
    int[][] syndromeMatrix = new int[maxError][];
    for (int i = 0; i < syndromeMatrix.length; ++i) {
      syndromeMatrix[i] = new int[maxError + 1];
    }
    int[] syndrome = new int[paritySize];

    if (computeSyndrome(data, syndrome)) {
      // Parity check OK. No error location added.
      return true;
    }
    for (int i = 0; i < maxError; ++i) {
      for (int j = 0; j < maxError + 1; ++j) {
        syndromeMatrix[i][j] = syndrome[i + j];
      }
    }
    GF.gaussianElimination(syndromeMatrix);
    int[] polynomial = new int[maxError + 1];
    polynomial[0] = 1;
    for (int i = 0; i < maxError; ++i) {
      polynomial[i + 1] = syndromeMatrix[maxError - 1 - i][maxError];
    }
    for (int i = 0; i < paritySize + stripeSize; ++i) {
      int possibleRoot = GF.divide(1, primitivePower[i]);
      if (GF.substitute(polynomial, possibleRoot) == 0) {
        errorLocations.add(i);
      }
    }
    // Now recover with error locations and check the syndrome again
    int[] locations = new int[errorLocations.size()];
    int k = 0;
    for (int loc : errorLocations) {
      locations[k++] = loc;
    }
    int[] erasedValue = new int[locations.length];
    decode(data, locations, erasedValue);
    for (int i = 0; i < locations.length; ++i) {
      data[locations[i]] = erasedValue[i];
    }
    return computeSyndrome(data, syndrome);
  }

  /**
   * Compute the syndrome of the input [parity, message]
   * @param data [parity, message]
   * @param syndrome The syndromes (checksums) of the data
   * @return true If syndromes are all zeros
   */
  private boolean computeSyndrome(int[] data, int[] syndrome) {
    boolean corruptionFound = false;
    for (int i = 0; i < paritySize; i++) {
      syndrome[i] = GF.substitute(data, primitivePower[i]);
      if (syndrome[i] != 0) {
        corruptionFound = true;
      }
    }
    return !corruptionFound;
  }
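
  /**
   * Illustrative sketch, not part of the original class: a round trip
   * through encode() and decode(). The symbol layout matches the rest of
   * this file: parity symbols first, then message symbols, one byte's worth
   * of data in the low bits of each int. With two parity symbols the code
   * should be able to rebuild two erased symbols.
   */
  public static boolean selfTestSketch() {
    ReedSolomonCode code = new ReedSolomonCode(4, 2);
    int[] message = {11, 22, 33, 44};
    int[] parity = new int[2];
    code.encode(message, parity);

    // Assemble [parity, message] and erase one parity and one data symbol.
    int[] data = new int[6];
    System.arraycopy(parity, 0, data, 0, 2);
    System.arraycopy(message, 0, data, 2, 4);
    int[] erased = {1, 3};
    int[] recovered = new int[2];
    code.decode(data, erased, recovered);

    // recovered[i] should hold the original value at position erased[i].
    return recovered[0] == parity[1] && recovered[1] == message[1];
  }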
}
@@ -1,226 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.OutputStream;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockMissingException;

public class ReedSolomonDecoder extends Decoder {
  public static final Log LOG = LogFactory.getLog(
    "org.apache.hadoop.raid.ReedSolomonDecoder");
  private ErasureCode reedSolomonCode;

  public ReedSolomonDecoder(
    Configuration conf, int stripeSize, int paritySize) {
    super(conf, stripeSize, paritySize);
    this.reedSolomonCode = new ReedSolomonCode(stripeSize, paritySize);
  }

  @Override
  protected void fixErasedBlock(
    FileSystem fs, Path srcFile,
    FileSystem parityFs, Path parityFile,
    long blockSize, long errorOffset, long bytesToSkip, long limit,
    OutputStream out) throws IOException {
    FSDataInputStream[] inputs = new FSDataInputStream[stripeSize + paritySize];
    int[] erasedLocations = buildInputs(fs, srcFile, parityFs, parityFile,
                                        errorOffset, inputs);
    int blockIdxInStripe = ((int)(errorOffset/blockSize)) % stripeSize;
    int erasedLocationToFix = paritySize + blockIdxInStripe;
    writeFixedBlock(inputs, erasedLocations, erasedLocationToFix,
                    bytesToSkip, limit, out);
  }

  protected int[] buildInputs(FileSystem fs, Path srcFile,
                              FileSystem parityFs, Path parityFile,
                              long errorOffset, FSDataInputStream[] inputs)
    throws IOException {
    LOG.info("Building inputs to recover block starting at " + errorOffset);
    FileStatus srcStat = fs.getFileStatus(srcFile);
    long blockSize = srcStat.getBlockSize();
    long blockIdx = (int)(errorOffset / blockSize);
    long stripeIdx = blockIdx / stripeSize;
    LOG.info("FileSize = " + srcStat.getLen() + ", blockSize = " + blockSize +
             ", blockIdx = " + blockIdx + ", stripeIdx = " + stripeIdx);
    ArrayList<Integer> erasedLocations = new ArrayList<Integer>();
    // First open streams to the parity blocks.
    for (int i = 0; i < paritySize; i++) {
      long offset = blockSize * (stripeIdx * paritySize + i);
      FSDataInputStream in = parityFs.open(
        parityFile, conf.getInt("io.file.buffer.size", 64 * 1024));
      in.seek(offset);
      LOG.info("Adding " + parityFile + ":" + offset + " as input " + i);
      inputs[i] = in;
    }
    // Now open streams to the data blocks.
    for (int i = paritySize; i < paritySize + stripeSize; i++) {
      long offset = blockSize * (stripeIdx * stripeSize + i - paritySize);
      if (offset == errorOffset) {
        LOG.info(srcFile + ":" + offset +
                 " is known to have error, adding zeros as input " + i);
        inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(
          offset + blockSize));
        erasedLocations.add(i);
      } else if (offset > srcStat.getLen()) {
        LOG.info(srcFile + ":" + offset +
                 " is past file size, adding zeros as input " + i);
        inputs[i] = new FSDataInputStream(new RaidUtils.ZeroInputStream(
          offset + blockSize));
      } else {
        FSDataInputStream in = fs.open(
          srcFile, conf.getInt("io.file.buffer.size", 64 * 1024));
        in.seek(offset);
        LOG.info("Adding " + srcFile + ":" + offset + " as input " + i);
        inputs[i] = in;
      }
    }
    if (erasedLocations.size() > paritySize) {
      String msg = "Too many erased locations: " + erasedLocations.size();
      LOG.error(msg);
      throw new IOException(msg);
    }
    int[] locs = new int[erasedLocations.size()];
    for (int i = 0; i < locs.length; i++) {
      locs[i] = erasedLocations.get(i);
    }
    return locs;
  }

  /**
   * Decode the inputs provided and write to the output.
   * @param inputs array of inputs.
   * @param erasedLocations indexes in the inputs which are known to be erased.
   * @param erasedLocationToFix index in the inputs which needs to be fixed.
   * @param skipBytes number of bytes to skip before writing to output.
   * @param limit maximum number of bytes to be written/skipped.
   * @param out the output.
   * @throws IOException
   */
  void writeFixedBlock(
    FSDataInputStream[] inputs,
    int[] erasedLocations,
    int erasedLocationToFix,
    long skipBytes,
    long limit,
    OutputStream out) throws IOException {

    LOG.info("Need to write " + (limit - skipBytes) +
             " bytes for erased location index " + erasedLocationToFix);
    int[] tmp = new int[inputs.length];
    int[] decoded = new int[erasedLocations.length];
    long toDiscard = skipBytes;
    // Loop while the number of skipped + written bytes is less than the max.
    for (long written = 0; skipBytes + written < limit; ) {
      erasedLocations = readFromInputs(inputs, erasedLocations, limit);
      if (decoded.length != erasedLocations.length) {
        decoded = new int[erasedLocations.length];
      }

      int toWrite = (int)Math.min((long)bufSize, limit - (skipBytes + written));
      if (toDiscard >= toWrite) {
        toDiscard -= toWrite;
        continue;
      }

      // Decode bufSize worth of data.
      for (int i = 0; i < bufSize; i++) {
        performDecode(readBufs, writeBufs, i, tmp, erasedLocations, decoded);
      }

      for (int i = 0; i < erasedLocations.length; i++) {
        if (erasedLocations[i] == erasedLocationToFix) {
          toWrite -= toDiscard;
          out.write(writeBufs[i], (int)toDiscard, toWrite);
          toDiscard = 0;
          written += toWrite;
          LOG.debug("Wrote " + toWrite + " bytes for erased location index " +
                    erasedLocationToFix);
          break;
        }
      }
    }
  }

  int[] readFromInputs(
    FSDataInputStream[] inputs,
    int[] erasedLocations,
    long limit) throws IOException {
    // For every input, read some data = bufSize.
    for (int i = 0; i < inputs.length; i++) {
      long curPos = inputs[i].getPos();
      try {
        RaidUtils.readTillEnd(inputs[i], readBufs[i], true);
        continue;
      } catch (BlockMissingException e) {
        LOG.error("Encountered BlockMissingException in stream " + i);
      } catch (ChecksumException e) {
        LOG.error("Encountered ChecksumException in stream " + i);
      }

      // Found a new erased location.
      if (erasedLocations.length == paritySize) {
        String msg = "Too many read errors";
        LOG.error(msg);
        throw new IOException(msg);
      }

      // Add this stream to the set of erased locations.
      int[] newErasedLocations = new int[erasedLocations.length + 1];
      for (int j = 0; j < erasedLocations.length; j++) {
        newErasedLocations[j] = erasedLocations[j];
      }
      newErasedLocations[newErasedLocations.length - 1] = i;
      erasedLocations = newErasedLocations;

      LOG.info("Using zeros for stream " + i);
      inputs[i] = new FSDataInputStream(
        new RaidUtils.ZeroInputStream(curPos + limit));
      inputs[i].seek(curPos);
      RaidUtils.readTillEnd(inputs[i], readBufs[i], true);
    }
    return erasedLocations;
  }

  void performDecode(byte[][] readBufs, byte[][] writeBufs,
                     int idx, int[] inputs,
                     int[] erasedLocations, int[] decoded) {
    for (int i = 0; i < decoded.length; i++) {
      decoded[i] = 0;
    }
    for (int i = 0; i < inputs.length; i++) {
      inputs[i] = readBufs[i][idx] & 0x000000FF;
    }
    reedSolomonCode.decode(inputs, erasedLocations, decoded);
    for (int i = 0; i < decoded.length; i++) {
      writeBufs[i][idx] = (byte)decoded[i];
    }
  }
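
  /**
   * Illustrative sketch, not part of the original class: the index
   * arithmetic used by fixErasedBlock(). Inputs are ordered parity blocks
   * first, then data blocks, so a data block at errorOffset maps to input
   * slot paritySize + (its block index within the stripe).
   */
  int erasedInputIndexSketch(long errorOffset, long blockSize) {
    int blockIdxInStripe = ((int) (errorOffset / blockSize)) % stripeSize;
    return paritySize + blockIdxInStripe;
  }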
}
@@ -1,96 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

public class ReedSolomonEncoder extends Encoder {
  public static final Log LOG = LogFactory.getLog(
    "org.apache.hadoop.raid.ReedSolomonEncoder");
  private ErasureCode reedSolomonCode;

  public ReedSolomonEncoder(
    Configuration conf, int stripeSize, int paritySize) {
    super(conf, stripeSize, paritySize);
    this.reedSolomonCode = new ReedSolomonCode(stripeSize, paritySize);
  }

  protected void encodeStripe(
    InputStream[] blocks,
    long stripeStartOffset,
    long blockSize,
    OutputStream[] outs,
    Progressable reporter) throws IOException {

    int[] data = new int[stripeSize];
    int[] code = new int[paritySize];

    for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
      // Read some data from each block = bufSize.
      for (int i = 0; i < blocks.length; i++) {
        RaidUtils.readTillEnd(blocks[i], readBufs[i], true);
      }

      // Encode the data read.
      for (int j = 0; j < bufSize; j++) {
        performEncode(readBufs, writeBufs, j, data, code);
      }

      // Now that we have some data to write, send it to the temp files.
      for (int i = 0; i < paritySize; i++) {
        outs[i].write(writeBufs[i], 0, bufSize);
      }

      if (reporter != null) {
        reporter.progress();
      }
    }
  }

  void performEncode(byte[][] readBufs, byte[][] writeBufs, int idx,
                     int[] data, int[] code) {
    for (int i = 0; i < paritySize; i++) {
      code[i] = 0;
    }
    for (int i = 0; i < stripeSize; i++) {
      data[i] = readBufs[i][idx] & 0x000000FF;
    }
    reedSolomonCode.encode(data, code);
    for (int i = 0; i < paritySize; i++) {
      writeBufs[i][idx] = (byte)code[i];
    }
  }

  @Override
  public Path getParityTempPath() {
    return new Path(RaidNode.rsTempPrefix(conf));
  }

}
@@ -1,92 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.OutputStream;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;

public class XORDecoder extends Decoder {
  public static final Log LOG = LogFactory.getLog(
    "org.apache.hadoop.raid.XORDecoder");

  public XORDecoder(
    Configuration conf, int stripeSize) {
    super(conf, stripeSize, 1);
  }

  @Override
  protected void fixErasedBlock(
    FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
    long blockSize, long errorOffset, long bytesToSkip, long limit,
    OutputStream out) throws IOException {
    LOG.info("Fixing block at " + srcFile + ":" + errorOffset +
             ", skipping " + bytesToSkip + ", limit " + limit);
    FileStatus srcStat = fs.getFileStatus(srcFile);
    ArrayList<FSDataInputStream> xorinputs = new ArrayList<FSDataInputStream>();

    FSDataInputStream parityFileIn = parityFs.open(parityFile);
    parityFileIn.seek(parityOffset(errorOffset, blockSize));
    xorinputs.add(parityFileIn);

    long errorBlockOffset = (errorOffset / blockSize) * blockSize;
    long[] srcOffsets = stripeOffsets(errorOffset, blockSize);
    for (int i = 0; i < srcOffsets.length; i++) {
      if (srcOffsets[i] == errorBlockOffset) {
        LOG.info("Skipping block at " + srcFile + ":" + errorBlockOffset);
        continue;
      }
      if (srcOffsets[i] < srcStat.getLen()) {
        FSDataInputStream in = fs.open(srcFile);
        in.seek(srcOffsets[i]);
        xorinputs.add(in);
      }
    }
    FSDataInputStream[] inputs = xorinputs.toArray(
      new FSDataInputStream[]{null});
    ParityInputStream recovered =
      new ParityInputStream(inputs, limit, readBufs[0], writeBufs[0]);
    recovered.skip(bytesToSkip);
    recovered.drain(out, null);
  }

  protected long[] stripeOffsets(long errorOffset, long blockSize) {
    long[] offsets = new long[stripeSize];
    long stripeIdx = errorOffset / (blockSize * stripeSize);
    long startOffsetOfStripe = stripeIdx * stripeSize * blockSize;
    for (int i = 0; i < stripeSize; i++) {
      offsets[i] = startOffsetOfStripe + i * blockSize;
    }
    return offsets;
  }

  protected long parityOffset(long errorOffset, long blockSize) {
    long stripeIdx = errorOffset / (blockSize * stripeSize);
    return stripeIdx * blockSize;
  }
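
  /**
   * Illustrative sketch, not part of the original class: the offset
   * arithmetic above with concrete numbers, assuming a decoder built with
   * stripeSize 3 and a 64 MB block size. An error at byte offset 300 MB
   * then falls into stripe 1, so stripeOffsets() returns the three data
   * block offsets of that stripe and parityOffset() the start of its single
   * XOR parity block inside the parity file.
   */
  void offsetSketch() {
    long blockSize = 64L * 1024 * 1024;
    long errorOffset = 300L * 1024 * 1024;
    long[] dataOffsets = stripeOffsets(errorOffset, blockSize);
    long parity = parityOffset(errorOffset, blockSize);
    LOG.info("data offsets " + java.util.Arrays.toString(dataOffsets) +
             ", parity offset " + parity);
  }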
}
@@ -1,63 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.OutputStream;
import java.io.InputStream;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

public class XOREncoder extends Encoder {
  public static final Log LOG = LogFactory.getLog(
    "org.apache.hadoop.raid.XOREncoder");
  public XOREncoder(
    Configuration conf, int stripeSize) {
    super(conf, stripeSize, 1);
  }

  @Override
  protected void encodeStripe(
    InputStream[] blocks,
    long stripeStartOffset,
    long blockSize,
    OutputStream[] outs,
    Progressable reporter) throws IOException {
    LOG.info("Performing XOR");
    ParityInputStream parityIn =
      new ParityInputStream(blocks, blockSize, readBufs[0], writeBufs[0]);
    try {
      parityIn.drain(outs[0], reporter);
    } finally {
      parityIn.close();
    }
  }

  @Override
  public Path getParityTempPath() {
    return new Path(RaidNode.unraidTmpDirectory(conf));
  }
}
@@ -1,256 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid.protocol;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.util.Properties;
import java.util.Enumeration;
import java.lang.Math;
import java.text.SimpleDateFormat;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;

/**
 * Maintains information about one policy
 */
public class PolicyInfo implements Writable {
  public static final Log LOG = LogFactory.getLog(
    "org.apache.hadoop.raid.protocol.PolicyInfo");
  protected static final SimpleDateFormat dateFormat =
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

  private Path srcPath;            // the specified src path
  private String policyName;       // name of policy
  private ErasureCodeType codeType;// the erasure code used
  private String description;      // A verbose description of this policy
  private Configuration conf;      // Hadoop configuration

  private Properties properties;   // Policy-dependent properties

  private ReentrantReadWriteLock plock; // protects policy operations.
  public static enum ErasureCodeType {
    XOR, RS;
    public static ErasureCodeType fromString(String s) {
      if (XOR.toString().equalsIgnoreCase(s)) {
        return XOR;
      }
      if (RS.toString().equalsIgnoreCase(s)) {
        return RS;
      }
      return null;
    }
  }

  /**
   * Create the empty object
   */
  public PolicyInfo() {
    this.conf = null;
    this.policyName = "";
    this.description = "";
    this.srcPath = null;
    this.properties = new Properties();
    this.plock = new ReentrantReadWriteLock();
  }

  /**
   * Create the metadata that describes a policy
   */
  public PolicyInfo(String policyName, Configuration conf) {
    this.conf = conf;
    this.policyName = policyName;
    this.description = "";
    this.srcPath = null;
    this.properties = new Properties();
    this.plock = new ReentrantReadWriteLock();
  }

  /**
   * Copy fields from another PolicyInfo
   */
  public void copyFrom(PolicyInfo other) {
    if (other.conf != null) {
      this.conf = other.conf;
    }
    if (other.policyName != null && other.policyName.length() > 0) {
      this.policyName = other.policyName;
    }
    if (other.description != null && other.description.length() > 0) {
      this.description = other.description;
    }
    if (other.codeType != null) {
      this.codeType = other.codeType;
    }
    if (other.srcPath != null) {
      this.srcPath = other.srcPath;
    }
    for (Object key : other.properties.keySet()) {
      String skey = (String) key;
      this.properties.setProperty(skey, other.properties.getProperty(skey));
    }
  }

  /**
   * Sets the input path on which this policy has to be applied
   */
  public void setSrcPath(String in) throws IOException {
    srcPath = new Path(in);
    srcPath = srcPath.makeQualified(srcPath.getFileSystem(conf));
  }

  /**
   * Set the erasure code type used in this policy
   */
  public void setErasureCode(String code) {
    this.codeType = ErasureCodeType.fromString(code);
  }

  /**
   * Set the description of this policy.
   */
  public void setDescription(String des) {
    this.description = des;
  }

  /**
   * Sets an internal property.
   * @param name property name.
   * @param value property value.
   */
  public void setProperty(String name, String value) {
    properties.setProperty(name, value);
  }

  /**
   * Returns the value of an internal property.
   * @param name property name.
   */
  public String getProperty(String name) {
    return properties.getProperty(name);
  }

  /**
   * Get the name of this policy.
   */
  public String getName() {
    return this.policyName;
  }

  /**
   * Get the erasure code type used by this policy.
   */
  public ErasureCodeType getErasureCode() {
    return this.codeType;
  }

  /**
   * Get the srcPath
   */
  public Path getSrcPath() {
    return srcPath;
  }

  /**
   * Get the expanded (unglobbed) forms of the srcPaths
   */
  public Path[] getSrcPathExpanded() throws IOException {
    FileSystem fs = srcPath.getFileSystem(conf);

    // globbing on srcPath
    FileStatus[] gpaths = fs.globStatus(srcPath);
    if (gpaths == null) {
      return null;
    }
    Path[] values = new Path[gpaths.length];
    for (int i = 0; i < gpaths.length; i++) {
      Path p = gpaths[i].getPath();
      values[i] = p.makeQualified(fs);
    }
    return values;
  }

  /**
   * Convert this policy into a printable form
   */
  public String toString() {
    StringBuffer buff = new StringBuffer();
    buff.append("Policy Name:\t" + policyName + " --------------------\n");
    buff.append("Source Path:\t" + srcPath + "\n");
    buff.append("Erasure Code:\t" + codeType + "\n");
    for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements();) {
      String name = (String) e.nextElement();
      buff.append( name + ":\t" + properties.getProperty(name) + "\n");
    }
    if (description.length() > 0) {
      int len = Math.min(description.length(), 80);
      String sub = description.substring(0, len).trim();
      sub = sub.replaceAll("\n", " ");
      buff.append("Description:\t" + sub + "...\n");
    }
    return buff.toString();
  }
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
// Writable
|
|
||||||
//////////////////////////////////////////////////
|
|
||||||
static { // register a ctor
|
|
||||||
WritableFactories.setFactory
|
|
||||||
(PolicyInfo.class,
|
|
||||||
new WritableFactory() {
|
|
||||||
public Writable newInstance() { return new PolicyInfo(); }
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void write(DataOutput out) throws IOException {
|
|
||||||
Text.writeString(out, srcPath.toString());
|
|
||||||
Text.writeString(out, policyName);
|
|
||||||
Text.writeString(out, codeType.toString());
|
|
||||||
Text.writeString(out, description);
|
|
||||||
out.writeInt(properties.size());
|
|
||||||
for (Enumeration<?> e = properties.propertyNames(); e.hasMoreElements();) {
|
|
||||||
String name = (String) e.nextElement();
|
|
||||||
Text.writeString(out, name);
|
|
||||||
Text.writeString(out, properties.getProperty(name));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void readFields(DataInput in) throws IOException {
|
|
||||||
this.srcPath = new Path(Text.readString(in));
|
|
||||||
this.policyName = Text.readString(in);
|
|
||||||
this.codeType = ErasureCodeType.fromString(Text.readString(in));
|
|
||||||
this.description = Text.readString(in);
|
|
||||||
for (int n = in.readInt(); n>0; n--) {
|
|
||||||
String name = Text.readString(in);
|
|
||||||
String value = Text.readString(in);
|
|
||||||
properties.setProperty(name,value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
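For context, the following is a minimal, illustrative sketch (not part of this commit) showing how a PolicyInfo as defined above can be populated and round-tripped through its Writable write/readFields methods. The policy name, source path, and the "targetReplication" property key are made-up examples, not values mandated by RaidNode.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.raid.protocol.PolicyInfo;

public class PolicyInfoRoundTripExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Build a policy against the default file system (hypothetical values).
    PolicyInfo info = new PolicyInfo("example-policy", conf);
    info.setSrcPath("/user/raidtest");
    info.setErasureCode("rs");          // parsed by ErasureCodeType.fromString()
    info.setDescription("Example policy used only for illustration");
    info.setProperty("targetReplication", "1");  // assumed property key

    // Serialize and deserialize through the Writable interface.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    info.write(new DataOutputStream(bytes));

    PolicyInfo copy = new PolicyInfo();
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));

    System.out.println(copy);  // printable form produced by toString()
  }
}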
@@ -1,106 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid.protocol;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.util.Collection;
import java.util.List;
import java.util.LinkedList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.conf.Configuration;

/**
 * Maintains information about all policies that belong to a category.
 * These policies have to be applied one-at-a-time and cannot be run
 * simultaneously.
 */
public class PolicyList implements Writable {
  public static final Log LOG = LogFactory.getLog(
    "org.apache.hadoop.raid.protocol.PolicyList");

  private List<PolicyInfo> category; // list of policies
  private Path srcPath;

  /**
   * Create a new category of policies.
   */
  public PolicyList() {
    this.category = new LinkedList<PolicyInfo>();
    this.srcPath = null;
  }

  /**
   * Add a new policy to this category.
   */
  public void add(PolicyInfo info) {
    category.add(info);
  }

  public void setSrcPath(Configuration conf, String src) throws IOException {
    srcPath = new Path(src);
    srcPath = srcPath.makeQualified(srcPath.getFileSystem(conf));
  }

  public Path getSrcPath() {
    return srcPath;
  }

  /**
   * Returns the policies in this category
   */
  public Collection<PolicyInfo> getAll() {
    return category;
  }

  //////////////////////////////////////////////////
  // Writable
  //////////////////////////////////////////////////
  static {                                      // register a ctor
    WritableFactories.setFactory
      (PolicyList.class,
       new WritableFactory() {
         public Writable newInstance() { return new PolicyList(); }
       });
  }

  public void write(DataOutput out) throws IOException {
    out.writeInt(category.size());
    for (PolicyInfo p : category) {
      p.write(out);
    }
  }

  public void readFields(DataInput in) throws IOException {
    int count = in.readInt();
    for (int i = 0; i < count; i++) {
      PolicyInfo p = new PolicyInfo();
      p.readFields(in);
      add(p);
    }
  }
}
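As a reading aid, here is a small hypothetical sketch (not part of the commit) of how PolicyInfo objects are grouped into a PolicyList category; the policy names and source path are invented for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.PolicyList;

public class PolicyListExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Group two policies that share a source directory into one category.
    PolicyList category = new PolicyList();
    category.setSrcPath(conf, "/user/raidtest");

    PolicyInfo xorPolicy = new PolicyInfo("xor-policy", conf);
    xorPolicy.setErasureCode("xor");
    category.add(xorPolicy);

    PolicyInfo rsPolicy = new PolicyInfo("rs-policy", conf);
    rsPolicy.setErasureCode("rs");
    category.add(rsPolicy);

    // Policies in a category are applied one at a time, never simultaneously.
    for (PolicyInfo p : category.getAll()) {
      System.out.println(p.getName() + " -> " + p.getErasureCode());
    }
  }
}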
@@ -1,58 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid.protocol;

import java.util.Collection;
import java.io.IOException;

import org.apache.hadoop.ipc.VersionedProtocol;
import org.apache.hadoop.fs.Path;

/**********************************************************************
 * RaidProtocol is used by user code such as the
 * {@link org.apache.hadoop.raid.RaidShell} class to communicate
 * with the RaidNode.  User code can manipulate the configured policies.
 *
 **********************************************************************/
public interface RaidProtocol extends VersionedProtocol {

  /**
   * Compared to the previous version the following changes have been introduced:
   * Only the latest change is reflected.
   * 1: new protocol introduced
   */
  public static final long versionID = 1L;

  /**
   * Get a listing of all configured policies
   * @throws IOException
   * @return all categories of configured policies
   */
  public PolicyList[] getAllPolicies() throws IOException;

  /**
   * Unraid the specified input path. This is called when the specified file
   * is corrupted. This call will move the specified file to file.old
   * and then recover it from the RAID subsystem.
   *
   * @param inputPath The absolute pathname of the file to be recovered.
   * @param corruptOffset The offset that has the corruption
   */
  public String recoverFile(String inputPath, long corruptOffset) throws IOException;

}
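A short hypothetical sketch (not part of the commit) of how a client that already holds a RaidProtocol reference, for example inside RaidShell, might use the two calls declared above; how the RPC proxy is obtained is deliberately left out.

import java.io.IOException;

import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.PolicyList;
import org.apache.hadoop.raid.protocol.RaidProtocol;

public class RaidProtocolClientSketch {
  // List every configured policy, then ask the RaidNode to recover one file.
  static void listAndRecover(RaidProtocol raidnode, String corruptFile)
      throws IOException {
    for (PolicyList category : raidnode.getAllPolicies()) {
      for (PolicyInfo policy : category.getAll()) {
        System.out.println("Configured policy: " + policy.getName());
      }
    }
    // Moves the corrupt file aside and rebuilds it from parity data.
    String recovered = raidnode.recoverFile(corruptFile, 0L);
    System.out.println("Recovered copy at " + recovered);
  }
}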
@@ -1,45 +0,0 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Start the hadoop RaidNode process on the machine specified in the file conf/raidnode

usage="Usage: start-raidnode-remote.sh"

params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

# get arguments
if [ $# -ge 1 ]; then
  echo $usage
  exit
fi

if [ -f "${HADOOP_CONF_DIR}/raidnode" ]; then
  export HADOOP_SLAVES="${HADOOP_CONF_DIR}/raidnode"
  echo "Starting raidnode at "`cat ${HADOOP_SLAVES}`
  "$bin"/slaves.sh --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/start-raidnode.sh"
else
  echo "No raidnode file in ${HADOOP_CONF_DIR}/raidnode"
fi

@@ -1,42 +0,0 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Start hadoop RaidNode process
# Run this on RaidNode machine

usage="Usage: start-raidnode.sh"

params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

# get arguments
if [ $# -ge 1 ]; then
  echo $usage
fi

if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
export HADOOP_OPTS="$HADOOP_OPTS $HADOOP_RAIDNODE_OPTS"

"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start org.apache.hadoop.raid.RaidNode
@@ -1,42 +0,0 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Stop the hadoop RaidNode process on the machine specified in the file conf/raidnode

usage="Usage: stop-raidnode-remote.sh"

params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

# get arguments
if [ $# -ge 1 ]; then
  echo $usage
fi

if [ -f "${HADOOP_CONF_DIR}/raidnode" ]; then
  export HADOOP_SLAVES="${HADOOP_CONF_DIR}/raidnode"
  echo "Stopping raidnode at "`cat ${HADOOP_SLAVES}`
  "$bin"/slaves.sh --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/stop-raidnode.sh"
else
  echo "No raidnode file in ${HADOOP_CONF_DIR}/raidnode"
fi
@@ -1,39 +0,0 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Stop hadoop RaidNode process
# Run this on RaidNode machine.

usage="Usage: stop-raidnode.sh"

params=$#
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

# get arguments
if [ $# -ge 1 ]; then
  echo $usage
fi

export HADOOP_OPTS="$HADOOP_OPTS $HADOOP_RAIDNODE_OPTS"

"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop org.apache.hadoop.raid.RaidNode
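Taken together, the four scripts above wire the RaidNode into the standard Hadoop daemon tooling: start-raidnode-remote.sh and stop-raidnode-remote.sh read the host listed in ${HADOOP_CONF_DIR}/raidnode and use slaves.sh to invoke start-raidnode.sh or stop-raidnode.sh on that machine, which in turn call hadoop-daemon.sh to start or stop the org.apache.hadoop.raid.RaidNode class with any options given in HADOOP_RAIDNODE_OPTS. As their usage strings indicate, the scripts take no arguments.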
@@ -1,501 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.net.URI;
import java.util.Random;
import java.util.regex.Pattern;
import java.util.zip.CRC32;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.RaidNode;
import org.apache.hadoop.raid.RaidUtils;
import org.apache.hadoop.raid.protocol.PolicyInfo.ErasureCodeType;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;

public class TestRaidDfs {
  final static String TEST_DIR = new File(System.getProperty("test.build.data",
      "target/test-data")).getAbsolutePath();
  final static String LOG_DIR = "target/raidlog";
  final static long RELOAD_INTERVAL = 1000;
  final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidDfs");
  final static int NUM_DATANODES = 3;

  Configuration conf;
  String namenode = null;
  String hftp = null;
  MiniDFSCluster dfs = null;
  FileSystem fileSys = null;
  String jobTrackerName = null;
  ErasureCodeType code;
  int stripeLength;

  private void mySetup(
      String erasureCode, int rsParityLength) throws Exception {

    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();

    conf.set("fs.raid.recoverylogdir", LOG_DIR);
    conf.setInt(RaidNode.RS_PARITY_LENGTH_KEY, rsParityLength);

    // scan all policies once every 5 second
    conf.setLong("raid.policy.rescan.interval", 5000);

    // make all deletions not go through Trash
    conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");

    // do not use map-reduce cluster for Raiding
    conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");

    conf.set("raid.server.address", "localhost:0");
    conf.setInt("hdfs.raid.stripeLength", stripeLength);
    conf.set("xor".equals(erasureCode) ? RaidNode.RAID_LOCATION_KEY :
             RaidNode.RAIDRS_LOCATION_KEY, "/destraid");

    dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
    dfs.waitActive();
    fileSys = dfs.getFileSystem();
    namenode = fileSys.getUri().toString();
    hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();

    FileSystem.setDefaultUri(conf, namenode);
  }

  private void myTearDown() throws Exception {
    if (dfs != null) { dfs.shutdown(); }
  }

  private LocatedBlocks getBlockLocations(Path file, long length)
    throws IOException {
    DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
    return RaidDFSUtil.getBlockLocations(
      dfs, file.toUri().getPath(), 0, length);
  }

  private LocatedBlocks getBlockLocations(Path file)
    throws IOException {
    FileStatus stat = fileSys.getFileStatus(file);
    return getBlockLocations(file, stat.getLen());
  }

  private DistributedRaidFileSystem getRaidFS() throws IOException {
    DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
    Configuration clientConf = new Configuration(conf);
    clientConf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
    clientConf.set("fs.raid.underlyingfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
    URI dfsUri = dfs.getUri();
    return (DistributedRaidFileSystem)FileSystem.get(dfsUri, clientConf);
  }

  public static void waitForFileRaided(
    Log logger, FileSystem fileSys, Path file, Path destPath)
    throws IOException, InterruptedException {
    FileStatus parityStat = null;
    String fileName = file.getName().toString();
    // wait till file is raided
    while (parityStat == null) {
      logger.info("Waiting for files to be raided.");
      try {
        FileStatus[] listPaths = fileSys.listStatus(destPath);
        if (listPaths != null) {
          for (FileStatus f : listPaths) {
            logger.info("File raided so far : " + f.getPath());
            String found = f.getPath().getName().toString();
            if (fileName.equals(found)) {
              parityStat = f;
              break;
            }
          }
        }
      } catch (FileNotFoundException e) {
        //ignore
      }
      Thread.sleep(1000);                  // keep waiting
    }

    while (true) {
      LocatedBlocks locations = null;
      DistributedFileSystem dfs = (DistributedFileSystem) fileSys;
      locations = RaidDFSUtil.getBlockLocations(
        dfs, file.toUri().getPath(), 0, parityStat.getLen());
      if (!locations.isUnderConstruction()) {
        break;
      }
      Thread.sleep(1000);
    }

    while (true) {
      FileStatus stat = fileSys.getFileStatus(file);
      if (stat.getReplication() == 1) break;
      Thread.sleep(1000);
    }
  }

  private void corruptBlockAndValidate(Path srcFile, Path destPath,
    int[] listBlockNumToCorrupt, long blockSize, int numBlocks)
    throws IOException, InterruptedException {
    int repl = 1;
    long crc = createTestFilePartialLastBlock(fileSys, srcFile, repl,
                  numBlocks, blockSize);
    long length = fileSys.getFileStatus(srcFile).getLen();

    RaidNode.doRaid(conf, fileSys.getFileStatus(srcFile),
      destPath, code, new RaidNode.Statistics(), new RaidUtils.DummyProgressable(),
      false, repl, repl, stripeLength);

    // Delete the specified blocks of the file
    for (int blockNumToCorrupt : listBlockNumToCorrupt) {
      LOG.info("Corrupt block " + blockNumToCorrupt + " of file " + srcFile);
      LocatedBlocks locations = getBlockLocations(srcFile);
      corruptBlock(dfs, srcFile, locations.get(blockNumToCorrupt).getBlock(),
            NUM_DATANODES, true);
    }

    // Validate
    DistributedRaidFileSystem raidfs = getRaidFS();
    assertTrue(validateFile(raidfs, srcFile, length, crc));
    validateLogFile(getRaidFS(), new Path(LOG_DIR));
  }

  /**
   * Create a file, corrupt several blocks in it and ensure that the file can be
   * read through DistributedRaidFileSystem by ReedSolomon coding.
   */
  @Test
  public void testRaidDfsRs() throws Exception {
    LOG.info("Test testRaidDfs started.");

    code = ErasureCodeType.RS;
    long blockSize = 8192L;
    int numBlocks = 8;
    stripeLength = 3;
    mySetup("rs", 3);

    int[][] corrupt = {{1, 2, 3}, {1, 4, 7}, {3, 6, 7}};
    try {
      for (int i = 0; i < corrupt.length; i++) {
        Path file = new Path("/user/dhruba/raidtest/file" + i);
        corruptBlockAndValidate(
            file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
      }
    } catch (Exception e) {
      LOG.info("testRaidDfs Exception " + e +
                StringUtils.stringifyException(e));
      throw e;
    } finally {
      myTearDown();
    }
    LOG.info("Test testRaidDfs completed.");
  }

  /**
   * Test DistributedRaidFileSystem.readFully()
   */
  @Test
  public void testReadFully() throws Exception {
    code = ErasureCodeType.XOR;
    stripeLength = 3;
    mySetup("xor", 1);

    try {
      Path file = new Path("/user/raid/raidtest/file1");
      long crc = createTestFile(fileSys, file, 1, 8, 8192L);
      FileStatus stat = fileSys.getFileStatus(file);
      LOG.info("Created " + file + ", crc=" + crc + ", len=" + stat.getLen());

      byte[] filebytes = new byte[(int)stat.getLen()];
      // Test that readFully returns the correct CRC when there are no errors.
      DistributedRaidFileSystem raidfs = getRaidFS();
      FSDataInputStream stm = raidfs.open(file);
      stm.readFully(0, filebytes);
      assertEquals(crc, bufferCRC(filebytes));
      stm.close();

      // Generate parity.
      RaidNode.doRaid(conf, fileSys.getFileStatus(file),
        new Path("/destraid"), code, new RaidNode.Statistics(),
        new RaidUtils.DummyProgressable(),
        false, 1, 1, stripeLength);
      int[] corrupt = {0, 4, 7}; // first, last and middle block
      for (int blockIdx : corrupt) {
        LOG.info("Corrupt block " + blockIdx + " of file " + file);
        LocatedBlocks locations = getBlockLocations(file);
        corruptBlock(dfs, file, locations.get(blockIdx).getBlock(),
            NUM_DATANODES, true);
      }
      // Test that readFully returns the correct CRC when there are errors.
      stm = raidfs.open(file);
      stm.readFully(0, filebytes);
      assertEquals(crc, bufferCRC(filebytes));
    } finally {
      myTearDown();
    }
  }

  /**
   * Test that access time and mtime of a source file do not change after
   * raiding.
   */
  @Test
  public void testAccessTime() throws Exception {
    LOG.info("Test testAccessTime started.");

    code = ErasureCodeType.XOR;
    long blockSize = 8192L;
    int numBlocks = 8;
    int repl = 1;
    stripeLength = 3;
    mySetup("xor", 1);

    Path file = new Path("/user/dhruba/raidtest/file");
    createTestFilePartialLastBlock(fileSys, file, repl, numBlocks, blockSize);
    FileStatus stat = fileSys.getFileStatus(file);

    try {
      RaidNode.doRaid(conf, fileSys.getFileStatus(file),
        new Path("/destraid"), code, new RaidNode.Statistics(),
        new RaidUtils.DummyProgressable(), false, repl, repl, stripeLength);

      FileStatus newStat = fileSys.getFileStatus(file);

      assertEquals(stat.getModificationTime(), newStat.getModificationTime());
      assertEquals(stat.getAccessTime(), newStat.getAccessTime());
    } finally {
      myTearDown();
    }
  }

  /**
   * Create a file, corrupt a block in it and ensure that the file can be
   * read through DistributedRaidFileSystem by XOR code.
   */
  @Test
  public void testRaidDfsXor() throws Exception {
    LOG.info("Test testRaidDfs started.");

    code = ErasureCodeType.XOR;
    long blockSize = 8192L;
    int numBlocks = 8;
    stripeLength = 3;
    mySetup("xor", 1);

    int[][] corrupt = {{0}, {4}, {7}}; // first, last and middle block
    try {
      for (int i = 0; i < corrupt.length; i++) {
        Path file = new Path("/user/dhruba/raidtest/" + i);
        corruptBlockAndValidate(
            file, new Path("/destraid"), corrupt[i], blockSize, numBlocks);
      }
    } catch (Exception e) {
      LOG.info("testRaidDfs Exception " + e +
               StringUtils.stringifyException(e));
      throw e;
    } finally {
      myTearDown();
    }
    LOG.info("Test testRaidDfs completed.");
  }

  //
  // creates a file and populates it with random data. Returns its crc.
  //
  public static long createTestFile(FileSystem fileSys, Path name, int repl,
                  int numBlocks, long blocksize)
    throws IOException {
    CRC32 crc = new CRC32();
    Random rand = new Random();
    FSDataOutputStream stm = fileSys.create(name, true,
                                            fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                            (short)repl, blocksize);
    // fill random data into file
    final byte[] b = new byte[(int)blocksize];
    for (int i = 0; i < numBlocks; i++) {
      rand.nextBytes(b);
      stm.write(b);
      crc.update(b);
    }
    stm.close();
    return crc.getValue();
  }

  //
  // Creates a file with partially full last block. Populate it with random
  // data. Returns its crc.
  //
  public static long createTestFilePartialLastBlock(
      FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
    throws IOException {
    CRC32 crc = new CRC32();
    Random rand = new Random();
    FSDataOutputStream stm = fileSys.create(name, true,
                                            fileSys.getConf().getInt("io.file.buffer.size", 4096),
                                            (short)repl, blocksize);
    // Write whole blocks.
    byte[] b = new byte[(int)blocksize];
    for (int i = 1; i < numBlocks; i++) {
      rand.nextBytes(b);
      stm.write(b);
      crc.update(b);
    }
    // Write partial block.
    b = new byte[(int)blocksize/2 - 1];
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);

    stm.close();
    return crc.getValue();
  }

  static long bufferCRC(byte[] buf) {
    CRC32 crc = new CRC32();
    crc.update(buf, 0, buf.length);
    return crc.getValue();
  }

  //
  // validates that file matches the crc.
  //
  public static boolean validateFile(FileSystem fileSys, Path name, long length,
                  long crc)
    throws IOException {

    long numRead = 0;
    CRC32 newcrc = new CRC32();
    FSDataInputStream stm = fileSys.open(name);
    final byte[] b = new byte[4192];
    int num = 0;
    while (num >= 0) {
      num = stm.read(b);
      if (num < 0) {
        break;
      }
      numRead += num;
      newcrc.update(b, 0, num);
    }
    stm.close();

    if (numRead != length) {
      LOG.info("Number of bytes read " + numRead +
               " does not match file size " + length);
      return false;
    }

    LOG.info(" Newcrc " + newcrc.getValue() + " old crc " + crc);
    if (newcrc.getValue() != crc) {
      LOG.info("CRC mismatch of file " + name + ": " + newcrc + " vs. " + crc);
      return false;
    }
    return true;
  }
  //
  // validates the contents of raid recovery log file
  //
  public static void validateLogFile(FileSystem fileSys, Path logDir)
      throws IOException {
    FileStatus f = fileSys.listStatus(logDir)[0];
    FSDataInputStream stm = fileSys.open(f.getPath());
    try {
      BufferedReader reader = new BufferedReader(new InputStreamReader(stm));
      assertEquals("Recovery attempt log", reader.readLine());
      assertTrue(Pattern.matches("Source path : /user/dhruba/raidtest/.*",
          reader.readLine()));
      assertTrue(Pattern.matches("Alternate path : .*/destraid",
          reader.readLine()));
      assertEquals("Stripe lentgh : 3", reader.readLine());
      assertTrue(Pattern.matches("Corrupt offset : \\d*", reader.readLine()));
      assertTrue(Pattern.matches("Output from unRaid : " +
          "hdfs://.*/tmp/raid/user/dhruba/raidtest/.*recovered",
          reader.readLine()));
    } finally {
      stm.close();
    }
    LOG.info("Raid HDFS Recovery log verified");
  }

  //
  // Delete/Corrupt specified block of file
  //
  public static void corruptBlock(MiniDFSCluster dfs, Path file, ExtendedBlock blockNum,
    int numDataNodes, boolean delete) throws IOException {
    // Now deliberately remove/truncate replicas of blocks
    int numDeleted = 0;
    int numCorrupted = 0;
    for (int i = 0; i < numDataNodes; i++) {
      File block = MiniDFSCluster.getBlockFile(i, blockNum);
      if (block == null || !block.exists()) {
        continue;
      }
      if (delete) {
        block.delete();
        LOG.info("Deleted block " + block);
        numDeleted++;
      } else {
        // Corrupt
        long seekPos = block.length()/2;
        RandomAccessFile raf = new RandomAccessFile(block, "rw");
        raf.seek(seekPos);
        int data = raf.readInt();
        raf.seek(seekPos);
        raf.writeInt(data+1);
        LOG.info("Corrupted block " + block);
        numCorrupted++;
      }
    }
    assertTrue("Nothing corrupted or deleted",
              (numCorrupted + numDeleted) > 0);
  }

  public static void corruptBlock(Path file, ExtendedBlock blockNum,
                    int numDataNodes, long offset) throws IOException {
    // Now deliberately corrupt replicas of the block.
    for (int i = 0; i < numDataNodes; i++) {
      File block = MiniDFSCluster.getBlockFile(i, blockNum);
      if (block == null || !block.exists()) {
        continue;
      }
      RandomAccessFile raf = new RandomAccessFile(block, "rw");
      raf.seek(offset);
      int data = raf.readInt();
      raf.seek(offset);
      raf.writeInt(data+1);
      LOG.info("Corrupted block " + block);
    }
  }
}
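The CRC helpers above are public and static, so they can be reused outside this test; below is a minimal, hypothetical sketch (not part of the commit) that exercises createTestFile and validateFile against the local file system. The path and sizes are arbitrary example values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.TestRaidDfs;

public class CrcHelperSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(conf);
    Path file = new Path("/tmp/raid-crc-example");

    // Two full 1 KB blocks of random data; the helper returns the CRC32.
    int numBlocks = 2;
    long blockSize = 1024L;
    long crc = TestRaidDfs.createTestFile(localFs, file, 1, numBlocks, blockSize);

    // validateFile re-reads the file and checks both length and checksum.
    boolean ok = TestRaidDfs.validateFile(localFs, file, numBlocks * blockSize, crc);
    System.out.println("CRC check passed: " + ok);
  }
}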
@ -1,518 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.hdfs.server.blockmanagement;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.BlockLocation;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.CachedFullPathNames;
|
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.CachedLocatedBlocks;
|
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRaid.FileType;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeRaidTestUtil;
|
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeRaidUtil;
|
|
||||||
import org.apache.hadoop.net.NetworkTopology;
|
|
||||||
import org.apache.hadoop.raid.RaidNode;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestBlockPlacementPolicyRaid {
|
|
||||||
private Configuration conf = null;
|
|
||||||
private MiniDFSCluster cluster = null;
|
|
||||||
private FSNamesystem namesystem = null;
|
|
||||||
private BlockManager blockManager;
|
|
||||||
private NetworkTopology networktopology;
|
|
||||||
private BlockPlacementPolicyRaid policy = null;
|
|
||||||
private FileSystem fs = null;
|
|
||||||
String[] rack1 = {"/rack1"};
|
|
||||||
String[] rack2 = {"/rack2"};
|
|
||||||
String[] host1 = {"host1.rack1.com"};
|
|
||||||
String[] host2 = {"host2.rack2.com"};
|
|
||||||
String xorPrefix = null;
|
|
||||||
String raidTempPrefix = null;
|
|
||||||
String raidrsTempPrefix = null;
|
|
||||||
String raidrsHarTempPrefix = null;
|
|
||||||
|
|
||||||
final static Log LOG =
|
|
||||||
LogFactory.getLog(TestBlockPlacementPolicyRaid.class);
|
|
||||||
|
|
||||||
protected void setupCluster() throws IOException {
|
|
||||||
conf = new Configuration();
|
|
||||||
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
|
|
||||||
conf.set("dfs.replication.pending.timeout.sec", "2");
|
|
||||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1L);
|
|
||||||
conf.set("dfs.block.replicator.classname",
|
|
||||||
BlockPlacementPolicyRaid.class.getName());
|
|
||||||
conf.set(RaidNode.STRIPE_LENGTH_KEY, "2");
|
|
||||||
conf.set(RaidNode.RS_PARITY_LENGTH_KEY, "3");
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1);
|
|
||||||
// start the cluster with one datanode first
|
|
||||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).
|
|
||||||
format(true).racks(rack1).hosts(host1).build();
|
|
||||||
cluster.waitActive();
|
|
||||||
namesystem = cluster.getNameNode().getNamesystem();
|
|
||||||
blockManager = namesystem.getBlockManager();
|
|
||||||
networktopology = blockManager.getDatanodeManager().getNetworkTopology();
|
|
||||||
|
|
||||||
Assert.assertTrue("BlockPlacementPolicy type is not correct.",
|
|
||||||
blockManager.getBlockPlacementPolicy() instanceof BlockPlacementPolicyRaid);
|
|
||||||
policy = (BlockPlacementPolicyRaid)blockManager.getBlockPlacementPolicy();
|
|
||||||
fs = cluster.getFileSystem();
|
|
||||||
xorPrefix = RaidNode.xorDestinationPath(conf).toUri().getPath();
|
|
||||||
raidTempPrefix = RaidNode.xorTempPrefix(conf);
|
|
||||||
raidrsTempPrefix = RaidNode.rsTempPrefix(conf);
|
|
||||||
raidrsHarTempPrefix = RaidNode.rsHarTempPrefix(conf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that the parity files will be placed at the good locations when we
|
|
||||||
* create them.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testChooseTargetForRaidFile() throws IOException {
|
|
||||||
setupCluster();
|
|
||||||
try {
|
|
||||||
String src = "/dir/file";
|
|
||||||
String parity = raidrsTempPrefix + src;
|
|
||||||
DFSTestUtil.createFile(fs, new Path(src), 4, (short)1, 0L);
|
|
||||||
DFSTestUtil.waitReplication(fs, new Path(src), (short)1);
|
|
||||||
refreshPolicy();
|
|
||||||
setBlockPlacementPolicy(namesystem, policy);
|
|
||||||
// start 3 more datanodes
|
|
||||||
String[] racks = {"/rack2", "/rack2", "/rack2",
|
|
||||||
"/rack2", "/rack2", "/rack2"};
|
|
||||||
String[] hosts =
|
|
||||||
{"host2.rack2.com", "host3.rack2.com", "host4.rack2.com",
|
|
||||||
"host5.rack2.com", "host6.rack2.com", "host7.rack2.com"};
|
|
||||||
cluster.startDataNodes(conf, 6, true, null, racks, hosts, null);
|
|
||||||
int numBlocks = 6;
|
|
||||||
DFSTestUtil.createFile(fs, new Path(parity), numBlocks, (short)2, 0L);
|
|
||||||
DFSTestUtil.waitReplication(fs, new Path(parity), (short)2);
|
|
||||||
FileStatus srcStat = fs.getFileStatus(new Path(src));
|
|
||||||
BlockLocation[] srcLoc =
|
|
||||||
fs.getFileBlockLocations(srcStat, 0, srcStat.getLen());
|
|
||||||
FileStatus parityStat = fs.getFileStatus(new Path(parity));
|
|
||||||
BlockLocation[] parityLoc =
|
|
||||||
fs.getFileBlockLocations(parityStat, 0, parityStat.getLen());
|
|
||||||
int parityLen = RaidNode.rsParityLength(conf);
|
|
||||||
for (int i = 0; i < numBlocks / parityLen; i++) {
|
|
||||||
Set<String> locations = new HashSet<String>();
|
|
||||||
for (int j = 0; j < srcLoc.length; j++) {
|
|
||||||
String [] names = srcLoc[j].getNames();
|
|
||||||
for (int k = 0; k < names.length; k++) {
|
|
||||||
LOG.info("Source block location: " + names[k]);
|
|
||||||
locations.add(names[k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int j = 0 ; j < parityLen; j++) {
|
|
||||||
String[] names = parityLoc[j + i * parityLen].getNames();
|
|
||||||
for (int k = 0; k < names.length; k++) {
|
|
||||||
LOG.info("Parity block location: " + names[k]);
|
|
||||||
Assert.assertTrue(locations.add(names[k]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
if (cluster != null) {
|
|
||||||
cluster.shutdown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that the har parity files will be placed at the good locations when we
|
|
||||||
* create them.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testChooseTargetForHarRaidFile() throws IOException {
|
|
||||||
setupCluster();
|
|
||||||
try {
|
|
||||||
String[] racks = {"/rack2", "/rack2", "/rack2",
|
|
||||||
"/rack2", "/rack2", "/rack2"};
|
|
||||||
String[] hosts =
|
|
||||||
{"host2.rack2.com", "host3.rack2.com", "host4.rack2.com",
|
|
||||||
"host5.rack2.com", "host6.rack2.com", "host7.rack2.com"};
|
|
||||||
cluster.startDataNodes(conf, 6, true, null, racks, hosts, null);
|
|
||||||
String harParity = raidrsHarTempPrefix + "/dir/file";
|
|
||||||
int numBlocks = 11;
|
|
||||||
DFSTestUtil.createFile(fs, new Path(harParity), numBlocks, (short)1, 0L);
|
|
||||||
DFSTestUtil.waitReplication(fs, new Path(harParity), (short)1);
|
|
||||||
FileStatus stat = fs.getFileStatus(new Path(harParity));
|
|
||||||
BlockLocation[] loc = fs.getFileBlockLocations(stat, 0, stat.getLen());
|
|
||||||
int rsParityLength = RaidNode.rsParityLength(conf);
|
|
||||||
for (int i = 0; i < numBlocks - rsParityLength; i++) {
|
|
||||||
Set<String> locations = new HashSet<String>();
|
|
||||||
for (int j = 0; j < rsParityLength; j++) {
|
|
||||||
for (int k = 0; k < loc[i + j].getNames().length; k++) {
|
|
||||||
// verify that every adjacent 4 blocks are on differnt nodes
|
|
||||||
String name = loc[i + j].getNames()[k];
|
|
||||||
LOG.info("Har Raid block location: " + name);
|
|
||||||
Assert.assertTrue(locations.add(name));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
if (cluster != null) {
|
|
||||||
cluster.shutdown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test BlockPlacementPolicyRaid.CachedLocatedBlocks
|
|
||||||
* Verify that the results obtained from cache is the same as
|
|
||||||
* the results obtained directly
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testCachedBlocks() throws IOException {
|
|
||||||
setupCluster();
|
|
||||||
try {
|
|
||||||
String file1 = "/dir/file1";
|
|
||||||
String file2 = "/dir/file2";
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
|
|
||||||
// test blocks cache
|
|
||||||
CachedLocatedBlocks cachedBlocks = new CachedLocatedBlocks(namesystem);
|
|
||||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
|
|
||||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
|
|
||||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
|
|
||||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
|
|
||||||
try {
|
|
||||||
Thread.sleep(1200L);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
}
|
|
||||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file2);
|
|
||||||
verifyCachedBlocksResult(cachedBlocks, namesystem, file1);
|
|
||||||
} finally {
|
|
||||||
if (cluster != null) {
|
|
||||||
cluster.shutdown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test BlockPlacementPolicyRaid.CachedFullPathNames
|
|
||||||
* Verify that the results obtained from cache is the same as
|
|
||||||
* the results obtained directly
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testCachedPathNames() throws IOException {
|
|
||||||
setupCluster();
|
|
||||||
try {
|
|
||||||
String file1 = "/dir/file1";
|
|
||||||
String file2 = "/dir/file2";
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
|
|
||||||
// test full path cache
|
|
||||||
CachedFullPathNames cachedFullPathNames =
|
|
||||||
new CachedFullPathNames(namesystem);
|
|
||||||
final BlockCollection[] bcs = NameNodeRaidTestUtil.getBlockCollections(
|
|
||||||
namesystem, file1, file2);
|
|
||||||
|
|
||||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
|
|
||||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
|
|
||||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
|
|
||||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
|
|
||||||
try {
|
|
||||||
Thread.sleep(1200L);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
}
|
|
||||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[1]);
|
|
||||||
verifyCachedFullPathNameResult(cachedFullPathNames, bcs[0]);
|
|
||||||
} finally {
|
|
||||||
if (cluster != null) {
|
|
||||||
cluster.shutdown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Test the result of getCompanionBlocks() on the unraided files
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testGetCompanionBLocks() throws IOException {
|
|
||||||
setupCluster();
|
|
||||||
try {
|
|
||||||
String file1 = "/dir/file1";
|
|
||||||
String file2 = "/raid/dir/file2";
|
|
||||||
String file3 = "/raidrs/dir/file3";
|
|
||||||
// Set the policy to default policy to place the block in the default way
|
|
||||||
setBlockPlacementPolicy(namesystem, new BlockPlacementPolicyDefault(
|
|
||||||
conf, namesystem, networktopology));
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file1), 3, (short)1, 0L);
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file2), 4, (short)1, 0L);
|
|
||||||
DFSTestUtil.createFile(fs, new Path(file3), 8, (short)1, 0L);
|
|
||||||
Collection<LocatedBlock> companionBlocks;
|
|
||||||
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file1).get(0).getBlock());
|
|
||||||
Assert.assertTrue(companionBlocks == null || companionBlocks.size() == 0);
|
|
||||||
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file1).get(2).getBlock());
|
|
||||||
Assert.assertTrue(companionBlocks == null || companionBlocks.size() == 0);
|
|
||||||
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file2).get(0).getBlock());
|
|
||||||
Assert.assertEquals(1, companionBlocks.size());
|
|
||||||
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file2).get(3).getBlock());
|
|
||||||
Assert.assertEquals(1, companionBlocks.size());
|
|
||||||
|
|
||||||
int rsParityLength = RaidNode.rsParityLength(conf);
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file3).get(0).getBlock());
|
|
||||||
Assert.assertEquals(rsParityLength, companionBlocks.size());
|
|
||||||
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file3).get(4).getBlock());
|
|
||||||
Assert.assertEquals(rsParityLength, companionBlocks.size());
|
|
||||||
|
|
||||||
companionBlocks = getCompanionBlocks(
|
|
||||||
namesystem, policy, getBlocks(namesystem, file3).get(6).getBlock());
|
|
||||||
Assert.assertEquals(2, companionBlocks.size());
|
|
||||||
} finally {
|
|
||||||
if (cluster != null) {
|
|
||||||
cluster.shutdown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void setBlockPlacementPolicy(
|
|
||||||
FSNamesystem namesystem, BlockPlacementPolicy policy) {
|
|
||||||
namesystem.writeLock();
|
|
||||||
try {
|
|
||||||
namesystem.getBlockManager().setBlockPlacementPolicy(policy);
|
|
||||||
} finally {
|
|
||||||
namesystem.writeUnlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test BlockPlacementPolicyRaid actually deletes the correct replica.
|
|
||||||
* Start 2 datanodes and create 1 source file and its parity file.
|
|
||||||
* 1) Start host1, create the parity file with replication 1
|
|
||||||
* 2) Start host2, create the source file with replication 2
|
|
||||||
* 3) Set repliation of source file to 1
|
|
||||||
* Verify that the policy should delete the block with more companion blocks.
|
|
||||||
*/
|
|
||||||
  @Test
  public void testDeleteReplica() throws IOException {
    setupCluster();
    try {
      // Set the policy to default policy to place the block in the default way
      setBlockPlacementPolicy(namesystem, new BlockPlacementPolicyDefault(
          conf, namesystem, networktopology));
      DatanodeDescriptor datanode1 = blockManager.getDatanodeManager(
          ).getDatanodeCyclicIteration("").iterator().next().getValue();
      String source = "/dir/file";
      String parity = xorPrefix + source;

      final Path parityPath = new Path(parity);
      DFSTestUtil.createFile(fs, parityPath, 3, (short)1, 0L);
      DFSTestUtil.waitReplication(fs, parityPath, (short)1);

      // start one more datanode
      cluster.startDataNodes(conf, 1, true, null, rack2, host2, null);
      DatanodeDescriptor datanode2 = null;
      for (Map.Entry<String, DatanodeDescriptor> e : blockManager.getDatanodeManager(
          ).getDatanodeCyclicIteration("")) {
        final DatanodeDescriptor d = e.getValue();
        if (!d.getName().equals(datanode1.getName())) {
          datanode2 = d;
        }
      }
      Assert.assertTrue(datanode2 != null);
      cluster.waitActive();
      final Path sourcePath = new Path(source);
      DFSTestUtil.createFile(fs, sourcePath, 5, (short)2, 0L);
      DFSTestUtil.waitReplication(fs, sourcePath, (short)2);

      refreshPolicy();
      Assert.assertEquals(parity,
          policy.getParityFile(source));
      Assert.assertEquals(source,
          policy.getSourceFile(parity, xorPrefix));

      List<LocatedBlock> sourceBlocks = getBlocks(namesystem, source);
      List<LocatedBlock> parityBlocks = getBlocks(namesystem, parity);
      Assert.assertEquals(5, sourceBlocks.size());
      Assert.assertEquals(3, parityBlocks.size());

      // verify the result of getCompanionBlocks()
      Collection<LocatedBlock> companionBlocks;
      companionBlocks = getCompanionBlocks(
          namesystem, policy, sourceBlocks.get(0).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{0, 1}, new int[]{0});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, sourceBlocks.get(1).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{0, 1}, new int[]{0});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, sourceBlocks.get(2).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{2, 3}, new int[]{1});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, sourceBlocks.get(3).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{2, 3}, new int[]{1});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, sourceBlocks.get(4).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{4}, new int[]{2});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, parityBlocks.get(0).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{0, 1}, new int[]{0});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, parityBlocks.get(1).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{2, 3}, new int[]{1});

      companionBlocks = getCompanionBlocks(
          namesystem, policy, parityBlocks.get(2).getBlock());
      verifyCompanionBlocks(companionBlocks, sourceBlocks, parityBlocks,
          new int[]{4}, new int[]{2});

      // Set the policy back to raid policy. We have to create a new object
      // here to clear the block location cache
      refreshPolicy();
      setBlockPlacementPolicy(namesystem, policy);
      // verify policy deletes the correct blocks. companion blocks should be
      // evenly distributed.
      fs.setReplication(sourcePath, (short)1);
      DFSTestUtil.waitReplication(fs, sourcePath, (short)1);
      Map<String, Integer> counters = new HashMap<String, Integer>();
      refreshPolicy();
      for (int i = 0; i < parityBlocks.size(); i++) {
        companionBlocks = getCompanionBlocks(
            namesystem, policy, parityBlocks.get(i).getBlock());

        counters = BlockPlacementPolicyRaid.countCompanionBlocks(
            companionBlocks, false);
        Assert.assertTrue(counters.get(datanode1.getName()) >= 1 &&
            counters.get(datanode1.getName()) <= 2);
        Assert.assertTrue(counters.get(datanode1.getName()) +
            counters.get(datanode2.getName()) ==
            companionBlocks.size());

        counters = BlockPlacementPolicyRaid.countCompanionBlocks(
            companionBlocks, true);
        Assert.assertTrue(counters.get(datanode1.getParent().getName()) >= 1 &&
            counters.get(datanode1.getParent().getName()) <= 2);
        Assert.assertTrue(counters.get(datanode1.getParent().getName()) +
            counters.get(datanode2.getParent().getName()) ==
            companionBlocks.size());
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  // create a new BlockPlacementPolicyRaid to clear the cache
  private void refreshPolicy() {
    policy = new BlockPlacementPolicyRaid();
    policy.initialize(conf, namesystem, networktopology);
  }

  private void verifyCompanionBlocks(Collection<LocatedBlock> companionBlocks,
      List<LocatedBlock> sourceBlocks, List<LocatedBlock> parityBlocks,
      int[] sourceBlockIndexes, int[] parityBlockIndexes) {
    Set<ExtendedBlock> blockSet = new HashSet<ExtendedBlock>();
    for (LocatedBlock b : companionBlocks) {
      blockSet.add(b.getBlock());
    }
    Assert.assertEquals(sourceBlockIndexes.length + parityBlockIndexes.length,
        blockSet.size());
    for (int index : sourceBlockIndexes) {
      Assert.assertTrue(blockSet.contains(sourceBlocks.get(index).getBlock()));
    }
    for (int index : parityBlockIndexes) {
      Assert.assertTrue(blockSet.contains(parityBlocks.get(index).getBlock()));
    }
  }

  private void verifyCachedFullPathNameResult(
      CachedFullPathNames cachedFullPathNames, BlockCollection bc)
      throws IOException {
    String res1 = bc.getName();
    String res2 = cachedFullPathNames.get(bc);
    LOG.info("Actual path name: " + res1);
    LOG.info("Cached path name: " + res2);
    Assert.assertEquals(cachedFullPathNames.get(bc),
        bc.getName());
  }

  private void verifyCachedBlocksResult(CachedLocatedBlocks cachedBlocks,
      FSNamesystem namesystem, String file) throws IOException {
    long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
    List<LocatedBlock> res1 = NameNodeRaidUtil.getBlockLocations(namesystem,
        file, 0L, len, false, false).getLocatedBlocks();
    List<LocatedBlock> res2 = cachedBlocks.get(file);
    for (int i = 0; i < res1.size(); i++) {
      LOG.info("Actual block: " + res1.get(i).getBlock());
      LOG.info("Cached block: " + res2.get(i).getBlock());
      Assert.assertEquals(res1.get(i).getBlock(), res2.get(i).getBlock());
    }
  }

  private Collection<LocatedBlock> getCompanionBlocks(
      FSNamesystem namesystem, BlockPlacementPolicyRaid policy,
      ExtendedBlock block) throws IOException {
    INodeFile inode = (INodeFile)blockManager.blocksMap.getBlockCollection(block
        .getLocalBlock());
    FileType type = policy.getFileType(inode.getFullPathName());
    return policy.getCompanionBlocks(inode.getFullPathName(), type,
        block.getLocalBlock());
  }

  private List<LocatedBlock> getBlocks(FSNamesystem namesystem, String file)
      throws IOException {
    long len = NameNodeRaidUtil.getFileInfo(namesystem, file, true).getLen();
    return NameNodeRaidUtil.getBlockLocations(namesystem,
        file, 0, len, false, false).getLocatedBlocks();
  }
}
@ -1,38 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;

public class NameNodeRaidTestUtil {
  public static BlockCollection[] getBlockCollections(final FSNamesystem namesystem,
      final String... files) throws UnresolvedLinkException {
    final BlockCollection[] inodes = new BlockCollection[files.length];
    final FSDirectory dir = namesystem.dir;
    dir.readLock();
    try {
      for (int i = 0; i < files.length; i++) {
        inodes[i] = (BlockCollection)dir.rootDir.getNode(files[i], true);
      }
      return inodes;
    } finally {
      dir.readUnlock();
    }
  }
}
@ -1,671 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.zip.CRC32;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
|
||||||
import org.apache.hadoop.mapred.JobContext;
|
|
||||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
|
||||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
|
||||||
import org.apache.hadoop.util.JarFinder;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
|
|
||||||
public class TestBlockFixer {
|
|
||||||
final static Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.TestBlockFixer");
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
|
||||||
"test-raid.xml").getAbsolutePath();
|
|
||||||
public static final String DistBlockFixer_JAR =
|
|
||||||
JarFinder.getJar(DistBlockFixer.class);
|
|
||||||
final static long RELOAD_INTERVAL = 1000;
|
|
||||||
final static int NUM_DATANODES = 3;
|
|
||||||
Configuration conf;
|
|
||||||
String namenode = null;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
String hftp = null;
|
|
||||||
MiniMRCluster mr = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
RaidNode cnode = null;
|
|
||||||
String jobTrackerName = null;
|
|
||||||
Random rand = new Random();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests isXorParityFile and isRsParityFile
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testIsParityFile() throws IOException {
|
|
||||||
Configuration testConf = new Configuration();
|
|
||||||
testConf.set("hdfs.raid.locations", "/raid");
|
|
||||||
testConf.set("hdfs.raidrs.locations", "/raidrs");
|
|
||||||
|
|
||||||
BlockFixer.BlockFixerHelper helper =
|
|
||||||
new BlockFixer.BlockFixerHelper(testConf);
|
|
||||||
|
|
||||||
assertFalse("incorrectly identified rs parity file as xor parity file",
|
|
||||||
helper.isXorParityFile(new Path("/raidrs/test/test")));
|
|
||||||
assertTrue("could not identify rs parity file",
|
|
||||||
helper.isRsParityFile(new Path("/raidrs/test/test")));
|
|
||||||
assertTrue("could not identify xor parity file",
|
|
||||||
helper.isXorParityFile(new Path("/raid/test/test")));
|
|
||||||
assertFalse("incorrectly identified xor parity file as rs parity file",
|
|
||||||
helper.isRsParityFile(new Path("/raid/test/test")));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the filtering of trash files from the list of corrupt files.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testTrashFilter() {
|
|
||||||
List<Path> files = new LinkedList<Path>();
|
|
||||||
// Paths that do not match the trash pattern.
|
|
||||||
Path p1 = new Path("/user/raid/raidtest/f1");
|
|
||||||
Path p2 = new Path("/user/.Trash/");
|
|
||||||
// Paths that match the trash pattern.
|
|
||||||
Path p3 = new Path("/user/raid/.Trash/raidtest/f1");
|
|
||||||
Path p4 = new Path("/user/raid/.Trash/");
|
|
||||||
files.add(p1);
|
|
||||||
files.add(p3);
|
|
||||||
files.add(p4);
|
|
||||||
files.add(p2);
|
|
||||||
|
|
||||||
Configuration conf = new Configuration();
|
|
||||||
RaidUtils.filterTrash(conf, files);
|
|
||||||
|
|
||||||
assertEquals("expected 2 non-trash files but got " + files.size(),
|
|
||||||
2, files.size());
|
|
||||||
for (Path p: files) {
|
|
||||||
assertTrue("wrong file returned by filterTrash",
|
|
||||||
p == p1 || p == p2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testBlockFixLocal() throws Exception {
|
|
||||||
implBlockFix(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a file with three stripes, corrupt a block each in two stripes,
|
|
||||||
* and wait for the file to be fixed.
|
|
||||||
*/
|
|
||||||
protected void implBlockFix(boolean local) throws Exception {
|
|
||||||
LOG.info("Test testBlockFix started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, -1); // never har
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 7, blockSize);
|
|
||||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
|
||||||
LOG.info("Test testBlockFix created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
if (local) {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
|
||||||
} else {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.DistBlockFixer");
|
|
||||||
}
|
|
||||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
|
||||||
|
|
||||||
try {
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
cnode.stop(); cnode.join();
|
|
||||||
|
|
||||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
|
||||||
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
|
|
||||||
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
|
||||||
assertEquals("filesFixed() should return 0 before fixing files",
|
|
||||||
0, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
// Corrupt blocks in two different stripes. We can fix them.
|
|
||||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(locs.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
|
|
||||||
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("file not corrupted", 1, corruptFiles.length);
|
|
||||||
assertEquals("wrong file corrupted",
|
|
||||||
corruptFiles[0], file1.toUri().getPath());
|
|
||||||
assertEquals("wrong number of corrupt blocks", 3,
|
|
||||||
RaidDFSUtil.corruptBlocksInFile(dfs, file1.toUri().getPath(), 0,
|
|
||||||
srcStat.getLen()).size());
|
|
||||||
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
long start = Time.now();
|
|
||||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
|
||||||
Time.now() - start < 120000) {
|
|
||||||
LOG.info("Test testBlockFix waiting for files to be fixed.");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
assertEquals("file not fixed", 1, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
dfs = getDFS(conf, dfs);
|
|
||||||
assertTrue("file not fixed",
|
|
||||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test testBlockFix Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
LOG.info("Test testBlockFix completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests integrity of generated block.
|
|
||||||
* Create a file and delete a block entirely. Wait for the block to be
|
|
||||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
|
||||||
* Test that corruption in the generated block can be detected by clients.
|
|
||||||
*/
|
|
||||||
protected void generatedBlockTestCommon(String testName, int blockToCorrupt,
|
|
||||||
boolean local) throws Exception {
|
|
||||||
LOG.info("Test " + testName + " started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, -1); // never har
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
long crc1 = TestRaidDfs.createTestFile(fileSys, file1, 1, 7, blockSize);
|
|
||||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
|
||||||
LOG.info("Test " + testName + " created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
if (local) {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
|
||||||
} else {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.DistBlockFixer");
|
|
||||||
}
|
|
||||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
|
||||||
try {
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
cnode.stop(); cnode.join();
|
|
||||||
|
|
||||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
|
||||||
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
|
|
||||||
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
|
||||||
assertEquals("filesFixed() should return 0 before fixing files",
|
|
||||||
0, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
corruptBlock(locs.get(0).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
|
|
||||||
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("file not corrupted",
|
|
||||||
1, corruptFiles.length);
|
|
||||||
assertEquals("wrong file corrupted",
|
|
||||||
corruptFiles[0], file1.toUri().getPath());
|
|
||||||
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
long start = Time.now();
|
|
||||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
|
||||||
Time.now() - start < 120000) {
|
|
||||||
LOG.info("Test " + testName + " waiting for files to be fixed.");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
assertEquals("file not fixed",
|
|
||||||
1, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
// Stop RaidNode
|
|
||||||
cnode.stop(); cnode.join(); cnode = null;
|
|
||||||
|
|
||||||
// The block has successfully been reconstructed.
|
|
||||||
dfs = getDFS(conf, dfs);
|
|
||||||
assertTrue("file not fixed",
|
|
||||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
|
||||||
|
|
||||||
// Now corrupt the generated block.
|
|
||||||
locs = RaidDFSUtil.getBlockLocations(
|
|
||||||
dfs, file1.toUri().getPath(), 0, srcStat.getLen());
|
|
||||||
corruptBlock(locs.get(0).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file1, new int[]{0}, blockSize);
|
|
||||||
|
|
||||||
try {
|
|
||||||
Thread.sleep(5*1000);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1);
|
|
||||||
fail("Expected exception not thrown");
|
|
||||||
} catch (org.apache.hadoop.fs.ChecksumException ce) {
|
|
||||||
} catch (org.apache.hadoop.hdfs.BlockMissingException bme) {
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test " + testName + " Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
LOG.info("Test " + testName + " completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests integrity of generated block.
|
|
||||||
* Create a file and delete a block entirely. Wait for the block to be
|
|
||||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
|
||||||
* Test that corruption in the generated block can be detected by clients.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testGeneratedBlockLocal() throws Exception {
|
|
||||||
generatedBlockTestCommon("testGeneratedBlock", 3, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests integrity of generated last block.
|
|
||||||
* Create a file and delete a block entirely. Wait for the block to be
|
|
||||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
|
||||||
* Test that corruption in the generated block can be detected by clients.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testGeneratedLastBlockLocal() throws Exception {
|
|
||||||
generatedBlockTestCommon("testGeneratedLastBlock", 6, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParityBlockFixLocal() throws Exception {
|
|
||||||
implParityBlockFix("testParityBlockFixLocal", true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Corrupt a parity file and wait for it to get fixed.
|
|
||||||
*/
|
|
||||||
protected void implParityBlockFix(String testName, boolean local)
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("Test " + testName + " started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, -1); // never har
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
Path parityFile = new Path("/destraid/user/dhruba/raidtest/file1");
|
|
||||||
TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 7, blockSize);
|
|
||||||
LOG.info("Test " + testName + " created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
if (local) {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
|
||||||
} else {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.DistBlockFixer");
|
|
||||||
}
|
|
||||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
|
||||||
|
|
||||||
try {
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
cnode.stop(); cnode.join();
|
|
||||||
|
|
||||||
long parityCRC = getCRC(fileSys, parityFile);
|
|
||||||
|
|
||||||
FileStatus parityStat = fileSys.getFileStatus(parityFile);
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
|
||||||
dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
|
|
||||||
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
|
||||||
assertEquals("filesFixed() should return 0 before fixing files",
|
|
||||||
0, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
// Corrupt parity blocks for different stripes.
|
|
||||||
int[] corruptBlockIdxs = new int[]{0, 1, 2};
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(locs.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, parityFile, corruptBlockIdxs, blockSize);
|
|
||||||
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("file not corrupted",
|
|
||||||
1, corruptFiles.length);
|
|
||||||
assertEquals("wrong file corrupted",
|
|
||||||
corruptFiles[0], parityFile.toUri().getPath());
|
|
||||||
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
long start = Time.now();
|
|
||||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
|
||||||
Time.now() - start < 120000) {
|
|
||||||
LOG.info("Test " + testName + " waiting for files to be fixed.");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
assertEquals("file not fixed",
|
|
||||||
1, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
long checkCRC = getCRC(fileSys, parityFile);
|
|
||||||
|
|
||||||
assertEquals("file not fixed",
|
|
||||||
parityCRC, checkCRC);
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test " + testName + " Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
LOG.info("Test " + testName + " completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParityHarBlockFixLocal() throws Exception {
|
|
||||||
implParityHarBlockFix("testParityHarBlockFixLocal", true);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void implParityHarBlockFix(String testName, boolean local)
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("Test " + testName + " started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, 0); // Time before har = 0 days.
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
// Parity file will have 7 blocks.
|
|
||||||
TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 20, blockSize);
|
|
||||||
LOG.info("Test " + testName + " created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
// HAR block size = 2 * src block size = 2 * parity block size.
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.setLong("har.block.size", blockSize * 2);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
if (local) {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
|
||||||
} else {
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.DistBlockFixer");
|
|
||||||
}
|
|
||||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
|
||||||
|
|
||||||
try {
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
Path harDirectory =
|
|
||||||
new Path("/destraid/user/dhruba/raidtest/raidtest" +
|
|
||||||
RaidNode.HAR_SUFFIX);
|
|
||||||
long start = Time.now();
|
|
||||||
while (Time.now() - start < 1000 * 120) {
|
|
||||||
if (fileSys.exists(harDirectory)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
LOG.info("Test " + testName + " waiting for har");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
Path partFile = new Path(harDirectory, "part-0");
|
|
||||||
long partCRC = getCRC(fileSys, partFile);
|
|
||||||
FileStatus partStat = fileSys.getFileStatus(partFile);
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
LocatedBlocks locs = RaidDFSUtil.getBlockLocations(
|
|
||||||
dfs, partFile.toUri().getPath(), 0, partStat.getLen());
|
|
||||||
// 7 parity blocks => 4 har blocks.
|
|
||||||
assertEquals("wrong number of har blocks",
|
|
||||||
4, locs.getLocatedBlocks().size());
|
|
||||||
cnode.stop(); cnode.join();
|
|
||||||
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
|
||||||
assertEquals("filesFixed() should return 0 before fixing files",
|
|
||||||
0, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
// Corrupt parity blocks for different stripes.
|
|
||||||
int[] corruptBlockIdxs = new int[]{0, 3};
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(locs.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, partFile, corruptBlockIdxs,
|
|
||||||
partStat.getBlockSize());
|
|
||||||
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("file not corrupted", 1, corruptFiles.length);
|
|
||||||
assertEquals("wrong file corrupted",
|
|
||||||
corruptFiles[0], partFile.toUri().getPath());
|
|
||||||
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
start = Time.now();
|
|
||||||
while (cnode.blockFixer.filesFixed() < 1 &&
|
|
||||||
Time.now() - start < 120000) {
|
|
||||||
LOG.info("Test " + testName + " waiting for files to be fixed.");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
assertEquals("file not fixed",
|
|
||||||
1, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
long checkCRC = getCRC(fileSys, partFile);
|
|
||||||
|
|
||||||
assertEquals("file not fixed",
|
|
||||||
partCRC, checkCRC);
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test " + testName + " Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
LOG.info("Test " + testName + " completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected static DistributedFileSystem getDFS(
|
|
||||||
Configuration conf, FileSystem dfs) throws IOException {
|
|
||||||
Configuration clientConf = new Configuration(conf);
|
|
||||||
clientConf.set("fs.hdfs.impl",
|
|
||||||
"org.apache.hadoop.hdfs.DistributedFileSystem");
|
|
||||||
clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
|
|
||||||
URI dfsUri = dfs.getUri();
|
|
||||||
FileSystem.closeAll();
|
|
||||||
return (DistributedFileSystem) FileSystem.get(dfsUri, clientConf);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
|
|
||||||
conf.set("raid.config.file", CONFIG_FILE);
|
|
||||||
conf.setBoolean("raid.config.reload", true);
|
|
||||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
|
|
||||||
// scan all policies once every 5 seconds
|
|
||||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
// do not use map-reduce cluster for Raiding
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
conf.set("raid.server.address", "localhost:0");
|
|
||||||
conf.setInt("hdfs.raid.stripeLength", stripeLength);
|
|
||||||
conf.set("hdfs.raid.locations", "/destraid");
|
|
||||||
|
|
||||||
conf.setBoolean("dfs.permissions", false);
|
|
||||||
|
|
||||||
conf.set("mapreduce.framework.name", "yarn");
|
|
||||||
|
|
||||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
namenode = fileSys.getUri().toString();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
mr = new MiniMRCluster(4, namenode, 3);
|
|
||||||
JobConf jobConf = mr.createJobConf();
|
|
||||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
|
||||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
conf.set("mapred.job.tracker", jobTrackerName);
|
|
||||||
conf.set("mapreduce.framework.name", "yarn");
|
|
||||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
|
||||||
if (rmAdress != null) {
|
|
||||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
|
||||||
}
|
|
||||||
String schedulerAdress =
|
|
||||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
|
||||||
if (schedulerAdress != null) {
|
|
||||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
|
||||||
}
|
|
||||||
String jobHistoryAddress =
|
|
||||||
jobConf.get("mapreduce.jobhistory.address");
|
|
||||||
if (jobHistoryAddress != null) {
|
|
||||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
|
||||||
}
|
|
||||||
conf.set(JobContext.JAR, TestBlockFixer.DistBlockFixer_JAR);
|
|
||||||
|
|
||||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
|
||||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
|
||||||
String str = "<configuration> " +
|
|
||||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
|
||||||
"<policy name = \"RaidTest1\"> " +
|
|
||||||
"<erasureCode>xor</erasureCode> " +
|
|
||||||
"<destPath> /destraid</destPath> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>targetReplication</name> " +
|
|
||||||
"<value>1</value> " +
|
|
||||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>metaReplication</name> " +
|
|
||||||
"<value>1</value> " +
|
|
||||||
"<description> replication factor of parity file" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>modTimePeriod</name> " +
|
|
||||||
"<value>2000</value> " +
|
|
||||||
"<description> time (milliseconds) after a file is modified to make it " +
|
|
||||||
"a candidate for RAIDing " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> ";
|
|
||||||
if (timeBeforeHar >= 0) {
|
|
||||||
str +=
|
|
||||||
"<property> " +
|
|
||||||
"<name>time_before_har</name> " +
|
|
||||||
"<value>" + timeBeforeHar + "</value> " +
|
|
||||||
"<description> amount of time waited before har'ing parity files" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> ";
|
|
||||||
}
|
|
||||||
|
|
||||||
str +=
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>" +
|
|
||||||
"</configuration>";
|
|
||||||
fileWriter.write(str);
|
|
||||||
fileWriter.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void myTearDown() throws Exception {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
if (mr != null) { mr.shutdown(); }
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
public static long getCRC(FileSystem fs, Path p) throws IOException {
|
|
||||||
CRC32 crc = new CRC32();
|
|
||||||
FSDataInputStream stm = fs.open(p);
|
|
||||||
int b;
|
|
||||||
while ((b = stm.read())>=0) {
|
|
||||||
crc.update(b);
|
|
||||||
}
|
|
||||||
stm.close();
|
|
||||||
return crc.getValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
void corruptBlock(ExtendedBlock block) throws IOException {
|
|
||||||
assertTrue("Could not corrupt block",
|
|
||||||
dfs.corruptBlockOnDataNodes(block) > 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void reportCorruptBlocks(FileSystem fs, Path file, int[] idxs,
|
|
||||||
long blockSize) throws IOException {
|
|
||||||
|
|
||||||
FSDataInputStream in = fs.open(file);
|
|
||||||
for (int idx: idxs) {
|
|
||||||
long offset = idx * blockSize;
|
|
||||||
LOG.info("Reporting corrupt block " + file + ":" + offset);
|
|
||||||
in.seek(offset);
|
|
||||||
try {
|
|
||||||
in.readFully(new byte[(int)blockSize]);
|
|
||||||
fail("Expected exception not thrown for " + file + ":" + offset);
|
|
||||||
} catch (org.apache.hadoop.fs.ChecksumException e) {
|
|
||||||
} catch (org.apache.hadoop.hdfs.BlockMissingException bme) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,26 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;
import org.junit.Test;

public class TestBlockFixerBlockFixDist extends TestBlockFixer {
  @Test
  public void testBlockFixDist() throws Exception {
    implBlockFix(false);
  }
}
@ -1,245 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestBlockFixerDistConcurrency extends TestBlockFixer {
|
|
||||||
/**
|
|
||||||
* tests that we can have 2 concurrent jobs fixing files
|
|
||||||
* (dist block fixer)
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testConcurrentJobs() throws Exception {
|
|
||||||
LOG.info("Test testConcurrentJobs started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, -1); // never har
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
Path file2 = new Path("/user/dhruba/raidtest/file2");
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 20, blockSize);
|
|
||||||
long crc2 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file2,
|
|
||||||
1, 20, blockSize);
|
|
||||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
|
||||||
long file2Len = fileSys.getFileStatus(file2).getLen();
|
|
||||||
LOG.info("Test testConcurrentJobs created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.DistBlockFixer");
|
|
||||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
|
||||||
|
|
||||||
try {
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file2, destPath);
|
|
||||||
cnode.stop(); cnode.join();
|
|
||||||
|
|
||||||
FileStatus file1Stat = fileSys.getFileStatus(file1);
|
|
||||||
FileStatus file2Stat = fileSys.getFileStatus(file2);
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
LocatedBlocks file1Loc =
|
|
||||||
RaidDFSUtil.getBlockLocations(dfs, file1.toUri().getPath(),
|
|
||||||
0, file1Stat.getLen());
|
|
||||||
LocatedBlocks file2Loc =
|
|
||||||
RaidDFSUtil.getBlockLocations(dfs, file2.toUri().getPath(),
|
|
||||||
0, file2Stat.getLen());
|
|
||||||
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
|
||||||
assertEquals("filesFixed() should return 0 before fixing files",
|
|
||||||
0, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
// corrupt file1
|
|
||||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(file1Loc.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
|
|
||||||
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
DistBlockFixer blockFixer = (DistBlockFixer) cnode.blockFixer;
|
|
||||||
long start = Time.now();
|
|
||||||
|
|
||||||
while (blockFixer.jobsRunning() < 1 &&
|
|
||||||
Time.now() - start < 240000) {
|
|
||||||
LOG.info("Test testBlockFix waiting for fixing job 1 to start");
|
|
||||||
Thread.sleep(10);
|
|
||||||
}
|
|
||||||
assertEquals("job 1 not running", 1, blockFixer.jobsRunning());
|
|
||||||
|
|
||||||
// corrupt file2
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(file2Loc.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file2, corruptBlockIdxs, blockSize);
|
|
||||||
|
|
||||||
while (blockFixer.jobsRunning() < 2 &&
|
|
||||||
Time.now() - start < 240000) {
|
|
||||||
LOG.info("Test testBlockFix waiting for fixing job 2 to start");
|
|
||||||
Thread.sleep(10);
|
|
||||||
}
|
|
||||||
assertEquals("2 jobs not running", 2, blockFixer.jobsRunning());
|
|
||||||
|
|
||||||
while (blockFixer.filesFixed() < 2 &&
|
|
||||||
Time.now() - start < 240000) {
|
|
||||||
LOG.info("Test testBlockFix waiting for files to be fixed.");
|
|
||||||
Thread.sleep(10);
|
|
||||||
}
|
|
||||||
assertEquals("files not fixed", 2, blockFixer.filesFixed());
|
|
||||||
|
|
||||||
dfs = getDFS(conf, dfs);
|
|
||||||
|
|
||||||
try {
|
|
||||||
Thread.sleep(5*1000);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
}
|
|
||||||
assertTrue("file not fixed",
|
|
||||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
|
||||||
assertTrue("file not fixed",
|
|
||||||
TestRaidDfs.validateFile(dfs, file2, file2Len, crc2));
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test testConcurrentJobs exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* tests that the distributed block fixer obeys
|
|
||||||
* the limit on how many files to fix simultaneously
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testMaxPendingFiles() throws Exception {
|
|
||||||
LOG.info("Test testMaxPendingFiles started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, -1); // never har
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
Path file2 = new Path("/user/dhruba/raidtest/file2");
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 20, blockSize);
|
|
||||||
long crc2 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file2,
|
|
||||||
1, 20, blockSize);
|
|
||||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
|
||||||
long file2Len = fileSys.getFileStatus(file2).getLen();
|
|
||||||
LOG.info("Test testMaxPendingFiles created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
localConf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.DistBlockFixer");
|
|
||||||
localConf.setLong("raid.blockfix.filespertask", 2L);
|
|
||||||
localConf.setLong("raid.blockfix.maxpendingfiles", 1L);
|
|
||||||
|
|
||||||
try {
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file2, destPath);
|
|
||||||
cnode.stop(); cnode.join();
|
|
||||||
|
|
||||||
FileStatus file1Stat = fileSys.getFileStatus(file1);
|
|
||||||
FileStatus file2Stat = fileSys.getFileStatus(file2);
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
LocatedBlocks file1Loc =
|
|
||||||
RaidDFSUtil.getBlockLocations(dfs, file1.toUri().getPath(),
|
|
||||||
0, file1Stat.getLen());
|
|
||||||
LocatedBlocks file2Loc =
|
|
||||||
RaidDFSUtil.getBlockLocations(dfs, file2.toUri().getPath(),
|
|
||||||
0, file2Stat.getLen());
|
|
||||||
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals("no corrupt files expected", 0, corruptFiles.length);
|
|
||||||
assertEquals("filesFixed() should return 0 before fixing files",
|
|
||||||
0, cnode.blockFixer.filesFixed());
|
|
||||||
|
|
||||||
// corrupt file1
|
|
||||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(file1Loc.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file1, corruptBlockIdxs, blockSize);
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
DistBlockFixer blockFixer = (DistBlockFixer) cnode.blockFixer;
|
|
||||||
long start = Time.now();
|
|
||||||
|
|
||||||
while (blockFixer.jobsRunning() < 1 &&
|
|
||||||
Time.now() - start < 240000) {
|
|
||||||
LOG.info("Test testBlockFix waiting for fixing job 1 to start");
|
|
||||||
Thread.sleep(10);
|
|
||||||
}
|
|
||||||
assertEquals("job not running", 1, blockFixer.jobsRunning());
|
|
||||||
|
|
||||||
// corrupt file2
|
|
||||||
for (int idx: corruptBlockIdxs)
|
|
||||||
corruptBlock(file2Loc.get(idx).getBlock());
|
|
||||||
reportCorruptBlocks(dfs, file2, corruptBlockIdxs, blockSize);
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
|
|
||||||
// wait until both files are fixed
|
|
||||||
while (blockFixer.filesFixed() < 2 &&
|
|
||||||
Time.now() - start < 240000) {
|
|
||||||
// make sure the block fixer does not start a second job while
|
|
||||||
// the first one is still running
|
|
||||||
assertTrue("too many jobs running", blockFixer.jobsRunning() <= 1);
|
|
||||||
Thread.sleep(10);
|
|
||||||
}
|
|
||||||
assertEquals("files not fixed", 2, blockFixer.filesFixed());
|
|
||||||
|
|
||||||
dfs = getDFS(conf, dfs);
|
|
||||||
|
|
||||||
try {
|
|
||||||
Thread.sleep(5*1000);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
}
|
|
||||||
assertTrue("file not fixed",
|
|
||||||
TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
|
||||||
assertTrue("file not fixed",
|
|
||||||
TestRaidDfs.validateFile(dfs, file2, file2Len, crc2));
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test testMaxPendingFiles exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,45 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestBlockFixerGeneratedBlockDist extends TestBlockFixer {
|
|
||||||
/**
|
|
||||||
* Tests integrity of generated block.
|
|
||||||
* Create a file and delete a block entirely. Wait for the block to be
|
|
||||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
|
||||||
* Test that corruption in the generated block can be detected by clients.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testGeneratedBlockDist() throws Exception {
|
|
||||||
generatedBlockTestCommon("testGeneratedBlock", 3, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests integrity of generated last block.
|
|
||||||
* Create a file and delete a block entirely. Wait for the block to be
|
|
||||||
* regenerated. Now stop RaidNode and corrupt the generated block.
|
|
||||||
* Test that corruption in the generated block can be detected by clients.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testGeneratedLastBlockDist() throws Exception {
|
|
||||||
generatedBlockTestCommon("testGeneratedLastBlock", 6, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,32 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestBlockFixerParityBlockFixDist extends TestBlockFixer {
|
|
||||||
@Test
|
|
||||||
public void testParityBlockFixDist() throws Exception {
|
|
||||||
implParityBlockFix("testParityBlockFixDist", false);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testParityHarBlockFixDist() throws Exception {
|
|
||||||
implParityHarBlockFix("testParityHarBlockFixDist", false);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,228 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.mapred.Reporter;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestDirectoryTraversal {
|
|
||||||
final static Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.TestDirectoryTraversal");
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
FileSystem fs = null;
|
|
||||||
Configuration conf = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test basic enumeration.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testEnumeration() throws IOException {
|
|
||||||
mySetup();
|
|
||||||
|
|
||||||
try {
|
|
||||||
Path topDir = new Path(TEST_DIR + "/testenumeration");
|
|
||||||
|
|
||||||
createTestTree(topDir);
|
|
||||||
|
|
||||||
LOG.info("Enumerating files");
|
|
||||||
List<FileStatus> startPaths = new LinkedList<FileStatus>();
|
|
||||||
startPaths.add(fs.getFileStatus(topDir));
|
|
||||||
DirectoryTraversal dt = new DirectoryTraversal(fs, startPaths, 2);
|
|
||||||
|
|
||||||
List<FileStatus> selected = new LinkedList<FileStatus>();
|
|
||||||
while (true) {
|
|
||||||
FileStatus f = dt.getNextFile();
|
|
||||||
if (f == null) break;
|
|
||||||
assertEquals(false, f.isDir());
|
|
||||||
LOG.info(f.getPath());
|
|
||||||
selected.add(f);
|
|
||||||
}
|
|
||||||
assertEquals(5, selected.size());
|
|
||||||
|
|
||||||
LOG.info("Enumerating directories");
|
|
||||||
startPaths.clear();
|
|
||||||
startPaths.add(fs.getFileStatus(topDir));
|
|
||||||
dt = new DirectoryTraversal(fs, startPaths);
|
|
||||||
selected.clear();
|
|
||||||
while (true) {
|
|
||||||
FileStatus dir = dt.getNextDirectory();
|
|
||||||
if (dir == null) break;
|
|
||||||
assertEquals(true, dir.isDir());
|
|
||||||
LOG.info(dir.getPath());
|
|
||||||
selected.add(dir);
|
|
||||||
}
|
|
||||||
assertEquals(4, selected.size());
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSuspension() throws IOException {
|
|
||||||
LOG.info("Starting testSuspension");
|
|
||||||
mySetup();
|
|
||||||
|
|
||||||
try {
|
|
||||||
Path topDir = new Path(TEST_DIR + "/testenumeration");
|
|
||||||
|
|
||||||
createTestTree(topDir);
|
|
||||||
|
|
||||||
String top = topDir.toString();
|
|
||||||
List<FileStatus> startPaths = new LinkedList<FileStatus>();
|
|
||||||
startPaths.add(fs.getFileStatus(new Path(top + "/a")));
|
|
||||||
startPaths.add(fs.getFileStatus(new Path(top + "/b")));
|
|
||||||
DirectoryTraversal dt = new DirectoryTraversal(fs, startPaths);
|
|
||||||
|
|
||||||
int limit = 2;
|
|
||||||
short targetRepl = 1;
|
|
||||||
Path raid = new Path("/raid");
|
|
||||||
DirectoryTraversal.FileFilter filter =
|
|
||||||
new RaidFilter.TimeBasedFilter(conf,
|
|
||||||
RaidNode.xorDestinationPath(conf), 1, Time.now(), 0);
|
|
||||||
List<FileStatus> selected = dt.getFilteredFiles(filter, limit);
|
|
||||||
for (FileStatus f: selected) {
|
|
||||||
LOG.info(f.getPath());
|
|
||||||
}
|
|
||||||
assertEquals(limit, selected.size());
|
|
||||||
|
|
||||||
selected = dt.getFilteredFiles(filter, limit);
|
|
||||||
for (FileStatus f: selected) {
|
|
||||||
LOG.info(f.getPath());
|
|
||||||
}
|
|
||||||
assertEquals(limit, selected.size());
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFileFilter() throws IOException {
|
|
||||||
mySetup();
|
|
||||||
|
|
||||||
try {
|
|
||||||
Path topDir = new Path(TEST_DIR + "/testFileFilter");
|
|
||||||
int targetRepl = 1;
|
|
||||||
createTestTree(topDir);
|
|
||||||
Path file = new Path(topDir.toString() + "/a/f1");
|
|
||||||
FileStatus stat = fs.getFileStatus(file);
|
|
||||||
PolicyInfo info = new PolicyInfo("testFileFilter", conf);
|
|
||||||
info.setSrcPath(topDir.toString());
|
|
||||||
info.setErasureCode("rs");
|
|
||||||
info.setDescription("test policy");
|
|
||||||
info.setProperty("targetReplication", "1");
|
|
||||||
info.setProperty("metaReplication", "1");
|
|
||||||
|
|
||||||
DirectoryTraversal.FileFilter timeBasedXORFilter =
|
|
||||||
new RaidFilter.TimeBasedFilter(conf,
|
|
||||||
RaidNode.xorDestinationPath(conf), targetRepl,
|
|
||||||
Time.now(), 0);
|
|
||||||
DirectoryTraversal.FileFilter timeBasedRSFilter =
|
|
||||||
new RaidFilter.TimeBasedFilter(conf,
|
|
||||||
RaidNode.rsDestinationPath(conf), targetRepl,
|
|
||||||
Time.now(), 0);
|
|
||||||
DirectoryTraversal.FileFilter preferenceForRSFilter =
|
|
||||||
new RaidFilter.PreferenceFilter(
|
|
||||||
conf, RaidNode.rsDestinationPath(conf),
|
|
||||||
RaidNode.xorDestinationPath(conf), 1, Time.now(), 0);
|
|
||||||
|
|
||||||
assertTrue(timeBasedXORFilter.check(stat));
|
|
||||||
assertTrue(timeBasedRSFilter.check(stat));
|
|
||||||
assertTrue(preferenceForRSFilter.check(stat));
|
|
||||||
|
|
||||||
RaidNode.doRaid(
|
|
||||||
conf, info, stat, new RaidNode.Statistics(), Reporter.NULL);
|
|
||||||
|
|
||||||
assertTrue(timeBasedXORFilter.check(stat));
|
|
||||||
assertFalse(timeBasedRSFilter.check(stat));
|
|
||||||
assertFalse(preferenceForRSFilter.check(stat));
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a test directory tree.
|
|
||||||
* top
|
|
||||||
* / | \
|
|
||||||
* / | f5
|
|
||||||
* a b___
|
|
||||||
* / \ |\ \
|
|
||||||
* f1 f2 f3f4 c
|
|
||||||
*/
|
|
||||||
private void createTestTree(Path topDir) throws IOException {
|
|
||||||
String top = topDir.toString();
|
|
||||||
fs.delete(topDir, true);
|
|
||||||
|
|
||||||
fs.mkdirs(topDir);
|
|
||||||
fs.create(new Path(top + "/f5")).close();
|
|
||||||
|
|
||||||
fs.mkdirs(new Path(top + "/a"));
|
|
||||||
createTestFile(new Path(top + "/a/f1"));
|
|
||||||
createTestFile(new Path(top + "/a/f2"));
|
|
||||||
|
|
||||||
fs.mkdirs(new Path(top + "/b"));
|
|
||||||
fs.mkdirs(new Path(top + "/b/c"));
|
|
||||||
createTestFile(new Path(top + "/b/f3"));
|
|
||||||
createTestFile(new Path(top + "/b/f4"));
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createTestFile(Path file) throws IOException {
|
|
||||||
long blockSize = 8192;
|
|
||||||
byte[] bytes = new byte[(int)blockSize];
|
|
||||||
FSDataOutputStream stm = fs.create(file, false, 4096, (short)1, blockSize);
|
|
||||||
stm.write(bytes);
|
|
||||||
stm.write(bytes);
|
|
||||||
stm.write(bytes);
|
|
||||||
stm.close();
|
|
||||||
FileStatus stat = fs.getFileStatus(file);
|
|
||||||
assertEquals(blockSize, stat.getBlockSize());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mySetup() throws IOException {
|
|
||||||
conf = new Configuration();
|
|
||||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
|
|
||||||
dfs.waitActive();
|
|
||||||
fs = dfs.getFileSystem();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void myTearDown() {
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,245 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
public class TestErasureCodes {
|
|
||||||
final int TEST_CODES = 100;
|
|
||||||
final int TEST_TIMES = 1000;
|
|
||||||
final Random RAND = new Random();
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEncodeDecode() {
|
|
||||||
for (int n = 0; n < TEST_CODES; n++) {
|
|
||||||
int stripeSize = RAND.nextInt(99) + 1; // 1, 2, 3, ... 100
|
|
||||||
int paritySize = RAND.nextInt(9) + 1; //1, 2, 3, 4, ... 10
|
|
||||||
ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
|
|
||||||
for (int m = 0; m < TEST_TIMES; m++) {
|
|
||||||
int symbolMax = (int) Math.pow(2, ec.symbolSize());
|
|
||||||
int[] message = new int[stripeSize];
|
|
||||||
for (int i = 0; i < stripeSize; i++) {
|
|
||||||
message[i] = RAND.nextInt(symbolMax);
|
|
||||||
}
|
|
||||||
int[] parity = new int[paritySize];
|
|
||||||
ec.encode(message, parity);
|
|
||||||
int[] data = new int[stripeSize + paritySize];
|
|
||||||
int[] copy = new int[data.length];
|
|
||||||
for (int i = 0; i < paritySize; i++) {
|
|
||||||
data[i] = parity[i];
|
|
||||||
copy[i] = parity[i];
|
|
||||||
}
|
|
||||||
for (int i = 0; i < stripeSize; i++) {
|
|
||||||
data[i + paritySize] = message[i];
|
|
||||||
copy[i + paritySize] = message[i];
|
|
||||||
}
|
|
||||||
int erasedLen = paritySize == 1 ? 1 : RAND.nextInt(paritySize - 1) + 1;
|
|
||||||
int[] erasedLocations = randomErasedLocation(erasedLen, data.length);
|
|
||||||
for (int i = 0; i < erasedLocations.length; i++) {
|
|
||||||
data[erasedLocations[i]] = 0;
|
|
||||||
}
|
|
||||||
int[] erasedValues = new int[erasedLen];
|
|
||||||
ec.decode(data, erasedLocations, erasedValues);
|
|
||||||
for (int i = 0; i < erasedLen; i++) {
|
|
||||||
assertEquals("Decode failed", copy[erasedLocations[i]], erasedValues[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testRSPerformance() {
|
|
||||||
int stripeSize = 10;
|
|
||||||
int paritySize = 4;
|
|
||||||
ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
|
|
||||||
int symbolMax = (int) Math.pow(2, ec.symbolSize());
|
|
||||||
byte[][] message = new byte[stripeSize][];
|
|
||||||
int bufsize = 1024 * 1024 * 10;
|
|
||||||
for (int i = 0; i < stripeSize; i++) {
|
|
||||||
message[i] = new byte[bufsize];
|
|
||||||
for (int j = 0; j < bufsize; j++) {
|
|
||||||
message[i][j] = (byte) RAND.nextInt(symbolMax);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
byte[][] parity = new byte[paritySize][];
|
|
||||||
for (int i = 0; i < paritySize; i++) {
|
|
||||||
parity[i] = new byte[bufsize];
|
|
||||||
}
|
|
||||||
long encodeStart = Time.now();
|
|
||||||
int[] tmpIn = new int[stripeSize];
|
|
||||||
int[] tmpOut = new int[paritySize];
|
|
||||||
for (int i = 0; i < bufsize; i++) {
|
|
||||||
// Copy message.
|
|
||||||
for (int j = 0; j < stripeSize; j++) tmpIn[j] = 0x000000FF & message[j][i];
|
|
||||||
ec.encode(tmpIn, tmpOut);
|
|
||||||
// Copy parity.
|
|
||||||
for (int j = 0; j < paritySize; j++) parity[j][i] = (byte)tmpOut[j];
|
|
||||||
}
|
|
||||||
long encodeEnd = Time.now();
|
|
||||||
float encodeMSecs = (encodeEnd - encodeStart);
|
|
||||||
System.out.println("Time to encode rs = " + encodeMSecs +
|
|
||||||
"msec (" + message[0].length / (1000 * encodeMSecs) + " MB/s)");
|
|
||||||
|
|
||||||
// Copy erased array.
|
|
||||||
int[] data = new int[paritySize + stripeSize];
|
|
||||||
// 4th location is the 0th symbol in the message
|
|
||||||
int[] erasedLocations = new int[]{4, 1, 5, 7};
|
|
||||||
int[] erasedValues = new int[erasedLocations.length];
|
|
||||||
byte[] copy = new byte[bufsize];
|
|
||||||
for (int j = 0; j < bufsize; j++) {
|
|
||||||
copy[j] = message[0][j];
|
|
||||||
message[0][j] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
long decodeStart = Time.now();
|
|
||||||
for (int i = 0; i < bufsize; i++) {
|
|
||||||
// Copy parity first.
|
|
||||||
for (int j = 0; j < paritySize; j++) {
|
|
||||||
data[j] = 0x000000FF & parity[j][i];
|
|
||||||
}
|
|
||||||
// Copy message. Skip 0 as the erased symbol
|
|
||||||
for (int j = 1; j < stripeSize; j++) {
|
|
||||||
data[j + paritySize] = 0x000000FF & message[j][i];
|
|
||||||
}
|
|
||||||
// Use 0, 2, 3, 6, 8, 9, 10, 11, 12, 13th symbol to reconstruct the data
|
|
||||||
ec.decode(data, erasedLocations, erasedValues);
|
|
||||||
message[0][i] = (byte)erasedValues[0];
|
|
||||||
}
|
|
||||||
long decodeEnd = Time.now();
|
|
||||||
float decodeMSecs = (decodeEnd - decodeStart);
|
|
||||||
System.out.println("Time to decode = " + decodeMSecs +
|
|
||||||
"msec (" + message[0].length / (1000 * decodeMSecs) + " MB/s)");
|
|
||||||
assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testXorPerformance() {
|
|
||||||
java.util.Random RAND = new java.util.Random();
|
|
||||||
int stripeSize = 10;
|
|
||||||
byte[][] message = new byte[stripeSize][];
|
|
||||||
int bufsize = 1024 * 1024 * 10;
|
|
||||||
for (int i = 0; i < stripeSize; i++) {
|
|
||||||
message[i] = new byte[bufsize];
|
|
||||||
for (int j = 0; j < bufsize; j++) {
|
|
||||||
message[i][j] = (byte)RAND.nextInt(256);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
byte[] parity = new byte[bufsize];
|
|
||||||
|
|
||||||
long encodeStart = Time.now();
|
|
||||||
for (int i = 0; i < bufsize; i++) {
|
|
||||||
for (int j = 0; j < stripeSize; j++) parity[i] ^= message[j][i];
|
|
||||||
}
|
|
||||||
long encodeEnd = Time.now();
|
|
||||||
float encodeMSecs = encodeEnd - encodeStart;
|
|
||||||
System.out.println("Time to encode xor = " + encodeMSecs +
|
|
||||||
" msec (" + message[0].length / (1000 * encodeMSecs) + "MB/s)");
|
|
||||||
|
|
||||||
byte[] copy = new byte[bufsize];
|
|
||||||
for (int j = 0; j < bufsize; j++) {
|
|
||||||
copy[j] = message[0][j];
|
|
||||||
message[0][j] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
long decodeStart = Time.now();
|
|
||||||
for (int i = 0; i < bufsize; i++) {
|
|
||||||
for (int j = 1; j < stripeSize; j++) message[0][i] ^= message[j][i];
|
|
||||||
message[0][i] ^= parity[i];
|
|
||||||
}
|
|
||||||
long decodeEnd = Time.now();
|
|
||||||
float decodeMSecs = decodeEnd - decodeStart;
|
|
||||||
System.out.println("Time to decode xor = " + decodeMSecs +
|
|
||||||
" msec (" + message[0].length / (1000 * decodeMSecs) + "MB/s)");
|
|
||||||
assertTrue("Decode failed", java.util.Arrays.equals(copy, message[0]));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testComputeErrorLocations() {
|
|
||||||
for (int i = 0; i < TEST_TIMES; ++i) {
|
|
||||||
verifyErrorLocations(10, 4, 1);
|
|
||||||
verifyErrorLocations(10, 4, 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void verifyErrorLocations(int stripeSize, int paritySize, int errors) {
|
|
||||||
int[] message = new int[stripeSize];
|
|
||||||
int[] parity = new int[paritySize];
|
|
||||||
Set<Integer> errorLocations = new HashSet<Integer>();
|
|
||||||
for (int i = 0; i < message.length; ++i) {
|
|
||||||
message[i] = RAND.nextInt(256);
|
|
||||||
}
|
|
||||||
while (errorLocations.size() < errors) {
|
|
||||||
int loc = RAND.nextInt(stripeSize + paritySize);
|
|
||||||
errorLocations.add(loc);
|
|
||||||
}
|
|
||||||
ReedSolomonCode codec = new ReedSolomonCode(stripeSize, paritySize);
|
|
||||||
codec.encode(message, parity);
|
|
||||||
int[] data = combineArrays(parity, message);
|
|
||||||
for (Integer i : errorLocations) {
|
|
||||||
data[i] = randError(data[i]);
|
|
||||||
}
|
|
||||||
Set<Integer> recoveredLocations = new HashSet<Integer>();
|
|
||||||
boolean resolved = codec.computeErrorLocations(data, recoveredLocations);
|
|
||||||
if (resolved) {
|
|
||||||
assertEquals(errorLocations, recoveredLocations);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private int randError(int actual) {
|
|
||||||
while (true) {
|
|
||||||
int r = RAND.nextInt(256);
|
|
||||||
if (r != actual) {
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private int[] combineArrays(int[] array1, int[] array2) {
|
|
||||||
int[] result = new int[array1.length + array2.length];
|
|
||||||
for (int i = 0; i < array1.length; ++i) {
|
|
||||||
result[i] = array1[i];
|
|
||||||
}
|
|
||||||
for (int i = 0; i < array2.length; ++i) {
|
|
||||||
result[i + array1.length] = array2[i];
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private int[] randomErasedLocation(int erasedLen, int dataLen) {
|
|
||||||
int[] erasedLocations = new int[erasedLen];
|
|
||||||
for (int i = 0; i < erasedLen; i++) {
|
|
||||||
Set<Integer> s = new HashSet<Integer>();
|
|
||||||
while (s.size() != erasedLen) {
|
|
||||||
s.add(RAND.nextInt(dataLen));
|
|
||||||
}
|
|
||||||
int t = 0;
|
|
||||||
for (int erased : s) {
|
|
||||||
erasedLocations[t++] = erased;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return erasedLocations;
|
|
||||||
}
|
|
||||||
}
|
|
|
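The TestErasureCodes diff above exercises a Reed-Solomon encode/decode round trip. The short sketch below is not part of the commit: the class name RsRoundTripSketch, its main method, and the example stripe/parity sizes are made up for illustration, while the ErasureCode/ReedSolomonCode constructor, symbolSize(), encode() and decode() calls and the parity-first stripe layout are taken directly from the test code shown above.

// Illustrative sketch only -- mirrors the round trip in TestErasureCodes above.
package org.apache.hadoop.raid;

import java.util.Random;

public class RsRoundTripSketch {
  public static void main(String[] args) {
    int stripeSize = 10;
    int paritySize = 4;
    ErasureCode ec = new ReedSolomonCode(stripeSize, paritySize);
    int symbolMax = (int) Math.pow(2, ec.symbolSize());
    Random rand = new Random();

    // Random message symbols and their parity.
    int[] message = new int[stripeSize];
    for (int i = 0; i < stripeSize; i++) {
      message[i] = rand.nextInt(symbolMax);
    }
    int[] parity = new int[paritySize];
    ec.encode(message, parity);

    // Stripe layout used by the test: parity symbols first, then data symbols.
    int[] data = new int[paritySize + stripeSize];
    for (int i = 0; i < paritySize; i++) {
      data[i] = parity[i];
    }
    for (int i = 0; i < stripeSize; i++) {
      data[i + paritySize] = message[i];
    }

    // Erase the first data symbol and recover it from the surviving symbols.
    int erasedLocation = paritySize;
    int expected = data[erasedLocation];
    data[erasedLocation] = 0;
    int[] erasedValues = new int[1];
    ec.decode(data, new int[] {erasedLocation}, erasedValues);
    System.out.println("recovered=" + erasedValues[0] + " expected=" + expected);
  }
}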
@@ -1,190 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;

import static org.junit.Assert.assertTrue;

import java.util.HashSet;
import java.util.Random;
import java.util.Set;

import org.junit.Test;

public class TestGaloisField {

  final int TEST_TIMES = 10000;
  final Random RAND = new Random();
  final static GaloisField GF = GaloisField.getInstance();

  private int randGF() {
    return 0x000000FF & RAND.nextInt(GF.getFieldSize());
  }

  private int[] randGFPoly(int len) {
    int[] result = new int[len];
    for (int i = 0; i < len; i++) {
      result[i] = randGF();
    }
    return result;
  }

  @Test
  public void testGetInstance() {
    GaloisField gf1 = GaloisField.getInstance(256, 285);
    GaloisField gf2 = GaloisField.getInstance();
    GaloisField gf3 = GaloisField.getInstance(128, 137);
    GaloisField gf4 = GaloisField.getInstance(128, 137);
    GaloisField gf5 = GaloisField.getInstance(512, 529);
    GaloisField gf6 = GaloisField.getInstance(512, 529);
    assertTrue(gf1 == gf2);
    assertTrue(gf3 == gf4);
    assertTrue(gf5 == gf6);
  }

  @Test
  public void testDistributivity() {
    for (int i = 0; i < TEST_TIMES; i++) {
      int a = RAND.nextInt(GF.getFieldSize());
      int b = RAND.nextInt(GF.getFieldSize());
      int c = RAND.nextInt(GF.getFieldSize());
      int result1 = GF.multiply(a, GF.add(b, c));
      int result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
      assertTrue("Distributivity test #" + i + " failed: " + a + ", " + b + ", "
          + c, result1 == result2);
    }
  }

  @Test
  public void testDevision() {
    for (int i = 0; i < TEST_TIMES; i++) {
      int a = RAND.nextInt(GF.getFieldSize());
      int b = RAND.nextInt(GF.getFieldSize());
      if (b == 0) {
        continue;
      }
      int c = GF.divide(a, b);
      assertTrue("Division test #" + i + " failed: " + a + "/" + b + " = " + c,
          a == GF.multiply(c, b));
    }
  }

  @Test
  public void testPower() {
    for (int i = 0; i < TEST_TIMES; i++) {
      int a = randGF();
      int n = RAND.nextInt(10);
      int result1 = GF.power(a, n);
      int result2 = 1;
      for (int j = 0; j < n; j++) {
        result2 = GF.multiply(result2, a);
      }
      assert(result1 == result2);
    }
  }

  @Test
  public void testPolynomialDistributivity() {
    final int TEST_LEN = 15;
    for (int i = 0; i < TEST_TIMES; i++) {
      int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      int[] result1 = GF.multiply(a, GF.add(b, c));
      int[] result2 = GF.add(GF.multiply(a, b), GF.multiply(a, c));
      assertTrue("Distributivity test on polynomials failed",
          java.util.Arrays.equals(result1, result2));
    }
  }

  @Test
  public void testSubstitute() {
    final int TEST_LEN = 15;
    for (int i = 0; i < TEST_TIMES; i++) {
      int[] a = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      int[] b = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      int[] c = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      int x = randGF();
      // (a * b * c)(x)
      int result1 = GF.substitute(GF.multiply(GF.multiply(a, b), c), x);
      // a(x) * b(x) * c(x)
      int result2 =
          GF.multiply(GF.multiply(GF.substitute(a, x), GF.substitute(b, x)),
              GF.substitute(c, x));
      assertTrue("Substitute test on polynomial failed",
          result1 == result2);
    }
  }

  @Test
  public void testSolveVandermondeSystem() {
    final int TEST_LEN = 15;
    for (int i = 0; i < TEST_TIMES; i++) {
      int[] z = randGFPoly(RAND.nextInt(TEST_LEN - 1) + 1);
      // generate distinct values for x
      int[] x = new int[z.length];
      Set<Integer> s = new HashSet<Integer>();
      while (s.size() != z.length) {
        s.add(randGF());
      }
      int t = 0;
      for (int v : s) {
        x[t++] = v;
      }
      // compute the output for the Vandermonde system
      int[] y = new int[x.length];
      for (int j = 0; j < x.length; j++) {
        y[j] = 0;
        for (int k = 0; k < x.length; k++) {
          //y[j] = y[j] + z[k] * pow(x[k], j);
          y[j] = GF.add(y[j], GF.multiply(GF.power(x[k], j), z[k]));
        }
      }

      GF.solveVandermondeSystem(x, y);
      assertTrue("Solving Vandermonde system failed",
          java.util.Arrays.equals(y, z));
    }
  }

  @Test
  public void testRemainder() {
    final int TEST_LEN = 15;
    for (int i = 0; i < TEST_TIMES; i++) {
      int[] quotient = null;
      int[] divisor = null;
      int[] remainder = null;
      int[] dividend = null;
      while (true) {
        quotient = randGFPoly(RAND.nextInt(TEST_LEN - 3) + 3);
        divisor = randGFPoly(RAND.nextInt(quotient.length - 2) + 2);
        remainder = randGFPoly(RAND.nextInt(divisor.length - 1) + 1);
        dividend = GF.add(remainder, GF.multiply(quotient, divisor));
        if (quotient[quotient.length - 1] != 0 &&
            divisor[divisor.length - 1] != 0 &&
            remainder[remainder.length - 1] != 0) {
          // make sure all the leading terms are not zero
          break;
        }
      }
      GF.remainder(dividend, divisor);
      for (int j = 0; j < remainder.length; j++) {
        assertTrue("Distributivity test on polynomials failed",
            dividend[j] == remainder[j]);
      }
    }
  }
}
@@ -1,79 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class TestHarIndexParser {
  final static Log LOG = LogFactory.getLog(TestHarIndexParser.class);
  File indexFile = null;

  @Before
  public void setUp() throws FileNotFoundException, IOException {
    LOG.info("TestHarIndexParser.setUp()");
    indexFile = File.createTempFile("harindex", ".tmp");
    indexFile.deleteOnExit();
    OutputStreamWriter out = new OutputStreamWriter(
        new FileOutputStream(indexFile),
        Charset.forName("UTF-8"));
    out.write("%2F dir 1282018162460+0+493+hadoop+hadoop 0 0 f1 f2 f3 f4\n");
    out.write("%2Ff1 file part-0 0 1024 1282018141145+1282018140822+420+hadoop+hadoop\n");
    out.write("%2Ff3 file part-0 2048 1024 1282018148590+1282018148255+420+hadoop+hadoop\n");
    out.write("%2Ff2 file part-0 1024 1024 1282018144198+1282018143852+420+hadoop+hadoop\n");
    out.write("%2Ff4 file part-1 0 1024000 1282018162959+1282018162460+420+hadoop+hadoop\n");
    out.flush();
    out.close();
  }

  @After
  public void tearDown() {
    LOG.info("TestHarIndexParser.tearDown()");
    if (indexFile != null)
      indexFile.delete();
  }

  @Test
  public void testHarIndexParser()
      throws UnsupportedEncodingException, IOException {
    LOG.info("testHarIndexParser started.");
    InputStream in = new FileInputStream(indexFile);
    long size = indexFile.length();
    HarIndex parser = new HarIndex(in, size);

    HarIndex.IndexEntry entry = parser.findEntry("part-0", 2100);
    assertEquals("/f3", entry.fileName);

    LOG.info("testHarIndexParser finished.");
  }
}
@@ -1,121 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.util.ArrayList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.Time;
import org.junit.Test;

public class TestRaidFilter {
  final static String TEST_DIR = new File(System.getProperty("test.build.data",
      "target/test-data")).getAbsolutePath();
  final static Log LOG =
    LogFactory.getLog("org.apache.hadoop.raid.TestRaidFilter");

  Configuration conf;
  MiniDFSCluster dfs = null;
  FileSystem fs = null;

  private void mySetup() throws Exception {
    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();
    dfs = new MiniDFSCluster(conf, 2, true, null);
    dfs.waitActive();
    fs = dfs.getFileSystem();
    String namenode = fs.getUri().toString();
    FileSystem.setDefaultUri(conf, namenode);
  }

  private void myTearDown() throws Exception {
    if (dfs != null) { dfs.shutdown(); }
  }

  @Test
  public void testLayeredPolicies() throws Exception {
    mySetup();
    Path src1 = new Path("/user/foo");
    Path src2 = new Path("/user/foo/bar");

    PolicyInfo info1 = new PolicyInfo("p1", conf);
    info1.setSrcPath(src1.toString());
    info1.setErasureCode("xor");
    info1.setDescription("test policy");
    info1.setProperty("targetReplication", "1");
    info1.setProperty("metaReplication", "1");
    info1.setProperty("modTimePeriod", "0");

    PolicyInfo info2 = new PolicyInfo("p2", conf);
    info2.setSrcPath(src2.toString());
    info2.setErasureCode("xor");
    info2.setDescription("test policy");
    info2.setProperty("targetReplication", "1");
    info2.setProperty("metaReplication", "1");
    info2.setProperty("modTimePeriod", "0");

    ArrayList<PolicyInfo> all = new ArrayList<PolicyInfo>();
    all.add(info1);
    all.add(info2);

    try {
      long blockSize = 1024;
      byte[] bytes = new byte[(int)blockSize];
      Path f1 = new Path(src1, "f1");
      Path f2 = new Path(src2, "f2");
      FSDataOutputStream stm1 = fs.create(f1, false, 4096, (short)1, blockSize);
      FSDataOutputStream stm2 = fs.create(f2, false, 4096, (short)1, blockSize);
      FSDataOutputStream[] stms = new FSDataOutputStream[]{stm1, stm2};
      for (FSDataOutputStream stm: stms) {
        stm.write(bytes);
        stm.write(bytes);
        stm.write(bytes);
        stm.close();
      }

      Thread.sleep(1000);

      FileStatus stat1 = fs.getFileStatus(f1);
      FileStatus stat2 = fs.getFileStatus(f2);

      RaidFilter.Statistics stats = new RaidFilter.Statistics();
      RaidFilter.TimeBasedFilter filter = new RaidFilter.TimeBasedFilter(
        conf, RaidNode.xorDestinationPath(conf), info1, all,
        Time.now(), stats);
      System.out.println("Stats " + stats);

      assertTrue(filter.check(stat1));
      assertFalse(filter.check(stat2));

    } finally {
      myTearDown();
    }
  }
}
@@ -1,315 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.util.Random;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.commons.logging.impl.Log4JLogger;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
|
||||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
|
||||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.log4j.Level;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If a file gets deleted, then verify that the parity file gets deleted too.
|
|
||||||
*/
|
|
||||||
public class TestRaidHar {
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
|
||||||
"test-raid.xml").getAbsolutePath();
|
|
||||||
final static long RELOAD_INTERVAL = 1000;
|
|
||||||
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
|
|
||||||
final Random rand = new Random();
|
|
||||||
|
|
||||||
{
|
|
||||||
((Log4JLogger)RaidNode.LOG).getLogger().setLevel(Level.ALL);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Configuration conf;
|
|
||||||
String namenode = null;
|
|
||||||
String hftp = null;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
MiniMRCluster mr = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
String jobTrackerName = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create mapreduce and dfs clusters
|
|
||||||
*/
|
|
||||||
private void createClusters(boolean local) throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
conf.set("raid.config.file", CONFIG_FILE);
|
|
||||||
conf.setBoolean("raid.config.reload", true);
|
|
||||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
|
|
||||||
// scan all policies once every 5 second
|
|
||||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
|
||||||
if (local) {
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
} else {
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
|
|
||||||
}
|
|
||||||
|
|
||||||
conf.set("raid.server.address", "localhost:0");
|
|
||||||
conf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
|
|
||||||
// create a dfs and map-reduce cluster
|
|
||||||
final int taskTrackers = 4;
|
|
||||||
|
|
||||||
dfs = new MiniDFSCluster(conf, 3, true, null);
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
namenode = fileSys.getUri().toString();
|
|
||||||
mr = new MiniMRCluster(taskTrackers, namenode, 3);
|
|
||||||
JobConf jobConf = mr.createJobConf();
|
|
||||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
|
||||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
conf.set("mapred.job.tracker", jobTrackerName);
|
|
||||||
conf.set("mapreduce.framework.name", "yarn");
|
|
||||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
|
||||||
if (rmAdress != null) {
|
|
||||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
|
||||||
}
|
|
||||||
String schedulerAdress =
|
|
||||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
|
||||||
if (schedulerAdress != null) {
|
|
||||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
|
||||||
}
|
|
||||||
String jobHistoryAddress =
|
|
||||||
jobConf.get("mapreduce.jobhistory.address");
|
|
||||||
if (jobHistoryAddress != null) {
|
|
||||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create raid.xml file for RaidNode
|
|
||||||
*/
|
|
||||||
private void mySetup(long targetReplication,
|
|
||||||
long metaReplication, long stripeLength) throws Exception {
|
|
||||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
|
||||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
|
||||||
String str = "<configuration> " +
|
|
||||||
"<srcPath prefix=\"/user/test/raidtest\"> " +
|
|
||||||
"<policy name = \"RaidTest1\"> " +
|
|
||||||
"<erasureCode>xor</erasureCode> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>targetReplication</name> " +
|
|
||||||
"<value>" + targetReplication + "</value> " +
|
|
||||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>metaReplication</name> " +
|
|
||||||
"<value>" + metaReplication + "</value> " +
|
|
||||||
"<description> replication factor of parity file" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>stripeLength</name> " +
|
|
||||||
"<value>" + stripeLength + "</value> " +
|
|
||||||
"<description> the max number of blocks in a file to RAID together " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>time_before_har</name> " +
|
|
||||||
"<value>0</value> " +
|
|
||||||
"<description> amount of time waited before har'ing parity files" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>modTimePeriod</name> " +
|
|
||||||
"<value>2000</value> " +
|
|
||||||
"<description> time (milliseconds) after a file is modified to make it " +
|
|
||||||
"a candidate for RAIDing " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>" +
|
|
||||||
"</configuration>";
|
|
||||||
fileWriter.write(str);
|
|
||||||
fileWriter.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* stop clusters created earlier
|
|
||||||
*/
|
|
||||||
private void stopClusters() throws Exception {
|
|
||||||
if (mr != null) { mr.shutdown(); }
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that parity files that do not have an associated master file
|
|
||||||
* get deleted.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testRaidHar() throws Exception {
|
|
||||||
LOG.info("Test testRaidHar started.");
|
|
||||||
|
|
||||||
long blockSizes [] = {1024L};
|
|
||||||
long stripeLengths [] = {5};
|
|
||||||
long targetReplication = 1;
|
|
||||||
long metaReplication = 1;
|
|
||||||
int numBlock = 9;
|
|
||||||
int iter = 0;
|
|
||||||
|
|
||||||
createClusters(true);
|
|
||||||
try {
|
|
||||||
for (long blockSize : blockSizes) {
|
|
||||||
for (long stripeLength : stripeLengths) {
|
|
||||||
doTestHar(iter, targetReplication, metaReplication,
|
|
||||||
stripeLength, blockSize, numBlock);
|
|
||||||
iter++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
LOG.info("Test testRaidHar completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create parity file, delete original file and then validate that
|
|
||||||
* parity file is automatically deleted.
|
|
||||||
*/
|
|
||||||
private void doTestHar(int iter, long targetReplication,
|
|
||||||
long metaReplication, long stripeLength,
|
|
||||||
long blockSize, int numBlock) throws Exception {
|
|
||||||
LOG.info("doTestHar started---------------------------:" + " iter " + iter +
|
|
||||||
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
|
|
||||||
mySetup(targetReplication, metaReplication, stripeLength);
|
|
||||||
Path dir = new Path("/user/test/raidtest/subdir/");
|
|
||||||
Path file1 = new Path(dir + "/file" + iter);
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
Path destPath = new Path("/destraid/user/test/raidtest/subdir");
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
fileSys.delete(destPath, true);
|
|
||||||
for (int i = 0; i < 10; i++) {
|
|
||||||
Path file = new Path(dir + "/file" + i);
|
|
||||||
TestRaidNode.createOldFile(fileSys, file, 1, numBlock, blockSize);
|
|
||||||
}
|
|
||||||
LOG.info("doTestHar created test files for iteration " + iter);
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
FileStatus[] listPaths = null;
|
|
||||||
|
|
||||||
int maxFilesFound = 0;
|
|
||||||
// wait till file is raided
|
|
||||||
while (true) {
|
|
||||||
try {
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
int count = 0;
|
|
||||||
Path harPath = null;
|
|
||||||
int filesFound = 0;
|
|
||||||
if (listPaths != null) {
|
|
||||||
for (FileStatus s : listPaths) {
|
|
||||||
LOG.info("doTestHar found path " + s.getPath());
|
|
||||||
|
|
||||||
if (!s.isDir())
|
|
||||||
filesFound++;
|
|
||||||
if (filesFound > maxFilesFound)
|
|
||||||
maxFilesFound = filesFound;
|
|
||||||
|
|
||||||
if (s.getPath().toString().endsWith(".har")) {
|
|
||||||
// If a HAR directory is found, ensure that we have seen
|
|
||||||
// 10 parity files. We have to keep track of the max # of
|
|
||||||
// files since some parity files might get deleted by the
|
|
||||||
// purge thread.
|
|
||||||
assertEquals(10, maxFilesFound);
|
|
||||||
harPath = s.getPath();
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count == 1 && listPaths.length == 1) {
|
|
||||||
Path partfile = new Path(harPath, "part-0");
|
|
||||||
assertEquals(fileSys.getFileStatus(partfile).getReplication(),
|
|
||||||
targetReplication);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
//ignore
|
|
||||||
}
|
|
||||||
LOG.info("doTestHar waiting for files to be raided and parity files to be har'ed and deleted. Found " +
|
|
||||||
(listPaths == null ? "none" : listPaths.length));
|
|
||||||
Thread.sleep(1000); // keep waiting
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
// wait till raid file is deleted
|
|
||||||
int count = 1;
|
|
||||||
while (count > 0) {
|
|
||||||
count = 0;
|
|
||||||
try {
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
if (listPaths != null) {
|
|
||||||
for (FileStatus s : listPaths) {
|
|
||||||
LOG.info("doTestHar found path " + s.getPath());
|
|
||||||
if (s.getPath().toString().endsWith(".har")) {
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) { } //ignoring
|
|
||||||
LOG.info("doTestHar waiting for har file to be deleted. Found " +
|
|
||||||
(listPaths == null ? "none" : listPaths.length) + " files");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("doTestHar Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
}
|
|
||||||
LOG.info("doTestHar completed:" + " blockSize=" + blockSize +
|
|
||||||
" stripeLength=" + stripeLength);
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -1,738 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.zip.CRC32;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
|
||||||
import org.apache.hadoop.mapred.JobContext;
|
|
||||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
|
||||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyList;
|
|
||||||
import org.apache.hadoop.util.JarFinder;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the generation of parity blocks for files with different block
|
|
||||||
* sizes. Also test that a data block can be regenerated from a raid stripe
|
|
||||||
* using the parity block
|
|
||||||
*/
|
|
||||||
public class TestRaidNode {
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
public static final String DistRaid_JAR = JarFinder.getJar(DistRaid.class);
|
|
||||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
|
||||||
"test-raid.xml").getAbsolutePath();
|
|
||||||
final static long RELOAD_INTERVAL = 1000;
|
|
||||||
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
|
|
||||||
final static Random rand = new Random();
|
|
||||||
|
|
||||||
Configuration conf;
|
|
||||||
String namenode = null;
|
|
||||||
String hftp = null;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
MiniMRCluster mr = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
String jobTrackerName = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create mapreduce and dfs clusters
|
|
||||||
*/
|
|
||||||
private void createClusters(boolean local) throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
conf.set("raid.config.file", CONFIG_FILE);
|
|
||||||
conf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
conf.setBoolean("raid.config.reload", true);
|
|
||||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
conf.setBoolean("dfs.permissions.enabled", true);
|
|
||||||
conf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 20000);
|
|
||||||
conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3000L);
|
|
||||||
|
|
||||||
// scan all policies once every 5 second
|
|
||||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
|
||||||
if (local) {
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
} else {
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
|
|
||||||
}
|
|
||||||
|
|
||||||
conf.set("raid.server.address", "localhost:0");
|
|
||||||
|
|
||||||
// create a dfs and map-reduce cluster
|
|
||||||
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
|
|
||||||
builder.numDataNodes(6);
|
|
||||||
builder.format(true);
|
|
||||||
dfs = builder.build();
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
|
|
||||||
namenode = fileSys.getUri().toString();
|
|
||||||
final int taskTrackers = 4;
|
|
||||||
mr = new MiniMRCluster(taskTrackers, namenode, 3);
|
|
||||||
JobConf jobConf = mr.createJobConf();
|
|
||||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
|
||||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
conf.set("mapred.job.tracker", jobTrackerName);
|
|
||||||
conf.set("mapreduce.framework.name", "yarn");
|
|
||||||
String rmAdress = jobConf.get("yarn.resourcemanager.address");
|
|
||||||
if (rmAdress != null) {
|
|
||||||
conf.set("yarn.resourcemanager.address", rmAdress);
|
|
||||||
}
|
|
||||||
String schedulerAdress =
|
|
||||||
jobConf.get("yarn.resourcemanager.scheduler.address");
|
|
||||||
if (schedulerAdress != null) {
|
|
||||||
conf.set("yarn.resourcemanager.scheduler.address", schedulerAdress);
|
|
||||||
}
|
|
||||||
String jobHistoryAddress =
|
|
||||||
jobConf.get("mapreduce.jobhistory.address");
|
|
||||||
if (jobHistoryAddress != null) {
|
|
||||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class ConfigBuilder {
|
|
||||||
private List<String> policies;
|
|
||||||
|
|
||||||
public ConfigBuilder() {
|
|
||||||
policies = new java.util.ArrayList<String>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addPolicy(String name, String path, String parent) {
|
|
||||||
String str =
|
|
||||||
"<srcPath prefix=\"" + path + "\"> " +
|
|
||||||
"<policy name = \"" + name + "\"> " +
|
|
||||||
"<parentPolicy>" + parent + "</parentPolicy>" +
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>";
|
|
||||||
policies.add(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addPolicy(String name, short srcReplication,
|
|
||||||
long targetReplication, long metaReplication, long stripeLength) {
|
|
||||||
String str =
|
|
||||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
|
||||||
"<policy name = \"" + name + "\"> " +
|
|
||||||
"<erasureCode>xor</erasureCode> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>srcReplication</name> " +
|
|
||||||
"<value>" + srcReplication + "</value> " +
|
|
||||||
"<description> pick only files whole replFactor is greater than or equal to " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>targetReplication</name> " +
|
|
||||||
"<value>" + targetReplication + "</value> " +
|
|
||||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>metaReplication</name> " +
|
|
||||||
"<value>" + metaReplication + "</value> " +
|
|
||||||
"<description> replication factor of parity file" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>stripeLength</name> " +
|
|
||||||
"<value>" + stripeLength + "</value> " +
|
|
||||||
"<description> the max number of blocks in a file to RAID together " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>modTimePeriod</name> " +
|
|
||||||
"<value>2000</value> " +
|
|
||||||
"<description> time (milliseconds) after a file is modified to make it " +
|
|
||||||
"a candidate for RAIDing " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>";
|
|
||||||
policies.add(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addPolicy(String name, String path, short srcReplication,
|
|
||||||
long targetReplication, long metaReplication, long stripeLength) {
|
|
||||||
String str =
|
|
||||||
"<srcPath prefix=\"" + path + "\"> " +
|
|
||||||
"<policy name = \"" + name + "\"> " +
|
|
||||||
"<erasureCode>xor</erasureCode> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>srcReplication</name> " +
|
|
||||||
"<value>" + srcReplication + "</value> " +
|
|
||||||
"<description> pick only files whole replFactor is greater than or equal to " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>targetReplication</name> " +
|
|
||||||
"<value>" + targetReplication + "</value> " +
|
|
||||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>metaReplication</name> " +
|
|
||||||
"<value>" + metaReplication + "</value> " +
|
|
||||||
"<description> replication factor of parity file" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>stripeLength</name> " +
|
|
||||||
"<value>" + stripeLength + "</value> " +
|
|
||||||
"<description> the max number of blocks in a file to RAID together " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>modTimePeriod</name> " +
|
|
||||||
"<value>2000</value> " +
|
|
||||||
"<description> time (milliseconds) after a file is modified to make it " +
|
|
||||||
"a candidate for RAIDing " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>";
|
|
||||||
policies.add(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void persist() throws IOException {
|
|
||||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
|
||||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
|
||||||
fileWriter.write("<configuration>");
|
|
||||||
for (String policy: policies) {
|
|
||||||
fileWriter.write(policy);
|
|
||||||
}
|
|
||||||
fileWriter.write("</configuration>");
|
|
||||||
fileWriter.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* stop clusters created earlier
|
|
||||||
*/
|
|
||||||
private void stopClusters() throws Exception {
|
|
||||||
if (mr != null) { mr.shutdown(); }
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test to run a filter
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testPathFilter() throws Exception {
|
|
||||||
LOG.info("Test testPathFilter started.");
|
|
||||||
|
|
||||||
long blockSizes [] = {1024L};
|
|
||||||
int stripeLengths [] = {5, 6, 10, 11, 12};
|
|
||||||
int targetReplication = 1;
|
|
||||||
int metaReplication = 1;
|
|
||||||
int numBlock = 11;
|
|
||||||
int iter = 0;
|
|
||||||
|
|
||||||
createClusters(true);
|
|
||||||
try {
|
|
||||||
for (long blockSize : blockSizes) {
|
|
||||||
for (long stripeLength : stripeLengths) {
|
|
||||||
doTestPathFilter(iter, targetReplication, metaReplication,
|
|
||||||
stripeLength, blockSize, numBlock);
|
|
||||||
iter++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
doCheckPolicy();
|
|
||||||
} finally {
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
LOG.info("Test testPathFilter completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test to run a filter
|
|
||||||
*/
|
|
||||||
private void doTestPathFilter(int iter, long targetReplication,
|
|
||||||
long metaReplication, long stripeLength,
|
|
||||||
long blockSize, int numBlock) throws Exception {
|
|
||||||
LOG.info("doTestPathFilter started---------------------------:" + " iter " + iter +
|
|
||||||
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
|
|
||||||
ConfigBuilder cb = new ConfigBuilder();
|
|
||||||
cb.addPolicy("policy1", "/user/dhruba/raidtest", (short)1, targetReplication, metaReplication, stripeLength);
|
|
||||||
cb.persist();
|
|
||||||
|
|
||||||
RaidShell shell = null;
|
|
||||||
Path dir = new Path("/user/dhruba/raidtest/");
|
|
||||||
Path file1 = new Path(dir + "/file" + iter);
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
fileSys.delete(destPath, true);
|
|
||||||
long crc1 = createOldFile(fileSys, file1, 1, numBlock, blockSize);
|
|
||||||
LOG.info("doTestPathFilter created test files for iteration " + iter);
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
FileStatus[] listPaths = null;
|
|
||||||
|
|
||||||
// wait till file is raided
|
|
||||||
while (true) {
|
|
||||||
try {
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
int count = 0;
|
|
||||||
if (listPaths != null && listPaths.length == 1) {
|
|
||||||
for (FileStatus s : listPaths) {
|
|
||||||
LOG.info("doTestPathFilter found path " + s.getPath());
|
|
||||||
if (!s.getPath().toString().endsWith(".tmp") &&
|
|
||||||
fileSys.getFileStatus(file1).getReplication() ==
|
|
||||||
targetReplication) {
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count > 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
//ignore
|
|
||||||
}
|
|
||||||
LOG.info("doTestPathFilter waiting for files to be raided. Found " +
|
|
||||||
(listPaths == null ? "none" : listPaths.length));
|
|
||||||
Thread.sleep(1000); // keep waiting
|
|
||||||
}
|
|
||||||
// assertEquals(listPaths.length, 1); // all files raided
|
|
||||||
LOG.info("doTestPathFilter all files found in Raid.");
|
|
||||||
|
|
||||||
// check for error at beginning of file
|
|
||||||
shell = new RaidShell(conf);
|
|
||||||
shell.initializeRpc(conf, cnode.getListenerAddress());
|
|
||||||
if (numBlock >= 1) {
|
|
||||||
LOG.info("doTestPathFilter Check error at beginning of file.");
|
|
||||||
simulateError(shell, fileSys, file1, crc1, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for error at the beginning of second block
|
|
||||||
if (numBlock >= 2) {
|
|
||||||
LOG.info("doTestPathFilter Check error at beginning of second block.");
|
|
||||||
simulateError(shell, fileSys, file1, crc1, blockSize + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for error at the middle of third block
|
|
||||||
if (numBlock >= 3) {
|
|
||||||
LOG.info("doTestPathFilter Check error at middle of third block.");
|
|
||||||
simulateError(shell, fileSys, file1, crc1, 2 * blockSize + 10);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for error at the middle of second stripe
|
|
||||||
if (numBlock >= stripeLength + 1) {
|
|
||||||
LOG.info("doTestPathFilter Check error at middle of second stripe.");
|
|
||||||
simulateError(shell, fileSys, file1, crc1,
|
|
||||||
stripeLength * blockSize + 100);
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("doTestPathFilter Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (shell != null) shell.close();
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
LOG.info("doTestPathFilter delete file " + file1);
|
|
||||||
fileSys.delete(file1, true);
|
|
||||||
}
|
|
||||||
LOG.info("doTestPathFilter completed:" + " blockSize=" + blockSize +
|
|
||||||
" stripeLength=" + stripeLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check that raid occurs only on files that have a replication factor
|
|
||||||
// greater than or equal to the specified value
|
|
||||||
private void doCheckPolicy() throws Exception {
|
|
||||||
LOG.info("doCheckPolicy started---------------------------:");
|
|
||||||
short srcReplication = 1;
|
|
||||||
long targetReplication = 2;
|
|
||||||
long metaReplication = 1;
|
|
||||||
long stripeLength = 2;
|
|
||||||
long blockSize = 1024;
|
|
||||||
int numBlock = 3;
|
|
||||||
ConfigBuilder cb = new ConfigBuilder();
|
|
||||||
cb.addPolicy("policy1", "/user/dhruba/policytest", srcReplication,
|
|
||||||
targetReplication, metaReplication, stripeLength);
|
|
||||||
cb.persist();
|
|
||||||
Path dir = new Path("/user/dhruba/policytest/");
|
|
||||||
Path file1 = new Path(dir + "/file1");
|
|
||||||
Path file2 = new Path(dir + "/file2");
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/policytest");
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
fileSys.delete(destPath, true);
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
|
|
||||||
// this file should be picked up by the RaidNode
|
|
||||||
createOldFile(fileSys, file2, 2, numBlock, blockSize);
|
|
||||||
FileStatus[] listPaths = null;
|
|
||||||
|
|
||||||
long firstmodtime = 0;
|
|
||||||
// wait till file is raided
|
|
||||||
while (true) {
|
|
||||||
Thread.sleep(1000); // waiting
|
|
||||||
try {
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
LOG.warn("File not found " + destPath);
|
|
||||||
// The directory has been deleted by the purge thread.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int count = 0;
|
|
||||||
if (listPaths != null && listPaths.length == 1) {
|
|
||||||
for (FileStatus s : listPaths) {
|
|
||||||
LOG.info("doCheckPolicy found path " + s.getPath());
|
|
||||||
if (!s.getPath().toString().endsWith(".tmp") &&
|
|
||||||
fileSys.getFileStatus(file2).getReplication() ==
|
|
||||||
targetReplication) {
|
|
||||||
count++;
|
|
||||||
firstmodtime = s.getModificationTime();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count > 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
LOG.info("doCheckPolicy waiting for files to be raided. Found " +
|
|
||||||
(listPaths == null ? "none" : listPaths.length));
|
|
||||||
}
|
|
||||||
assertEquals(1, listPaths.length);
|
|
||||||
|
|
||||||
LOG.info("doCheckPolicy all files found in Raid the first time.");
|
|
||||||
|
|
||||||
LOG.info("doCheckPolicy: recreating source file");
|
|
||||||
createOldFile(fileSys, file2, 2, numBlock, blockSize);
|
|
||||||
|
|
||||||
FileStatus st = fileSys.getFileStatus(file2);
|
|
||||||
assertTrue(st.getModificationTime() > firstmodtime);
|
|
||||||
|
|
||||||
// wait till file is raided
|
|
||||||
while (true) {
|
|
||||||
Thread.sleep(20000L); // waiting
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
int count = 0;
|
|
||||||
if (listPaths != null && listPaths.length == 1) {
|
|
||||||
for (FileStatus s : listPaths) {
|
|
||||||
LOG.info("doCheckPolicy found path " + s.getPath() + " " + s.getModificationTime());
|
|
||||||
if (!s.getPath().toString().endsWith(".tmp") &&
|
|
||||||
s.getModificationTime() > firstmodtime &&
|
|
||||||
fileSys.getFileStatus(file2).getReplication() ==
|
|
||||||
targetReplication) {
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count > 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
LOG.info("doCheckPolicy waiting for files to be raided. Found " +
|
|
||||||
(listPaths == null ? "none" : listPaths.length));
|
|
||||||
}
|
|
||||||
assertEquals(1, listPaths.length);
|
|
||||||
|
|
||||||
LOG.info("doCheckPolicy: file got re-raided as expected.");
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("doCheckPolicy Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
LOG.info("doTestPathFilter delete file " + file1);
|
|
||||||
fileSys.delete(file1, false);
|
|
||||||
}
|
|
||||||
LOG.info("doCheckPolicy completed:");
|
|
||||||
}
|
|
||||||
|
|
||||||
static public void createTestFiles(FileSystem fileSys,
|
|
||||||
String path, String destpath, int nfile,
|
|
||||||
int nblock) throws IOException {
|
|
||||||
createTestFiles(fileSys, path, destpath, nfile, nblock, (short)1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile,
|
|
||||||
int nblock, short repl) throws IOException {
|
|
||||||
long blockSize = 1024L;
|
|
||||||
Path dir = new Path(path);
|
|
||||||
Path destPath = new Path(destpath);
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
fileSys.delete(destPath, true);
|
|
||||||
|
|
||||||
for(int i = 0 ; i < nfile; i++){
|
|
||||||
Path file = new Path(path + "file" + i);
|
|
||||||
createOldFile(fileSys, file, repl, nblock, blockSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test dist Raid
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testDistRaid() throws Exception {
|
|
||||||
LOG.info("Test testDistRaid started.");
|
|
||||||
long targetReplication = 2;
|
|
||||||
long metaReplication = 2;
|
|
||||||
long stripeLength = 3;
|
|
||||||
short srcReplication = 1;
|
|
||||||
|
|
||||||
createClusters(false);
|
|
||||||
ConfigBuilder cb = new ConfigBuilder();
|
|
||||||
cb.addPolicy("policy1", "/user/dhruba/raidtest",
|
|
||||||
srcReplication, targetReplication, metaReplication, stripeLength);
|
|
||||||
cb.addPolicy("policy2", "/user/dhruba/raidtest2",
|
|
||||||
srcReplication, targetReplication, metaReplication, stripeLength);
|
|
||||||
cb.persist();
|
|
||||||
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
createTestFiles(fileSys, "/user/dhruba/raidtest/",
|
|
||||||
"/destraid/user/dhruba/raidtest", 5, 7);
|
|
||||||
createTestFiles(fileSys, "/user/dhruba/raidtest2/",
|
|
||||||
"/destraid/user/dhruba/raidtest2", 5, 7);
|
|
||||||
LOG.info("Test testDistRaid created test files");
|
|
||||||
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.set(JobContext.JAR, TestRaidNode.DistRaid_JAR);
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
// Verify the policies are parsed correctly
|
|
||||||
for (PolicyList policyList : cnode.getAllPolicies()) {
|
|
||||||
for (PolicyInfo p : policyList.getAll()) {
|
|
||||||
if (p.getName().equals("policy1")) {
|
|
||||||
Path srcPath = new Path("/user/dhruba/raidtest");
|
|
||||||
FileSystem fs = srcPath.getFileSystem(conf);
|
|
||||||
assertTrue(p.getSrcPath().equals(
|
|
||||||
srcPath.makeQualified(fs.getUri(), fs.getWorkingDirectory())));
|
|
||||||
} else {
|
|
||||||
assertTrue(p.getName().equals("policy2"));
|
|
||||||
Path srcPath = new Path("/user/dhruba/raidtest2");
|
|
||||||
FileSystem fs = srcPath.getFileSystem(conf);
|
|
||||||
assertTrue(p.getSrcPath().equals(
|
|
||||||
srcPath.makeQualified(fs.getUri(), fs.getWorkingDirectory())));
|
|
||||||
}
|
|
||||||
assertEquals(targetReplication,
|
|
||||||
Integer.parseInt(p.getProperty("targetReplication")));
|
|
||||||
assertEquals(metaReplication,
|
|
||||||
Integer.parseInt(p.getProperty("metaReplication")));
|
|
||||||
assertEquals(stripeLength,
|
|
||||||
Integer.parseInt(p.getProperty("stripeLength")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
long start = Time.now();
|
|
||||||
final int MAX_WAITTIME = 300000;
|
|
||||||
|
|
||||||
assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
|
|
||||||
DistRaidNode dcnode = (DistRaidNode) cnode;
|
|
||||||
|
|
||||||
while (dcnode.jobMonitor.jobsMonitored() < 2 &&
|
|
||||||
Time.now() - start < MAX_WAITTIME) {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
start = Time.now();
|
|
||||||
while (dcnode.jobMonitor.jobsSucceeded() < 2 &&
|
|
||||||
Time.now() - start < MAX_WAITTIME) {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
assertEquals(dcnode.jobMonitor.jobsSucceeded(), dcnode.jobMonitor.jobsMonitored());
|
|
||||||
LOG.info("Test testDistRaid successful.");
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("testDistRaid Exception " + e + StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
LOG.info("Test testDistRaid completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// simulate a corruption at the specified offset and verify that everything is good
|
|
||||||
//
|
|
||||||
void simulateError(RaidShell shell, FileSystem fileSys, Path file1,
|
|
||||||
long crc, long corruptOffset) throws IOException {
|
|
||||||
// recover the file assuming that we encountered a corruption at the given offset
|
|
||||||
String[] args = new String[3];
|
|
||||||
args[0] = "-recover";
|
|
||||||
args[1] = file1.toString();
|
|
||||||
args[2] = Long.toString(corruptOffset);
|
|
||||||
Path recover1 = shell.recover(args[0], args, 1)[0];
|
|
||||||
|
|
||||||
// compare that the recovered file is identical to the original one
|
|
||||||
LOG.info("Comparing file " + file1 + " with recovered file " + recover1);
|
|
||||||
validateFile(fileSys, file1, recover1, crc);
|
|
||||||
fileSys.delete(recover1, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// creates a file and populates it with random data. Returns its crc.
|
|
||||||
//
|
|
||||||
static long createOldFile(FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
|
|
||||||
throws IOException {
|
|
||||||
CRC32 crc = new CRC32();
|
|
||||||
FSDataOutputStream stm = fileSys.create(name, true,
|
|
||||||
fileSys.getConf().getInt("io.file.buffer.size", 4096),
|
|
||||||
(short)repl, blocksize);
|
|
||||||
// fill random data into file
|
|
||||||
byte[] b = new byte[(int)blocksize];
|
|
||||||
for (int i = 0; i < numBlocks; i++) {
|
|
||||||
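// make the last block half the block size so the file ends with a partial block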
if (i == (numBlocks-1)) {
|
|
||||||
b = new byte[(int)blocksize/2];
|
|
||||||
}
|
|
||||||
rand.nextBytes(b);
|
|
||||||
stm.write(b);
|
|
||||||
crc.update(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
stm.close();
|
|
||||||
return crc.getValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// validates that file matches the crc.
|
|
||||||
//
|
|
||||||
private void validateFile(FileSystem fileSys, Path name1, Path name2, long crc)
|
|
||||||
throws IOException {
|
|
||||||
|
|
||||||
FileStatus stat1 = fileSys.getFileStatus(name1);
|
|
||||||
FileStatus stat2 = fileSys.getFileStatus(name2);
|
|
||||||
assertTrue(" Length of file " + name1 + " is " + stat1.getLen() +
|
|
||||||
" is different from length of file " + name1 + " " + stat2.getLen(),
|
|
||||||
stat1.getLen() == stat2.getLen());
|
|
||||||
|
|
||||||
CRC32 newcrc = new CRC32();
|
|
||||||
FSDataInputStream stm = fileSys.open(name2);
|
|
||||||
final byte[] b = new byte[4192];
|
|
||||||
int num = 0;
|
|
||||||
while (num >= 0) {
|
|
||||||
num = stm.read(b);
|
|
||||||
if (num < 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
newcrc.update(b, 0, num);
|
|
||||||
}
|
|
||||||
stm.close();
|
|
||||||
if (newcrc.getValue() != crc) {
|
|
||||||
fail("CRC mismatch of files " + name1 + " with file " + name2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSuspendTraversal() throws Exception {
|
|
||||||
LOG.info("Test testSuspendTraversal started.");
|
|
||||||
long targetReplication = 2;
|
|
||||||
long metaReplication = 2;
|
|
||||||
long stripeLength = 3;
|
|
||||||
short srcReplication = 1;
|
|
||||||
|
|
||||||
createClusters(false);
|
|
||||||
ConfigBuilder cb = new ConfigBuilder();
|
|
||||||
cb.addPolicy("policy1", "/user/dhruba/raidtest",
|
|
||||||
srcReplication, targetReplication, metaReplication, stripeLength);
|
|
||||||
cb.persist();
|
|
||||||
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
for(int i = 0; i < 4; i++){
|
|
||||||
Path file = new Path("/user/dhruba/raidtest/dir" + i + "/file" + i);
|
|
||||||
createOldFile(fileSys, file, 1, 7, 1024L);
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG.info("Test testSuspendTraversal created test files");
|
|
||||||
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.setInt("raid.distraid.max.jobs", 2);
|
|
||||||
localConf.setInt("raid.distraid.max.files", 2);
|
|
||||||
localConf.setInt("raid.directorytraversal.threads", 1);
|
|
||||||
localConf.set(JobContext.JAR, TestRaidNode.DistRaid_JAR);
|
|
||||||
// 4 test files: 2 jobs with 2 files each.
|
|
||||||
final int numJobsExpected = 2;
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
|
|
||||||
long start = Time.now();
|
|
||||||
final int MAX_WAITTIME = 300000;
|
|
||||||
|
|
||||||
assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
|
|
||||||
DistRaidNode dcnode = (DistRaidNode) cnode;
|
|
||||||
|
|
||||||
start = Time.now();
|
|
||||||
while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected &&
|
|
||||||
Time.now() - start < MAX_WAITTIME) {
|
|
||||||
LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() +
|
|
||||||
" to reach " + numJobsExpected);
|
|
||||||
Thread.sleep(3000);
|
|
||||||
}
|
|
||||||
// Wait for any running jobs to finish.
|
|
||||||
start = Time.now();
|
|
||||||
while (dcnode.jobMonitor.runningJobsCount() > 0 &&
|
|
||||||
Time.now() - start < MAX_WAITTIME) {
|
|
||||||
LOG.info("Waiting for zero running jobs: " +
|
|
||||||
dcnode.jobMonitor.runningJobsCount());
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored());
|
|
||||||
assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded());
|
|
||||||
|
|
||||||
LOG.info("Test testSuspendTraversal successful.");
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("testSuspendTraversal Exception " + e + StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
LOG.info("Test testSuspendTraversal completed.");
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,521 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.util.Random;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.commons.logging.impl.Log4JLogger;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
|
||||||
import org.apache.hadoop.mapred.MiniMRCluster;
|
|
||||||
import org.apache.hadoop.mapred.Reporter;
|
|
||||||
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
|
|
||||||
import org.apache.hadoop.raid.protocol.PolicyInfo;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.log4j.Level;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If a file gets deleted, then verify that the parity file gets deleted too.
|
|
||||||
*/
|
|
||||||
public class TestRaidPurge {
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
|
||||||
"test-raid.xml").getAbsolutePath();
|
|
||||||
final static long RELOAD_INTERVAL = 1000;
|
|
||||||
final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
|
|
||||||
final Random rand = new Random();
|
|
||||||
|
|
||||||
{
|
|
||||||
((Log4JLogger)RaidNode.LOG).getLogger().setLevel(Level.ALL);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Configuration conf;
|
|
||||||
String namenode = null;
|
|
||||||
String hftp = null;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
MiniMRCluster mr = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
String jobTrackerName = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create mapreduce and dfs clusters
|
|
||||||
*/
|
|
||||||
private void createClusters(boolean local) throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
conf.set("raid.config.file", CONFIG_FILE);
|
|
||||||
conf.setBoolean("raid.config.reload", true);
|
|
||||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
|
|
||||||
// scan all policies once every 5 seconds
|
|
||||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.dfs.DFSClient");
|
|
||||||
|
|
||||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
|
||||||
if (local) {
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
} else {
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
|
|
||||||
}
|
|
||||||
|
|
||||||
conf.set("raid.server.address", "localhost:0");
|
|
||||||
|
|
||||||
// create a dfs and map-reduce cluster
|
|
||||||
final int taskTrackers = 4;
|
|
||||||
final int jobTrackerPort = 60050;
|
|
||||||
|
|
||||||
dfs = new MiniDFSCluster(conf, 3, true, null);
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
namenode = fileSys.getUri().toString();
|
|
||||||
mr = new MiniMRCluster(taskTrackers, namenode, 3);
|
|
||||||
JobConf jobConf = mr.createJobConf();
|
|
||||||
jobTrackerName = "localhost:" + jobConf.get(JTConfig.JT_IPC_ADDRESS);
|
|
||||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
conf.set("mapred.job.tracker", jobTrackerName);
|
|
||||||
conf.set("mapreduce.framework.name", "yarn");
|
|
||||||
String rmAddress = jobConf.get("yarn.resourcemanager.address");
if (rmAddress != null) {
conf.set("yarn.resourcemanager.address", rmAddress);
}
String schedulerAddress =
jobConf.get("yarn.resourcemanager.scheduler.address");
if (schedulerAddress != null) {
conf.set("yarn.resourcemanager.scheduler.address", schedulerAddress);
}
|
|
||||||
String jobHistoryAddress =
|
|
||||||
jobConf.get("mapreduce.jobhistory.address");
|
|
||||||
if (jobHistoryAddress != null) {
|
|
||||||
conf.set("mapreduce.jobhistory.address", jobHistoryAddress);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create raid.xml file for RaidNode
|
|
||||||
*/
|
|
||||||
private void mySetup(long targetReplication,
|
|
||||||
long metaReplication, long stripeLength) throws Exception {
|
|
||||||
int harDelay = 1; // 1 day.
|
|
||||||
mySetup(targetReplication, metaReplication, stripeLength, harDelay);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mySetup(long targetReplication,
|
|
||||||
long metaReplication, long stripeLength, int harDelay) throws Exception {
|
|
||||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
|
||||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
|
||||||
String str = "<configuration> " +
|
|
||||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
|
||||||
"<policy name = \"RaidTest1\"> " +
|
|
||||||
"<erasureCode>xor</erasureCode> " +
|
|
||||||
"<destPath> /destraid</destPath> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>targetReplication</name> " +
|
|
||||||
"<value>" + targetReplication + "</value> " +
|
|
||||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>metaReplication</name> " +
|
|
||||||
"<value>" + metaReplication + "</value> " +
|
|
||||||
"<description> replication factor of parity file" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>stripeLength</name> " +
|
|
||||||
"<value>" + stripeLength + "</value> " +
|
|
||||||
"<description> the max number of blocks in a file to RAID together " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>modTimePeriod</name> " +
|
|
||||||
"<value>2000</value> " +
|
|
||||||
"<description> time (milliseconds) after a file is modified to make it " +
|
|
||||||
"a candidate for RAIDing " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>time_before_har</name> " +
|
|
||||||
"<value> " + harDelay + "</value> " +
|
|
||||||
"<description> amount of time waited before har'ing parity files" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>" +
|
|
||||||
"</configuration>";
|
|
||||||
fileWriter.write(str);
|
|
||||||
fileWriter.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* stop clusters created earlier
|
|
||||||
*/
|
|
||||||
private void stopClusters() throws Exception {
|
|
||||||
if (mr != null) { mr.shutdown(); }
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that parity files that do not have an associated master file
|
|
||||||
* get deleted.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testPurge() throws Exception {
|
|
||||||
LOG.info("Test testPurge started.");
|
|
||||||
|
|
||||||
long blockSizes [] = {1024L};
|
|
||||||
long stripeLengths [] = {5};
|
|
||||||
long targetReplication = 1;
|
|
||||||
long metaReplication = 1;
|
|
||||||
int numBlock = 9;
|
|
||||||
int iter = 0;
|
|
||||||
|
|
||||||
createClusters(true);
|
|
||||||
try {
|
|
||||||
for (long blockSize : blockSizes) {
|
|
||||||
for (long stripeLength : stripeLengths) {
|
|
||||||
doTestPurge(iter, targetReplication, metaReplication,
|
|
||||||
stripeLength, blockSize, numBlock);
|
|
||||||
iter++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
LOG.info("Test testPurge completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create parity file, delete original file and then validate that
|
|
||||||
* parity file is automatically deleted.
|
|
||||||
*/
|
|
||||||
private void doTestPurge(int iter, long targetReplication,
|
|
||||||
long metaReplication, long stripeLength,
|
|
||||||
long blockSize, int numBlock) throws Exception {
|
|
||||||
LOG.info("doTestPurge started---------------------------:" + " iter " + iter +
|
|
||||||
" blockSize=" + blockSize + " stripeLength=" + stripeLength);
|
|
||||||
mySetup(targetReplication, metaReplication, stripeLength);
|
|
||||||
Path dir = new Path("/user/dhruba/raidtest/");
|
|
||||||
Path file1 = new Path(dir + "/file" + iter);
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
fileSys.delete(destPath, true);
|
|
||||||
TestRaidNode.createOldFile(fileSys, file1, 1, numBlock, blockSize);
|
|
||||||
LOG.info("doTestPurge created test files for iteration " + iter);
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
FileStatus[] listPaths = null;
|
|
||||||
|
|
||||||
// wait till file is raided
|
|
||||||
while (true) {
|
|
||||||
try {
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
int count = 0;
|
|
||||||
if (listPaths != null && listPaths.length == 1) {
|
|
||||||
for (FileStatus s : listPaths) {
|
|
||||||
LOG.info("doTestPurge found path " + s.getPath());
|
|
||||||
if (!s.getPath().toString().endsWith(".tmp")) {
|
|
||||||
count++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count > 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
//ignore
|
|
||||||
}
|
|
||||||
LOG.info("doTestPurge waiting for files to be raided. Found " +
|
|
||||||
(listPaths == null ? "none" : listPaths.length));
|
|
||||||
Thread.sleep(1000); // keep waiting
|
|
||||||
}
|
|
||||||
// assertEquals(listPaths.length, 1); // all files raided
|
|
||||||
LOG.info("doTestPurge all files found in Raid.");
|
|
||||||
|
|
||||||
// delete original file
|
|
||||||
assertTrue("Unable to delete original file " + file1 ,
|
|
||||||
fileSys.delete(file1, true));
|
|
||||||
LOG.info("deleted file " + file1);
|
|
||||||
|
|
||||||
// wait till parity file and directory are automatically deleted
|
|
||||||
while (fileSys.exists(destPath)) {
|
|
||||||
LOG.info("doTestPurge waiting for parity files to be removed.");
|
|
||||||
Thread.sleep(1000); // keep waiting
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("doTestPurge Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
LOG.info("doTestPurge delete file " + file1);
|
|
||||||
fileSys.delete(file1, true);
|
|
||||||
}
|
|
||||||
LOG.info("doTestPurge completed:" + " blockSize=" + blockSize +
|
|
||||||
" stripeLength=" + stripeLength);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a file, wait for parity file to get HARed. Then modify the file,
|
|
||||||
* wait for the HAR to get purged.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testPurgeHar() throws Exception {
|
|
||||||
LOG.info("testPurgeHar started");
|
|
||||||
int harDelay = 0;
|
|
||||||
createClusters(true);
|
|
||||||
mySetup(1, 1, 5, harDelay);
|
|
||||||
Path dir = new Path("/user/dhruba/raidtest/");
|
|
||||||
Path destPath = new Path("/raid/user/dhruba/raidtest");
|
|
||||||
Path file1 = new Path(dir + "/file");
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
TestRaidNode.createOldFile(fileSys, file1, 1, 8, 8192L);
|
|
||||||
LOG.info("testPurgeHar created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
|
|
||||||
// Wait till har is created.
|
|
||||||
while (true) {
|
|
||||||
try {
|
|
||||||
FileStatus[] listPaths = fileSys.listStatus(destPath);
|
|
||||||
if (listPaths != null && listPaths.length == 1) {
|
|
||||||
FileStatus s = listPaths[0];
|
|
||||||
LOG.info("testPurgeHar found path " + s.getPath());
|
|
||||||
if (s.getPath().toString().endsWith(".har")) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
//ignore
|
|
||||||
}
|
|
||||||
Thread.sleep(1000); // keep waiting
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set an old timestamp.
|
|
||||||
fileSys.setTimes(file1, 0, 0);
|
|
||||||
|
|
||||||
boolean found = false;
|
|
||||||
FileStatus[] listPaths = null;
|
|
||||||
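// Wait until the har has been deleted and the recreated parity file
// (a path ending in "file" with the old modification time of 0) is the only entry left.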
while (!found || listPaths == null || listPaths.length > 1) {
|
|
||||||
listPaths = fileSys.listStatus(destPath);
|
|
||||||
if (listPaths != null) {
|
|
||||||
for (FileStatus s: listPaths) {
|
|
||||||
LOG.info("testPurgeHar waiting for parity file to be recreated" +
|
|
||||||
" and har to be deleted found " + s.getPath());
|
|
||||||
if (s.getPath().toString().endsWith("file") &&
|
|
||||||
s.getModificationTime() == 0) {
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("testPurgeHar Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
fileSys.delete(dir, true);
|
|
||||||
fileSys.delete(destPath, true);
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create parity file, delete original file's directory and then validate that
|
|
||||||
* parity directory is automatically deleted.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testPurgeDirectory() throws Exception {
|
|
||||||
long stripeLength = 5;
|
|
||||||
long blockSize = 8192;
|
|
||||||
long targetReplication = 1;
|
|
||||||
long metaReplication = 1;
|
|
||||||
int numBlock = 9;
|
|
||||||
|
|
||||||
createClusters(true);
|
|
||||||
mySetup(targetReplication, metaReplication, stripeLength);
|
|
||||||
Path dir = new Path("/user/dhruba/raidtest/");
|
|
||||||
Path file1 = new Path(dir + "/file1");
|
|
||||||
RaidNode cnode = null;
|
|
||||||
try {
|
|
||||||
TestRaidNode.createOldFile(fileSys, file1, 1, numBlock, blockSize);
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
|
|
||||||
// delete original directory.
|
|
||||||
assertTrue("Unable to delete original directory " + file1 ,
|
|
||||||
fileSys.delete(file1.getParent(), true));
|
|
||||||
LOG.info("deleted file " + file1);
|
|
||||||
|
|
||||||
// wait till parity file and directory are automatically deleted
|
|
||||||
long start = Time.now();
|
|
||||||
while (fileSys.exists(destPath) &&
|
|
||||||
Time.now() - start < 120000) {
|
|
||||||
LOG.info("testPurgeDirectory waiting for parity files to be removed.");
|
|
||||||
Thread.sleep(1000); // keep waiting
|
|
||||||
}
|
|
||||||
assertFalse(fileSys.exists(destPath));
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("testPurgeDirectory Exception " + e +
|
|
||||||
StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
LOG.info("testPurgeDirectory delete file " + file1);
|
|
||||||
fileSys.delete(file1, true);
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Test that an XOR parity file is removed when a RS parity file is detected.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testPurgePreference() throws Exception {
|
|
||||||
createClusters(true);
|
|
||||||
Path dir = new Path("/user/test/raidtest/");
|
|
||||||
Path file1 = new Path(dir + "/file1");
|
|
||||||
|
|
||||||
PolicyInfo infoXor = new PolicyInfo("testPurgePreference", conf);
|
|
||||||
infoXor.setSrcPath("/user/test/raidtest");
|
|
||||||
infoXor.setErasureCode("xor");
|
|
||||||
infoXor.setDescription("test policy");
|
|
||||||
infoXor.setProperty("targetReplication", "2");
|
|
||||||
infoXor.setProperty("metaReplication", "2");
|
|
||||||
|
|
||||||
PolicyInfo infoRs = new PolicyInfo("testPurgePreference", conf);
|
|
||||||
infoRs.setSrcPath("/user/test/raidtest");
|
|
||||||
infoRs.setErasureCode("rs");
|
|
||||||
infoRs.setDescription("test policy");
|
|
||||||
infoRs.setProperty("targetReplication", "1");
|
|
||||||
infoRs.setProperty("metaReplication", "1");
|
|
||||||
try {
|
|
||||||
TestRaidNode.createOldFile(fileSys, file1, 1, 9, 8192L);
|
|
||||||
FileStatus stat = fileSys.getFileStatus(file1);
|
|
||||||
|
|
||||||
// Create the parity files.
|
|
||||||
RaidNode.doRaid(
|
|
||||||
conf, infoXor, stat, new RaidNode.Statistics(), Reporter.NULL);
|
|
||||||
RaidNode.doRaid(
|
|
||||||
conf, infoRs, stat, new RaidNode.Statistics(), Reporter.NULL);
|
|
||||||
Path xorParity =
|
|
||||||
new Path(RaidNode.DEFAULT_RAID_LOCATION, "user/test/raidtest/file1");
|
|
||||||
Path rsParity =
|
|
||||||
new Path(RaidNode.DEFAULT_RAIDRS_LOCATION, "user/test/raidtest/file1");
|
|
||||||
assertTrue(fileSys.exists(xorParity));
|
|
||||||
assertTrue(fileSys.exists(rsParity));
|
|
||||||
|
|
||||||
// Check purge of a single parity file.
|
|
||||||
RaidNode cnode = RaidNode.createRaidNode(conf);
|
|
||||||
FileStatus raidRsStat =
|
|
||||||
fileSys.getFileStatus(new Path(RaidNode.DEFAULT_RAIDRS_LOCATION));
|
|
||||||
cnode.purgeMonitor.recursePurge(infoRs.getErasureCode(), fileSys, fileSys,
|
|
||||||
RaidNode.DEFAULT_RAIDRS_LOCATION, raidRsStat);
|
|
||||||
|
|
||||||
// Calling purge under the RS path has no effect.
|
|
||||||
assertTrue(fileSys.exists(xorParity));
|
|
||||||
assertTrue(fileSys.exists(rsParity));
|
|
||||||
|
|
||||||
FileStatus raidStat =
|
|
||||||
fileSys.getFileStatus(new Path(RaidNode.DEFAULT_RAID_LOCATION));
|
|
||||||
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
|
|
||||||
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
|
|
||||||
// XOR parity must have been purged by now.
|
|
||||||
assertFalse(fileSys.exists(xorParity));
|
|
||||||
assertTrue(fileSys.exists(rsParity));
|
|
||||||
|
|
||||||
// Now check the purge of a parity har.
|
|
||||||
// Delete the RS parity for now.
|
|
||||||
fileSys.delete(rsParity, false);
|
|
||||||
// Recreate the XOR parity.
|
|
||||||
Path xorHar =
|
|
||||||
new Path(RaidNode.DEFAULT_RAID_LOCATION, "user/test/raidtest/raidtest" +
|
|
||||||
RaidNode.HAR_SUFFIX);
|
|
||||||
RaidNode.doRaid(
|
|
||||||
conf, infoXor, stat, new RaidNode.Statistics(), Reporter.NULL);
|
|
||||||
assertTrue(fileSys.exists(xorParity));
|
|
||||||
assertFalse(fileSys.exists(xorHar));
|
|
||||||
|
|
||||||
// Create the har.
|
|
||||||
long cutoff = Time.now();
|
|
||||||
cnode.recurseHar(infoXor, fileSys, raidStat,
|
|
||||||
RaidNode.DEFAULT_RAID_LOCATION, fileSys, cutoff,
|
|
||||||
RaidNode.tmpHarPathForCode(conf, infoXor.getErasureCode()));
|
|
||||||
|
|
||||||
// Call purge to get rid of the parity file. The har should remain.
|
|
||||||
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
|
|
||||||
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
|
|
||||||
// XOR har should exist but xor parity file should have been purged.
|
|
||||||
assertFalse(fileSys.exists(xorParity));
|
|
||||||
assertTrue(fileSys.exists(xorHar));
|
|
||||||
|
|
||||||
// Now create the RS parity.
|
|
||||||
RaidNode.doRaid(
|
|
||||||
conf, infoRs, stat, new RaidNode.Statistics(), Reporter.NULL);
|
|
||||||
cnode.purgeMonitor.recursePurge(infoXor.getErasureCode(), fileSys, fileSys,
|
|
||||||
RaidNode.DEFAULT_RAID_LOCATION, raidStat);
|
|
||||||
// XOR har should get deleted.
|
|
||||||
assertTrue(fileSys.exists(rsParity));
|
|
||||||
assertFalse(fileSys.exists(xorParity));
|
|
||||||
assertFalse(fileSys.exists(xorHar));
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
stopClusters();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,267 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Random;
|
|
||||||
import java.util.zip.CRC32;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataInputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.util.StringUtils;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
|
|
||||||
public class TestRaidShell {
|
|
||||||
final static Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.TestRaidShell");
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
final static String CONFIG_FILE = new File(TEST_DIR,
|
|
||||||
"test-raid.xml").getAbsolutePath();
|
|
||||||
final static long RELOAD_INTERVAL = 1000;
|
|
||||||
final static int NUM_DATANODES = 3;
|
|
||||||
Configuration conf;
|
|
||||||
String namenode = null;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
String hftp = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
RaidNode cnode = null;
|
|
||||||
Random rand = new Random();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a file with three stripes, corrupt a block each in two stripes,
|
|
||||||
* and wait for the file to be fixed.
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testBlockFix() throws Exception {
|
|
||||||
LOG.info("Test testBlockFix started.");
|
|
||||||
long blockSize = 8192L;
|
|
||||||
int stripeLength = 3;
|
|
||||||
mySetup(stripeLength, -1);
|
|
||||||
Path file1 = new Path("/user/dhruba/raidtest/file1");
|
|
||||||
Path destPath = new Path("/destraid/user/dhruba/raidtest");
|
|
||||||
Path parityFile = new Path(destPath, "file1");
|
|
||||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 7, blockSize);
|
|
||||||
long file1Len = fileSys.getFileStatus(file1).getLen();
|
|
||||||
LOG.info("Test testBlockFix created test files");
|
|
||||||
|
|
||||||
// create an instance of the RaidNode
|
|
||||||
Configuration localConf = new Configuration(conf);
|
|
||||||
localConf.set(RaidNode.RAID_LOCATION_KEY, "/destraid");
|
|
||||||
localConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
// the RaidNode does the raiding inline (instead of submitting to map/reduce)
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
conf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
|
||||||
cnode = RaidNode.createRaidNode(null, localConf);
|
|
||||||
|
|
||||||
try {
|
|
||||||
TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
|
|
||||||
cnode.stop();
|
|
||||||
cnode.join();
|
|
||||||
cnode = null;
|
|
||||||
|
|
||||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
|
||||||
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(
|
|
||||||
(DistributedFileSystem) fileSys, file1.toUri().getPath(),
|
|
||||||
0, srcStat.getLen());
|
|
||||||
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
|
|
||||||
// Corrupt blocks in different stripes. We can fix them.
|
|
||||||
int[] corruptBlockIdxs = new int[]{0, 4, 6};
|
|
||||||
for (int idx: corruptBlockIdxs) {
|
|
||||||
LOG.info("Corrupting block " + locations.get(idx).getBlock());
|
|
||||||
corruptBlock(locations.get(idx).getBlock());
|
|
||||||
}
|
|
||||||
TestBlockFixer.reportCorruptBlocks(fileSys, file1, corruptBlockIdxs,
|
|
||||||
srcStat.getBlockSize());
|
|
||||||
|
|
||||||
waitForCorruptBlocks(corruptBlockIdxs.length, dfs, file1);
|
|
||||||
|
|
||||||
// Create RaidShell and fix the file.
|
|
||||||
RaidShell shell = new RaidShell(conf);
|
|
||||||
String[] args = new String[2];
|
|
||||||
args[0] = "-recoverBlocks";
|
|
||||||
args[1] = file1.toUri().getPath();
|
|
||||||
ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
waitForCorruptBlocks(0, dfs, file1);
|
|
||||||
|
|
||||||
assertTrue(TestRaidDfs.validateFile(dfs, file1, file1Len, crc1));
|
|
||||||
|
|
||||||
// Now corrupt and fix the parity file.
|
|
||||||
FileStatus parityStat = fileSys.getFileStatus(parityFile);
|
|
||||||
long parityCrc = getCRC(fileSys, parityFile);
|
|
||||||
locations = RaidDFSUtil.getBlockLocations(
|
|
||||||
dfs, parityFile.toUri().getPath(), 0, parityStat.getLen());
|
|
||||||
corruptBlock(locations.get(0).getBlock());
|
|
||||||
TestBlockFixer.reportCorruptBlocks(fileSys, parityFile, new int[]{0},
|
|
||||||
srcStat.getBlockSize());
|
|
||||||
waitForCorruptBlocks(1, dfs, parityFile);
|
|
||||||
|
|
||||||
args[1] = parityFile.toUri().getPath();
|
|
||||||
ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
waitForCorruptBlocks(0, dfs, file1);
|
|
||||||
assertEquals(parityCrc, getCRC(fileSys, parityFile));
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.info("Test testBlockFix Exception " + e + StringUtils.stringifyException(e));
|
|
||||||
throw e;
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
LOG.info("Test testBlockFix completed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void waitForCorruptBlocks(
|
|
||||||
int numCorruptBlocks, DistributedFileSystem dfs, Path file)
|
|
||||||
throws Exception {
|
|
||||||
String path = file.toUri().getPath();
|
|
||||||
FileStatus stat = dfs.getFileStatus(file);
|
|
||||||
long start = Time.now();
|
|
||||||
long actual = 0;
|
|
||||||
do {
|
|
||||||
actual = RaidDFSUtil.corruptBlocksInFile(
|
|
||||||
dfs, path, 0, stat.getLen()).size();
|
|
||||||
if (actual == numCorruptBlocks) break;
|
|
||||||
if (Time.now() - start > 120000) break;
|
|
||||||
LOG.info("Waiting for " + numCorruptBlocks + " corrupt blocks in " +
|
|
||||||
path + ", found " + actual);
|
|
||||||
Thread.sleep(1000);
|
|
||||||
} while (true);
|
|
||||||
assertEquals(numCorruptBlocks, actual);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mySetup(int stripeLength, int timeBeforeHar) throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
|
|
||||||
conf.set("raid.config.file", CONFIG_FILE);
|
|
||||||
conf.setBoolean("raid.config.reload", true);
|
|
||||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
|
|
||||||
// scan all policies once every 5 seconds
|
|
||||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
// do not use map-reduce cluster for Raiding
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
conf.set("raid.server.address", "localhost:0");
|
|
||||||
conf.setInt("hdfs.raid.stripeLength", stripeLength);
|
|
||||||
conf.set("hdfs.raid.locations", "/destraid");
|
|
||||||
|
|
||||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
namenode = fileSys.getUri().toString();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
|
|
||||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
|
||||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
|
||||||
String str = "<configuration> " +
|
|
||||||
"<srcPath prefix=\"/user/dhruba/raidtest\"> " +
|
|
||||||
"<policy name = \"RaidTest1\"> " +
|
|
||||||
"<erasureCode>xor</erasureCode> " +
|
|
||||||
"<destPath> /destraid</destPath> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>targetReplication</name> " +
|
|
||||||
"<value>1</value> " +
|
|
||||||
"<description>after RAIDing, decrease the replication factor of a file to this value." +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>metaReplication</name> " +
|
|
||||||
"<value>1</value> " +
|
|
||||||
"<description> replication factor of parity file" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> " +
|
|
||||||
"<property> " +
|
|
||||||
"<name>modTimePeriod</name> " +
|
|
||||||
"<value>2000</value> " +
|
|
||||||
"<description> time (milliseconds) after a file is modified to make it " +
|
|
||||||
"a candidate for RAIDing " +
|
|
||||||
"</description> " +
|
|
||||||
"</property> ";
|
|
||||||
if (timeBeforeHar >= 0) {
|
|
||||||
str +=
|
|
||||||
"<property> " +
|
|
||||||
"<name>time_before_har</name> " +
|
|
||||||
"<value>" + timeBeforeHar + "</value> " +
|
|
||||||
"<description> amount of time waited before har'ing parity files" +
|
|
||||||
"</description> " +
|
|
||||||
"</property> ";
|
|
||||||
}
|
|
||||||
|
|
||||||
str +=
|
|
||||||
"</policy>" +
|
|
||||||
"</srcPath>" +
|
|
||||||
"</configuration>";
|
|
||||||
fileWriter.write(str);
|
|
||||||
fileWriter.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void myTearDown() throws Exception {
|
|
||||||
if (cnode != null) { cnode.stop(); cnode.join(); }
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
|
|
||||||
private long getCRC(FileSystem fs, Path p) throws IOException {
|
|
||||||
CRC32 crc = new CRC32();
|
|
||||||
FSDataInputStream stm = fs.open(p);
|
|
||||||
int b;
|
|
||||||
while ((b = stm.read())>=0) {
|
|
||||||
crc.update(b);
|
|
||||||
}
|
|
||||||
stm.close();
|
|
||||||
return crc.getValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
void corruptBlock(ExtendedBlock block) throws IOException {
|
|
||||||
assertTrue("Could not corrupt block",
|
|
||||||
dfs.corruptBlockOnDataNodes(block) > 0);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,724 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileWriter;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Random;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
|
||||||
import org.junit.After;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
|
|
||||||
public class TestRaidShellFsck {
|
|
||||||
final static Log LOG =
|
|
||||||
LogFactory.getLog("org.apache.hadoop.raid.TestRaidShellFsck");
|
|
||||||
final static String TEST_DIR =
|
|
||||||
new File(System.
|
|
||||||
getProperty("test.build.data", "target/test-data")).getAbsolutePath();
|
|
||||||
|
|
||||||
final static String CONFIG_FILE = new File(TEST_DIR, "test-raid.xml").
|
|
||||||
getAbsolutePath();
|
|
||||||
final static long RELOAD_INTERVAL = 1000;
|
|
||||||
final static int NUM_DATANODES = 4;
|
|
||||||
final static int STRIPE_BLOCKS = 3; // number of blocks per stripe
|
|
||||||
final static int FILE_BLOCKS = 6; // number of blocks that file consists of
|
|
||||||
final static short REPL = 1; // replication factor before raiding
|
|
||||||
final static long BLOCK_SIZE = 8192L; // size of a block in bytes
|
|
||||||
final static String DIR_PATH = "/user/pkling/raidtest";
|
|
||||||
final static Path FILE_PATH0 =
|
|
||||||
new Path("/user/pkling/raidtest/raidfsck.test");
|
|
||||||
final static Path FILE_PATH1 =
|
|
||||||
new Path("/user/pkling/raidtest/raidfsck2.test");
|
|
||||||
final static Path RAID_PATH = new Path("/destraid/user/pkling/raidtest");
|
|
||||||
final static String HAR_NAME = "raidtest_raid.har";
|
|
||||||
final static String RAID_DIR = "/destraid";
|
|
||||||
|
|
||||||
Configuration conf = null;
|
|
||||||
Configuration raidConf = null;
|
|
||||||
Configuration clientConf = null;
|
|
||||||
MiniDFSCluster cluster = null;
|
|
||||||
DistributedFileSystem dfs = null;
|
|
||||||
RaidNode rnode = null;
|
|
||||||
|
|
||||||
|
|
||||||
RaidShell shell = null;
|
|
||||||
String[] args = null;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* creates a MiniDFS instance with a raided file in it
|
|
||||||
*/
|
|
||||||
private void setUp(boolean doHar) throws IOException, ClassNotFoundException {
|
|
||||||
|
|
||||||
final int timeBeforeHar;
|
|
||||||
if (doHar) {
|
|
||||||
timeBeforeHar = 0;
|
|
||||||
} else {
|
|
||||||
timeBeforeHar = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
|
|
||||||
conf.set("raid.config.file", CONFIG_FILE);
|
|
||||||
conf.setBoolean("raid.config.reload", true);
|
|
||||||
conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
|
|
||||||
|
|
||||||
// scan all policies once every 5 second
|
|
||||||
conf.setLong("raid.policy.rescan.interval", 5000);
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
// do not use map-reduce cluster for Raiding
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
// use local block fixer
|
|
||||||
conf.set("raid.blockfix.classname",
|
|
||||||
"org.apache.hadoop.raid.LocalBlockFixer");
|
|
||||||
|
|
||||||
conf.set("raid.server.address", "localhost:0");
|
|
||||||
conf.setInt("hdfs.raid.stripeLength", STRIPE_BLOCKS);
|
|
||||||
conf.set("hdfs.raid.locations", RAID_DIR);
|
|
||||||
|
|
||||||
conf.setInt("dfs.corruptfilesreturned.max", 500);
|
|
||||||
|
|
||||||
conf.setBoolean("dfs.permissions", false);
|
|
||||||
|
|
||||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES)
|
|
||||||
.build();
|
|
||||||
cluster.waitActive();
|
|
||||||
dfs = (DistributedFileSystem) cluster.getFileSystem();
|
|
||||||
String namenode = dfs.getUri().toString();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
|
|
||||||
FileWriter fileWriter = new FileWriter(CONFIG_FILE);
|
|
||||||
fileWriter.write("<?xml version=\"1.0\"?>\n");
|
|
||||||
String str =
|
|
||||||
"<configuration> " +
|
|
||||||
" <srcPath prefix=\"" + DIR_PATH + "\"> " +
|
|
||||||
" <policy name = \"RaidTest1\"> " +
|
|
||||||
" <erasureCode>xor</erasureCode> " +
|
|
||||||
" <destPath> " + RAID_DIR + " </destPath> " +
|
|
||||||
" <property> " +
|
|
||||||
" <name>targetReplication</name> " +
|
|
||||||
" <value>1</value> " +
|
|
||||||
" <description>after RAIDing, decrease the replication " +
|
|
||||||
"factor of a file to this value.</description> " +
|
|
||||||
" </property> " +
|
|
||||||
" <property> " +
|
|
||||||
" <name>metaReplication</name> " +
|
|
||||||
" <value>1</value> " +
|
|
||||||
" <description> replication factor of parity file</description> " +
|
|
||||||
" </property> " +
|
|
||||||
" <property> " +
|
|
||||||
" <name>modTimePeriod</name> " +
|
|
||||||
" <value>2000</value> " +
|
|
||||||
" <description>time (milliseconds) after a file is modified " +
|
|
||||||
"to make it a candidate for RAIDing</description> " +
|
|
||||||
" </property> ";
|
|
||||||
|
|
||||||
if (timeBeforeHar >= 0) {
|
|
||||||
str +=
|
|
||||||
" <property> " +
|
|
||||||
" <name>time_before_har</name> " +
|
|
||||||
" <value>" + timeBeforeHar + "</value> " +
|
|
||||||
" <description> amount of time waited before har'ing parity " +
|
|
||||||
"files</description> " +
|
|
||||||
" </property> ";
|
|
||||||
}
|
|
||||||
|
|
||||||
str +=
|
|
||||||
" </policy>" +
|
|
||||||
" </srcPath>" +
|
|
||||||
"</configuration>";
|
|
||||||
|
|
||||||
fileWriter.write(str);
|
|
||||||
fileWriter.close();
|
|
||||||
|
|
||||||
createTestFile(FILE_PATH0);
|
|
||||||
createTestFile(FILE_PATH1);
|
|
||||||
|
|
||||||
Path[] filePaths = { FILE_PATH0, FILE_PATH1 };
|
|
||||||
raidTestFiles(RAID_PATH, filePaths, doHar);
|
|
||||||
|
|
||||||
clientConf = new Configuration(raidConf);
|
|
||||||
clientConf.set("fs.hdfs.impl",
|
|
||||||
"org.apache.hadoop.hdfs.DistributedRaidFileSystem");
|
|
||||||
clientConf.set("fs.raid.underlyingfs.impl",
|
|
||||||
"org.apache.hadoop.hdfs.DistributedFileSystem");
|
|
||||||
|
|
||||||
// prepare shell and arguments
|
|
||||||
shell = new RaidShell(clientConf);
|
|
||||||
args = new String[2];
|
|
||||||
args[0] = "-fsck";
|
|
||||||
args[1] = DIR_PATH;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates test file consisting of random data
|
|
||||||
*/
|
|
||||||
private void createTestFile(Path filePath) throws IOException {
|
|
||||||
Random rand = new Random();
|
|
||||||
FSDataOutputStream stm = dfs.create(filePath, true,
|
|
||||||
conf.getInt("io.file.buffer.size",
|
|
||||||
4096), REPL, BLOCK_SIZE);
|
|
||||||
|
|
||||||
final byte[] b = new byte[(int) BLOCK_SIZE];
|
|
||||||
for (int i = 0; i < FILE_BLOCKS; i++) {
|
|
||||||
rand.nextBytes(b);
|
|
||||||
stm.write(b);
|
|
||||||
}
|
|
||||||
stm.close();
|
|
||||||
LOG.info("test file created");
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* raids test file
|
|
||||||
*/
|
|
||||||
private void raidTestFiles(Path raidPath, Path[] filePaths, boolean doHar)
|
|
||||||
throws IOException, ClassNotFoundException {
|
|
||||||
// create RaidNode
|
|
||||||
raidConf = new Configuration(conf);
|
|
||||||
raidConf.set(RaidNode.RAID_LOCATION_KEY, RAID_DIR);
|
|
||||||
raidConf.setInt("raid.blockfix.interval", 1000);
|
|
||||||
raidConf.setLong("har.block.size", BLOCK_SIZE * 3);
|
|
||||||
// the RaidNode does the raiding inline (instead of submitting to MR node)
|
|
||||||
conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
|
|
||||||
rnode = RaidNode.createRaidNode(null, raidConf);
|
|
||||||
|
|
||||||
for (Path filePath: filePaths) {
|
|
||||||
long waitStart = Time.now();
|
|
||||||
boolean raided = false;
|
|
||||||
|
|
||||||
Path parityFilePath = new Path(RAID_DIR,
|
|
||||||
filePath.toString().substring(1));
|
|
||||||
|
|
||||||
while (!raided) {
|
|
||||||
try {
|
|
||||||
FileStatus[] listPaths = dfs.listStatus(raidPath);
|
|
||||||
if (listPaths != null) {
|
|
||||||
if (doHar) {
|
|
||||||
// case with HAR
|
|
||||||
for (FileStatus f: listPaths) {
|
|
||||||
if (f.getPath().toString().endsWith(".har")) {
|
|
||||||
// check if the parity file is in the index
|
|
||||||
final Path indexPath = new Path(f.getPath(), "_index");
|
|
||||||
final FileStatus indexFileStatus =
|
|
||||||
dfs.getFileStatus(indexPath);
|
|
||||||
final HarIndex harIndex =
|
|
||||||
new HarIndex(dfs.open(indexPath), indexFileStatus.getLen());
|
|
||||||
final HarIndex.IndexEntry indexEntry =
|
|
||||||
harIndex.findEntryByFileName(parityFilePath.toString());
|
|
||||||
if (indexEntry != null) {
|
|
||||||
LOG.info("raid file " + parityFilePath.toString() +
|
|
||||||
" found in Har archive: " +
|
|
||||||
f.getPath().toString() +
|
|
||||||
" ts=" + indexEntry.mtime);
|
|
||||||
raided = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// case without HAR
|
|
||||||
for (FileStatus f : listPaths) {
|
|
||||||
Path found = new Path(f.getPath().toUri().getPath());
|
|
||||||
if (parityFilePath.equals(found)) {
|
|
||||||
LOG.info("raid file found: " + f.getPath().toString());
|
|
||||||
raided = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (FileNotFoundException ignore) {
|
|
||||||
}
|
|
||||||
if (!raided) {
|
|
||||||
if (Time.now() > waitStart + 40000L) {
|
|
||||||
LOG.error("parity file not created after 40s");
|
|
||||||
throw new IOException("parity file not HARed after 40s");
|
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rnode.stop();
|
|
||||||
rnode.join();
|
|
||||||
rnode = null;
|
|
||||||
LOG.info("test file raided");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* sleeps for up to 20s until the number of corrupt files
|
|
||||||
* in the file system is equal to the number specified
|
|
||||||
*/
|
|
||||||
private void waitUntilCorruptFileCount(DistributedFileSystem dfs,
|
|
||||||
int corruptFiles)
|
|
||||||
throws IOException {
|
|
||||||
long waitStart = Time.now();
|
|
||||||
while (RaidDFSUtil.getCorruptFiles(dfs).length != corruptFiles) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
} catch (InterruptedException ignore) {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Time.now() > waitStart + 20000L) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int corruptFilesFound = RaidDFSUtil.getCorruptFiles(dfs).length;
|
|
||||||
if (corruptFilesFound != corruptFiles) {
|
|
||||||
throw new IOException("expected " + corruptFiles +
|
|
||||||
" corrupt files but got " +
|
|
||||||
corruptFilesFound);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* removes a specified block from MiniDFS storage and reports it as corrupt
|
|
||||||
*/
|
|
||||||
private void removeAndReportBlock(DistributedFileSystem blockDfs,
|
|
||||||
Path filePath,
|
|
||||||
LocatedBlock block)
|
|
||||||
throws IOException {
|
|
||||||
TestRaidDfs.corruptBlock(cluster, filePath, block.getBlock(), NUM_DATANODES, true);
|
|
||||||
|
|
||||||
// report deleted block to the name node
|
|
||||||
LocatedBlock[] toReport = { block };
|
|
||||||
blockDfs.getClient().getNamenode().reportBadBlocks(toReport);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* removes a file block in the specified stripe
|
|
||||||
*/
|
|
||||||
private void removeFileBlock(Path filePath, int stripe, int blockInStripe)
|
|
||||||
throws IOException {
|
|
||||||
LocatedBlocks fileBlocks = dfs.getClient().getNamenode().
|
|
||||||
getBlockLocations(filePath.toString(), 0, FILE_BLOCKS * BLOCK_SIZE);
|
|
||||||
if (fileBlocks.locatedBlockCount() != FILE_BLOCKS) {
|
|
||||||
throw new IOException("expected " + FILE_BLOCKS +
|
|
||||||
" file blocks but found " +
|
|
||||||
fileBlocks.locatedBlockCount());
|
|
||||||
}
|
|
||||||
if (blockInStripe >= STRIPE_BLOCKS) {
|
|
||||||
throw new IOException("blockInStripe is " + blockInStripe +
|
|
||||||
" but must be smaller than " + STRIPE_BLOCKS);
|
|
||||||
}
|
|
||||||
LocatedBlock block = fileBlocks.get(stripe * STRIPE_BLOCKS + blockInStripe);
|
|
||||||
removeAndReportBlock(dfs, filePath, block);
|
|
||||||
LOG.info("removed file " + filePath.toString() + " block " +
|
|
||||||
stripe * STRIPE_BLOCKS + " in stripe " + stripe);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* removes a parity block in the specified stripe
|
|
||||||
*/
|
|
||||||
private void removeParityBlock(Path filePath, int stripe) throws IOException {
|
|
||||||
// find parity file
|
|
||||||
Path destPath = new Path(RAID_DIR);
|
|
||||||
RaidNode.ParityFilePair ppair = null;
|
|
||||||
|
|
||||||
ppair = RaidNode.getParityFile(destPath, filePath, conf);
|
|
||||||
String parityPathStr = ppair.getPath().toUri().getPath();
|
|
||||||
LOG.info("parity path: " + parityPathStr);
|
|
||||||
FileSystem parityFS = ppair.getFileSystem();
|
|
||||||
if (!(parityFS instanceof DistributedFileSystem)) {
|
|
||||||
throw new IOException("parity file is not on distributed file system");
|
|
||||||
}
|
|
||||||
DistributedFileSystem parityDFS = (DistributedFileSystem) parityFS;
|
|
||||||
|
|
||||||
|
|
||||||
// now corrupt the block corresponding to the stripe selected
|
|
||||||
FileStatus parityFileStatus =
|
|
||||||
parityDFS.getFileStatus(new Path(parityPathStr));
|
|
||||||
long parityBlockSize = parityFileStatus.getBlockSize();
|
|
||||||
long parityFileLength = parityFileStatus.getLen();
|
|
||||||
long parityFileLengthInBlocks = (parityFileLength / parityBlockSize) +
|
|
||||||
(((parityFileLength % parityBlockSize) == 0) ? 0L : 1L);
|
|
||||||
if (parityFileLengthInBlocks <= stripe) {
|
|
||||||
throw new IOException("selected stripe " + stripe +
|
|
||||||
" but parity file only has " +
|
|
||||||
parityFileLengthInBlocks + " blocks");
|
|
||||||
}
|
|
||||||
if (parityBlockSize != BLOCK_SIZE) {
|
|
||||||
throw new IOException("file block size is " + BLOCK_SIZE +
|
|
||||||
" but parity file block size is " +
|
|
||||||
parityBlockSize);
|
|
||||||
}
|
|
||||||
LocatedBlocks parityFileBlocks = parityDFS.getClient().getNamenode().
|
|
||||||
getBlockLocations(parityPathStr, 0, parityFileLength);
|
|
||||||
if (parityFileBlocks.locatedBlockCount() != parityFileLengthInBlocks) {
|
|
||||||
throw new IOException("expected " + parityFileLengthInBlocks +
|
|
||||||
" parity file blocks but got " +
|
|
||||||
parityFileBlocks.locatedBlockCount() +
|
|
||||||
" blocks");
|
|
||||||
}
|
|
||||||
LocatedBlock parityFileBlock = parityFileBlocks.get(stripe);
|
|
||||||
removeAndReportBlock(parityDFS, new Path(parityPathStr), parityFileBlock);
|
|
||||||
LOG.info("removed parity file block/stripe " + stripe +
|
|
||||||
" for " + filePath.toString());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* removes a block from the har part file
|
|
||||||
*/
|
|
||||||
private void removeHarParityBlock(int block) throws IOException {
|
|
||||||
Path harPath = new Path(RAID_PATH, HAR_NAME);
|
|
||||||
FileStatus [] listPaths = dfs.listStatus(harPath);
|
|
||||||
|
|
||||||
boolean deleted = false;
|
|
||||||
|
|
||||||
for (FileStatus f: listPaths) {
|
|
||||||
if (f.getPath().getName().startsWith("part-")) {
|
|
||||||
final Path partPath = new Path(f.getPath().toUri().getPath());
|
|
||||||
final LocatedBlocks partBlocks = dfs.getClient().getNamenode().
|
|
||||||
getBlockLocations(partPath.toString(),
|
|
||||||
0,
|
|
||||||
f.getLen());
|
|
||||||
|
|
||||||
if (partBlocks.locatedBlockCount() <= block) {
|
|
||||||
throw new IOException("invalid har block " + block);
|
|
||||||
}
|
|
||||||
|
|
||||||
final LocatedBlock partBlock = partBlocks.get(block);
|
|
||||||
removeAndReportBlock(dfs, partPath, partBlock);
|
|
||||||
LOG.info("removed block " + block + "/" +
|
|
||||||
partBlocks.locatedBlockCount() +
|
|
||||||
" of file " + partPath.toString() +
|
|
||||||
" block size " + partBlock.getBlockSize());
|
|
||||||
deleted = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!deleted) {
|
|
||||||
throw new IOException("cannot find part file in " + harPath.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with no missing blocks
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testClean() throws Exception {
|
|
||||||
LOG.info("testClean");
|
|
||||||
setUp(false);
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with missing block in file block but not in parity block
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFileBlockMissing() throws Exception {
|
|
||||||
LOG.info("testFileBlockMissing");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 0, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with missing block in parity block but not in file block
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testParityBlockMissing() throws Exception {
|
|
||||||
LOG.info("testParityBlockMissing");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeParityBlock(FILE_PATH0, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with missing block in both file block and parity block
|
|
||||||
* in different stripes
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFileBlockAndParityBlockMissingInDifferentStripes()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("testFileBlockAndParityBlockMissingInDifferentStripes");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 0, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
removeParityBlock(FILE_PATH0, 1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 2);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with missing block in both file block and parity block
|
|
||||||
* in same stripe
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFileBlockAndParityBlockMissingInSameStripe()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("testFileBlockAndParityBlockMissingInSameStripe");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeParityBlock(FILE_PATH0, 1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 2);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 1, but returns " +
|
|
||||||
Integer.toString(result), result == 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with two missing file blocks in same stripe
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void test2FileBlocksMissingInSameStripe()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("test2FileBlocksMissingInSameStripe");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 1, but returns " +
|
|
||||||
Integer.toString(result), result == 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with two missing file blocks in different stripes
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void test2FileBlocksMissingInDifferentStripes()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("test2FileBlocksMissingInDifferentStripes");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
removeFileBlock(FILE_PATH0, 0, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with file block missing (HAR)
|
|
||||||
* use 2 files to verify HAR offset logic in RaidShell fsck
|
|
||||||
* both files have one corrupt block, parity blocks are clean
|
|
||||||
*
|
|
||||||
* parity blocks in har (file.stripe):
|
|
||||||
* +-----+-----+-----+ +-----+
|
|
||||||
* | 0.0 | 0.1 | 1.0 | | 1.1 |
|
|
||||||
* +-----+-----+-----+ +-----+
|
|
||||||
* 0 1
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFileBlockMissingHar()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("testFileBlockMissingHar");
|
|
||||||
setUp(true);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 1);
|
|
||||||
removeFileBlock(FILE_PATH1, 1, 1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 2);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with file block missing (HAR)
|
|
||||||
* use 2 files to verify HAR offset logic in RaidShell fsck
|
|
||||||
*
|
|
||||||
* parity blocks in har (file.stripe):
|
|
||||||
* +-----+-----+-----+ +-----+
|
|
||||||
* | 0.0 | 0.1 | 1.0 | | 1.1 |
|
|
||||||
* +-----+-----+-----+ +-----+
|
|
||||||
* 0 1
|
|
||||||
*
|
|
||||||
* corrupt file 0, stripe 0 file block 0
|
|
||||||
* corrupt file 0, stripe 1 file block 0
|
|
||||||
* corrupt file 1, stripe 0 file block 0
|
|
||||||
* corrupt file 1, stripe 1 file block 0
|
|
||||||
* corrupt har block 0
|
|
||||||
* both files should be corrupt
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFileBlockAndParityBlockMissingHar1()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("testFileBlockAndParityBlockMissingHar1");
|
|
||||||
setUp(true);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 0, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 0);
|
|
||||||
removeFileBlock(FILE_PATH1, 0, 0);
|
|
||||||
removeFileBlock(FILE_PATH1, 1, 0);
|
|
||||||
removeHarParityBlock(0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 3);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 2, but returns " +
|
|
||||||
Integer.toString(result), result == 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks fsck with file block missing (HAR)
|
|
||||||
* use 2 files to verify HAR offset logic in RaidShell fsck
|
|
||||||
*
|
|
||||||
* parity blocks in har (file.stripe):
|
|
||||||
* +-----+-----+-----+ +-----+
|
|
||||||
* | 0.0 | 0.1 | 1.0 | | 1.1 |
|
|
||||||
* +-----+-----+-----+ +-----+
|
|
||||||
* 0 1
|
|
||||||
*
|
|
||||||
* corrupt file 0, stripe 0 file block 0
|
|
||||||
* corrupt file 0, stripe 1 file block 0
|
|
||||||
* corrupt file 1, stripe 0 file block 0
|
|
||||||
* corrupt file 1, stripe 1 file block 0
|
|
||||||
* corrupt har block 1
|
|
||||||
* only file 2 should be corrupt
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testFileBlockAndParityBlockMissingHar2()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("testFileBlockAndParityBlockMissingHar2");
|
|
||||||
setUp(true);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 0, 0);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 0);
|
|
||||||
removeFileBlock(FILE_PATH1, 0, 0);
|
|
||||||
removeFileBlock(FILE_PATH1, 1, 0);
|
|
||||||
removeHarParityBlock(1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 3);
|
|
||||||
|
|
||||||
int result = ToolRunner.run(shell, args);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 1, but returns " +
|
|
||||||
Integer.toString(result), result == 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* checks that fsck does not report corrupt file that is not in
|
|
||||||
* the specified path
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testPathFilter()
|
|
||||||
throws Exception {
|
|
||||||
LOG.info("testPathFilter");
|
|
||||||
setUp(false);
|
|
||||||
waitUntilCorruptFileCount(dfs, 0);
|
|
||||||
removeParityBlock(FILE_PATH0, 1);
|
|
||||||
waitUntilCorruptFileCount(dfs, 1);
|
|
||||||
removeFileBlock(FILE_PATH0, 1, 0);
|
|
||||||
waitUntilCorruptFileCount(dfs, 2);
|
|
||||||
|
|
||||||
String[] otherArgs = new String[2];
|
|
||||||
otherArgs[0] = "-fsck";
|
|
||||||
otherArgs[1] = "/user/pkling/other";
|
|
||||||
int result = ToolRunner.run(shell, otherArgs);
|
|
||||||
|
|
||||||
assertTrue("fsck should return 0, but returns " +
|
|
||||||
Integer.toString(result), result == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@After
|
|
||||||
public void tearDown() throws Exception {
|
|
||||||
if (rnode != null) {
|
|
||||||
rnode.stop();
|
|
||||||
rnode.join();
|
|
||||||
rnode = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cluster != null) {
|
|
||||||
cluster.shutdown();
|
|
||||||
cluster = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
dfs = null;
|
|
||||||
|
|
||||||
LOG.info("Test cluster shut down");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,135 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.RaidDFSUtil;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
|
||||||
import org.apache.hadoop.mapred.Reporter;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
|
|
||||||
public class TestReedSolomonDecoder {
|
|
||||||
final static Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.TestReedSolomonDecoder");
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
final static int NUM_DATANODES = 3;
|
|
||||||
|
|
||||||
Configuration conf;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDecoder() throws Exception {
|
|
||||||
mySetup();
|
|
||||||
int stripeSize = 10;
|
|
||||||
int paritySize = 4;
|
|
||||||
long blockSize = 8192;
|
|
||||||
Path file1 = new Path("/user/raidtest/file1");
|
|
||||||
Path recoveredFile1 = new Path("/user/raidtest/file1.recovered");
|
|
||||||
Path parityFile1 = new Path("/rsraid/user/raidtest/file1");
|
|
||||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 25, blockSize);
|
|
||||||
FileStatus file1Stat = fileSys.getFileStatus(file1);
|
|
||||||
|
|
||||||
conf.setInt("raid.rsdecoder.bufsize", 512);
|
|
||||||
conf.setInt("raid.rsencoder.bufsize", 512);
|
|
||||||
|
|
||||||
try {
|
|
||||||
// First encode the file.
|
|
||||||
ReedSolomonEncoder encoder = new ReedSolomonEncoder(
|
|
||||||
conf, stripeSize, paritySize);
|
|
||||||
short parityRepl = 1;
|
|
||||||
encoder.encodeFile(fileSys, file1, fileSys, parityFile1, parityRepl,
|
|
||||||
Reporter.NULL);
|
|
||||||
|
|
||||||
// Ensure there are no corrupt files yet.
|
|
||||||
DistributedFileSystem dfs = (DistributedFileSystem)fileSys;
|
|
||||||
String[] corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals(corruptFiles.length, 0);
|
|
||||||
|
|
||||||
// Now corrupt the file.
|
|
||||||
long corruptOffset = blockSize * 5;
|
|
||||||
FileStatus srcStat = fileSys.getFileStatus(file1);
|
|
||||||
LocatedBlocks locations = RaidDFSUtil.getBlockLocations(dfs,
|
|
||||||
file1.toUri().getPath(), 0, srcStat.getLen());
|
|
||||||
corruptBlock(locations.get(5).getBlock());
|
|
||||||
corruptBlock(locations.get(6).getBlock());
|
|
||||||
TestBlockFixer.reportCorruptBlocks(dfs, file1, new int[]{5, 6},
|
|
||||||
srcStat.getBlockSize());
|
|
||||||
|
|
||||||
// Ensure file is corrupted.
|
|
||||||
corruptFiles = RaidDFSUtil.getCorruptFiles(dfs);
|
|
||||||
assertEquals(corruptFiles.length, 1);
|
|
||||||
assertEquals(corruptFiles[0], file1.toString());
|
|
||||||
|
|
||||||
// Fix the file.
|
|
||||||
ReedSolomonDecoder decoder = new ReedSolomonDecoder(
|
|
||||||
conf, stripeSize, paritySize);
|
|
||||||
decoder.decodeFile(fileSys, file1, fileSys, parityFile1,
|
|
||||||
corruptOffset, recoveredFile1);
|
|
||||||
assertTrue(TestRaidDfs.validateFile(
|
|
||||||
fileSys, recoveredFile1, file1Stat.getLen(), crc1));
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void corruptBlock(ExtendedBlock block) throws IOException {
|
|
||||||
assertTrue("Could not corrupt block",
|
|
||||||
dfs.corruptBlockOnDataNodes(block) > 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mySetup() throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
conf.setBoolean("dfs.permissions", false);
|
|
||||||
|
|
||||||
dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
String namenode = fileSys.getUri().toString();
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void myTearDown() throws Exception {
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,94 +0,0 @@
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hadoop.raid;
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
|
||||||
import org.apache.hadoop.hdfs.TestRaidDfs;
|
|
||||||
import org.apache.hadoop.mapred.Reporter;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
|
|
||||||
public class TestReedSolomonEncoder {
|
|
||||||
final static Log LOG = LogFactory.getLog(
|
|
||||||
"org.apache.hadoop.raid.TestReedSolomonEncoder");
|
|
||||||
final static String TEST_DIR = new File(System.getProperty("test.build.data",
|
|
||||||
"target/test-data")).getAbsolutePath();
|
|
||||||
final static int NUM_DATANODES = 3;
|
|
||||||
|
|
||||||
Configuration conf;
|
|
||||||
String namenode = null;
|
|
||||||
MiniDFSCluster dfs = null;
|
|
||||||
FileSystem fileSys = null;
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEncoder() throws Exception {
|
|
||||||
mySetup();
|
|
||||||
int stripeSize = 10;
|
|
||||||
int paritySize = 4;
|
|
||||||
long blockSize = 8192;
|
|
||||||
Path file1 = new Path("/user/raidtest/file1");
|
|
||||||
Path parityFile1 = new Path("/rsraid/user/raidtest/file1");
|
|
||||||
long crc1 = TestRaidDfs.createTestFilePartialLastBlock(fileSys, file1,
|
|
||||||
1, 25, blockSize);
|
|
||||||
try {
|
|
||||||
ReedSolomonEncoder encoder = new ReedSolomonEncoder(
|
|
||||||
conf, stripeSize, paritySize);
|
|
||||||
short parityRepl = 1;
|
|
||||||
encoder.encodeFile(fileSys, file1, fileSys, parityFile1, parityRepl,
|
|
||||||
Reporter.NULL);
|
|
||||||
|
|
||||||
FileStatus parityStat = fileSys.getFileStatus(parityFile1);
|
|
||||||
assertEquals(4*8192*3, parityStat.getLen());
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
myTearDown();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mySetup() throws Exception {
|
|
||||||
|
|
||||||
new File(TEST_DIR).mkdirs(); // Make sure data directory exists
|
|
||||||
conf = new Configuration();
|
|
||||||
|
|
||||||
// make all deletions not go through Trash
|
|
||||||
conf.set("fs.shell.delete.classname", "org.apache.hadoop.hdfs.DFSClient");
|
|
||||||
|
|
||||||
dfs = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
|
|
||||||
dfs.waitActive();
|
|
||||||
fileSys = dfs.getFileSystem();
|
|
||||||
namenode = fileSys.getUri().toString();
|
|
||||||
|
|
||||||
FileSystem.setDefaultUri(conf, namenode);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private void myTearDown() throws Exception {
|
|
||||||
if (dfs != null) { dfs.shutdown(); }
|
|
||||||
}
|
|
||||||
}
|
|
|
@@ -34,7 +34,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <module>hadoop-hdfs</module>
    <module>hadoop-hdfs-httpfs</module>
    <module>hadoop-hdfs/src/contrib/bkjournal</module>
    <module>hadoop-hdfs-raid</module>
  </modules>

  <build>
@@ -1711,10 +1711,6 @@
                output="${build.dir.eclipse-contrib-classes}/gridmix/main" />
        <source path="${contrib.dir}/gridmix/src/test"
                output="${build.dir.eclipse-contrib-classes}/gridmix/test" />
        <source path="${contrib.dir}/raid/src/java"
                output="${build.dir.eclipse-contrib-classes}/raid/main" />
        <source path="${contrib.dir}/raid/src/test"
                output="${build.dir.eclipse-contrib-classes}/raid/test" />
        <source path="${contrib.dir}/vaidya/src/java"
                output="${build.dir.eclipse-contrib-classes}/vaidya/main" />
        <source path="${contrib.dir}/vertica/src/java"
@@ -62,7 +62,6 @@
      <fileset dir="." includes="streaming/build.xml"/>
      <fileset dir="." includes="gridmix/build.xml"/>
      <fileset dir="." includes="vertica/build.xml"/>
      <fileset dir="." includes="raid/build.xml"/>
    </subant>
    <available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
    <fail if="testsfailed">Tests failed!</fail>
@@ -1,201 +0,0 @@
# Copyright 2008 The Apache Software Foundation Licensed under the
# Apache License, Version 2.0 (the "License"); you may not use this
# file except in compliance with the License. You may obtain a copy
# of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless
# required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.

This package implements a Distributed Raid File System. It is used along with
an instance of the Hadoop Distributed File System (HDFS). It can be used to
provide better protection against data corruption. It can also be used to
reduce the total storage requirements of HDFS.

The Distributed Raid File System consists of two main software components. The first
component is the RaidNode, a daemon that creates parity files from specified HDFS files.
The second component, "raidfs", is software layered over an HDFS client that
intercepts all calls that an application makes to the HDFS client. If HDFS encounters
corrupted data while reading a file, the raidfs client detects it; it uses the
relevant parity blocks to recover the corrupted data (if possible) and returns
the data to the application. The application is completely unaware of the
fact that parity data was used to satisfy its read request.

The primary use of this feature is to save disk space for HDFS files.
HDFS typically stores data in triplicate.
The Distributed Raid File System can be configured in such a way that a set of
data blocks of a file are combined together to form one or more parity blocks.
This allows one to reduce the replication factor of an HDFS file from 3 to 2
while keeping the failure probability roughly the same as before. This typically
results in saving 25% to 30% of storage space in an HDFS cluster.
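
As a rough worked example (assuming the default XOR code with a stripe length of
10, source replication reduced from 3 to 2, and parity files kept at replication
2; these numbers are illustrative): each source block then costs 2 copies plus
2/10 of a parity block, about 2.2 block-equivalents instead of 3, a saving of
roughly 27%, consistent with the 25% to 30% figure above.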

--------------------------------------------------------------------------------

BUILDING:

In HADOOP_PREFIX, run ant package to build Hadoop and its contrib packages.

--------------------------------------------------------------------------------

INSTALLING and CONFIGURING:

The entire code is packaged in the form of a single jar file hadoop-*-raid.jar.
To use HDFS Raid, you need to put the above mentioned jar file on
the CLASSPATH. The easiest way is to copy the hadoop-*-raid.jar
from HADOOP_PREFIX/build/contrib/raid to HADOOP_PREFIX/lib. Alternatively
you can modify HADOOP_CLASSPATH to include this jar, in conf/hadoop-env.sh.
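
For example, a line like the following in conf/hadoop-env.sh should work (this is
only a sketch; adjust the jar name to whatever your build actually produced):

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HADOOP_PREFIX/lib/hadoop-*-raid.jar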

There is a single configuration file named raid.xml that describes the HDFS
path(s) that you want to raid. A sample of this file can be found in
src/contrib/raid/conf/raid.xml. Please edit the entries in this file to list the
path(s) that you want to raid. Then, edit the hdfs-site.xml file for
your installation to include a reference to this raid.xml. You can add the
following to your hdfs-site.xml:
<property>
  <name>raid.config.file</name>
  <value>/mnt/hdfs/DFS/conf/raid.xml</value>
  <description>This is needed by the RaidNode</description>
</property>
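
For reference, a minimal raid.xml might look like the following sketch. It is
modeled on the policy format used by the Raid unit tests; the source path, policy
name and values below are illustrative, not defaults:

<configuration>
  <srcPath prefix="/user/foo/warehouse">
    <policy name="RaidPolicy1">
      <erasureCode>xor</erasureCode>
      <destPath>/raid</destPath>
      <property>
        <name>targetReplication</name>
        <value>2</value>
        <description>replication factor of the source file after raiding</description>
      </property>
      <property>
        <name>metaReplication</name>
        <value>2</value>
        <description>replication factor of the parity file</description>
      </property>
      <property>
        <name>modTimePeriod</name>
        <value>3600000</value>
        <description>time (milliseconds) since the last modification before a
        file becomes a candidate for raiding</description>
      </property>
    </policy>
  </srcPath>
</configuration>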

Please add an entry to your hdfs-site.xml to enable hdfs clients to use the
parity bits to recover corrupted data.

<property>
  <name>fs.hdfs.impl</name>
  <value>org.apache.hadoop.hdfs.DistributedRaidFileSystem</value>
  <description>The FileSystem for hdfs: uris.</description>
</property>


--------------------------------------------------------------------------------

OPTIONAL CONFIGURATION:

The following properties can be set in hdfs-site.xml to further tune your configuration:

Specifies the location where parity files are located.
<property>
  <name>hdfs.raid.locations</name>
  <value>hdfs://newdfs.data:8000/raid</value>
  <description>The location for parity files. If this is
  not defined, it defaults to /raid.
  </description>
</property>

Specifies the parity stripe length.
<property>
  <name>hdfs.raid.stripeLength</name>
  <value>10</value>
  <description>The number of blocks in a file to be combined into
  a single raid parity block. The default value is 5. The higher
  the number, the more disk space you save when you enable raid,
  at the cost of weaker protection within each stripe.
  </description>
</property>

Specifies the size of HAR part-files.
<property>
  <name>raid.har.partfile.size</name>
  <value>4294967296</value>
  <description>The size of HAR part files that store raid parity
  files. The default is 4GB. The higher the number, the fewer the
  files used to store the HAR archive.
  </description>
</property>

Specifies which implementation of RaidNode to use.
<property>
  <name>raid.classname</name>
  <value>org.apache.hadoop.raid.DistRaidNode</value>
  <description>Specify which implementation of RaidNode to use
  (class name).
  </description>
</property>


Specifies the period at which the RaidNode re-calculates (if necessary)
the parity blocks.
<property>
  <name>raid.policy.rescan.interval</name>
  <value>5000</value>
  <description>Specify the periodicity in milliseconds after which
  all source paths are rescanned and parity blocks recomputed if
  necessary. By default, this value is 1 hour.
  </description>
</property>

By default, the DistributedRaidFileSystem assumes that the underlying file
system is the DistributedFileSystem. If you want to layer the DistributedRaidFileSystem
over some other file system, then define a property named fs.raid.underlyingfs.impl
that specifies the name of the underlying class. For example, if you want to layer
the DistributedRaidFileSystem over an instance of NewFileSystem, then
<property>
  <name>fs.raid.underlyingfs.impl</name>
  <value>org.apache.hadoop.new.NewFileSystem</value>
  <description>Specify the filesystem that is layered immediately below the
  DistributedRaidFileSystem. By default, this value is DistributedFileSystem.
  </description>
</property>


--------------------------------------------------------------------------------

ADMINISTRATION:

The Distributed Raid File System provides support for administration at runtime without
any downtime to cluster services. It is possible to add/delete new paths to be raided without
interrupting any load on the cluster. If you change raid.xml, its contents will be
reloaded within seconds and the new contents will take effect immediately.

Designate one machine in your cluster to run the RaidNode software. You can run this daemon
on any machine irrespective of whether that machine is running any other hadoop daemon or not.
You can start the RaidNode by running the following on the selected machine:
nohup $HADOOP_PREFIX/bin/hadoop org.apache.hadoop.raid.RaidNode >> /xxx/logs/hadoop-root-raidnode-hadoop.xxx.com.log &

Optionally, we provide two scripts to start and stop the RaidNode. Copy the scripts
start-raidnode.sh and stop-raidnode.sh to the directory $HADOOP_PREFIX/bin on the machine
where you would like to deploy the daemon. You can start or stop the RaidNode by directly
calling the scripts from that machine. If you want to deploy the RaidNode remotely,
copy start-raidnode-remote.sh and stop-raidnode-remote.sh to $HADOOP_PREFIX/bin on
the machine from which you want to trigger the remote deployment, and create a text
file $HADOOP_PREFIX/conf/raidnode at the same machine containing the name of the server
where the RaidNode should run. These scripts run ssh to the specified machine and
invoke start/stop-raidnode.sh there. As an example, you might want to change
start-mapred.sh on the JobTracker machine so that it automatically calls
start-raidnode-remote.sh (and do the equivalent thing for stop-mapred.sh and
stop-raidnode-remote.sh).
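
The raidnode file itself is just plain text containing the name of that server,
for example (a hypothetical host):

raidnode01.example.com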

To validate the integrity of a file system, run the RaidShell fsck as follows:
$HADOOP_PREFIX/bin/hadoop org.apache.hadoop.raid.RaidShell -fsck [path]

This will print a list of corrupt files (i.e., files which have lost too many
blocks and can no longer be fixed by Raid).


--------------------------------------------------------------------------------

IMPLEMENTATION:

The RaidNode periodically scans all the specified paths in the configuration
file. For each path, it recursively scans all files that have more than 2 blocks
and that have not been modified during the last few hours (default is 24 hours).
It picks the specified number of blocks (as specified by the stripe size)
from the file, generates a parity block by combining them, and
stores the result as another HDFS file in the specified destination
directory. There is a one-to-one mapping between an HDFS
file and its parity file. The RaidNode also periodically finds parity files
that are orphaned and deletes them.

The Distributed Raid FileSystem is layered over a DistributedFileSystem
instance and intercepts all calls that go into HDFS. HDFS throws a ChecksumException
or a BlockMissingException when a file read encounters bad data. The layered
Distributed Raid FileSystem catches these exceptions, locates the corresponding
parity file, extracts the original data from the parity file and feeds the
extracted data back to the application in a completely transparent way.

The layered Distributed Raid FileSystem does not fix the data loss that it
encounters while serving data. It merely makes the application transparently
use the parity blocks to re-create the original data. A command line tool
"fsckraid" is currently under development that will fix the corrupted files
by extracting the data from the associated parity files. An administrator
can run "fsckraid" manually as and when needed.
@ -1,64 +0,0 @@
|
||||||
<?xml version="1.0"?>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
|
||||||
this work for additional information regarding copyright ownership.
|
|
||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
(the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!--
|
|
||||||
Before you can run these subtargets directly, you need
|
|
||||||
to call at top-level: ant deploy-contrib compile-core-test
|
|
||||||
-->
|
|
||||||
<project name="raid" default="jar">
|
|
||||||
|
|
||||||
<import file="../build-contrib.xml"/>
|
|
||||||
|
|
||||||
<!-- the unit test classpath -->
|
|
||||||
<path id="contrib.classpath.raid">
|
|
||||||
<pathelement location="${hadoop.root}/src/contrib/raid/lib"/>
|
|
||||||
<path refid="contrib-classpath"/>
|
|
||||||
</path>
|
|
||||||
|
|
||||||
<target name="test" depends="compile,compile-test,test-junit" description="Automated Test Framework" if="test.available"/>
|
|
||||||
|
|
||||||
<target name="test-junit" depends="compile,compile-test" if="test.available">
|
|
||||||
<junit maxmemory="512m" showoutput="${test.output}" fork="yes" printsummary="yes" errorProperty="tests.failed"
|
|
||||||
haltonfailure="no" failureProperty="tests.failed" timeout="${test.timeout}">
|
|
||||||
|
|
||||||
<classpath refid="test.classpath"/>
|
|
||||||
<sysproperty key="test.build.data" value="${build.test}/data"/>
|
|
||||||
<sysproperty key="build.test" value="${build.test}"/>
|
|
||||||
<sysproperty key="user.dir" value="${build.test}/data"/>
|
|
||||||
<sysproperty key="fs.default.name" value="${fs.default.name}"/>
|
|
||||||
<sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
|
|
||||||
<sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
|
|
||||||
<sysproperty key="test.src.dir" value="${test.src.dir}"/>
|
|
||||||
<formatter type="${test.junit.output.format}" />
|
|
||||||
<batchtest todir="${build.test}" unless="testcase">
|
|
||||||
<fileset dir="${src.test}">
|
|
||||||
<include name="**/Test*.java"/>
|
|
||||||
</fileset>
|
|
||||||
</batchtest>
|
|
||||||
<batchtest todir="${build.test}" if="testcase">
|
|
||||||
<fileset dir="${src.test}">
|
|
||||||
<include name="**/${testcase}.java"/>
|
|
||||||
</fileset>
|
|
||||||
</batchtest>
|
|
||||||
</junit>
|
|
||||||
<fail if="tests.failed">Tests failed!</fail>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
</project>
|
|
||||||
|
|
|
@ -1,145 +0,0 @@
|
||||||
<?xml version="1.0" ?>
|
|
||||||
<!--
|
|
||||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
contributor license agreements. See the NOTICE file distributed with
|
|
||||||
this work for additional information regarding copyright ownership.
|
|
||||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
(the "License"); you may not use this file except in compliance with
|
|
||||||
the License. You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
-->
|
|
||||||
<ivy-module version="1.0" xmlns:m="http://ant.apache.org/ivy/maven">
|
|
||||||
<info organisation="org.apache.hadoop" module="${ant.project.name}">
|
|
||||||
<license name="Apache 2.0"/>
|
|
||||||
<description>Rumen</description>
|
|
||||||
</info>
|
|
||||||
<configurations defaultconfmapping="default">
|
|
||||||
<!--these match the Maven configurations-->
|
|
||||||
<conf name="default" extends="master,runtime"/>
|
|
||||||
<conf name="master" description="contains the artifact but no dependencies"/>
|
|
||||||
<conf name="runtime" description="runtime but not the artifact" />
|
|
||||||
|
|
||||||
<conf name="common" visibility="private" extends="runtime"
|
|
||||||
description="artifacts needed to compile/test the application"/>
|
|
||||||
<conf name="test" visibility="private" extends="runtime"/>
|
|
||||||
</configurations>
|
|
||||||
|
|
||||||
<publications>
|
|
||||||
<!--get the artifact from our module name-->
|
|
||||||
<artifact conf="master"/>
|
|
||||||
</publications>
|
|
||||||
<dependencies>
|
|
||||||
<dependency org="org.apache.hadoop" name="hadoop-annotations" rev="${hadoop-common.version}" conf="common->default"/>
|
|
||||||
<dependency org="org.apache.hadoop"
|
|
||||||
name="hadoop-common"
|
|
||||||
rev="${hadoop-common.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="org.apache.hadoop"
|
|
||||||
name="hadoop-common"
|
|
||||||
rev="${hadoop-common.version}"
|
|
||||||
conf="test->default">
|
|
||||||
<artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests"/>
|
|
||||||
</dependency>
|
|
||||||
<dependency org="org.apache.hadoop"
|
|
||||||
name="hadoop-hdfs"
|
|
||||||
rev="${hadoop-hdfs.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="org.apache.hadoop"
|
|
||||||
name="hadoop-hdfs"
|
|
||||||
rev="${hadoop-hdfs.version}"
|
|
||||||
conf="test->default">
|
|
||||||
<artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests"/>
|
|
||||||
</dependency>
|
|
||||||
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
|
|
||||||
rev="${yarn.version}" conf="common->default"/>
|
|
||||||
<dependency org="org.apache.hadoop" name="hadoop-yarn-common"
|
|
||||||
rev="${yarn.version}" conf="common->default"/>
|
|
||||||
<dependency org="org.apache.hadoop" name="hadoop-archives"
|
|
||||||
rev="${hadoop-common.version}" conf="common->default"/>
|
|
||||||
|
|
||||||
<dependency org="commons-logging"
|
|
||||||
name="commons-logging"
|
|
||||||
rev="${commons-logging.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="log4j"
|
|
||||||
name="log4j"
|
|
||||||
rev="${log4j.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="junit"
|
|
||||||
name="junit"
|
|
||||||
rev="${junit.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
|
|
||||||
<!-- necessary for Mini*Clusters -->
|
|
||||||
<dependency org="commons-httpclient"
|
|
||||||
name="commons-httpclient"
|
|
||||||
rev="${commons-httpclient.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="commons-codec"
|
|
||||||
name="commons-codec"
|
|
||||||
rev="${commons-codec.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="commons-net"
|
|
||||||
name="commons-net"
|
|
||||||
rev="${commons-net.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="org.mortbay.jetty"
|
|
||||||
name="jetty"
|
|
||||||
rev="${jetty.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="org.mortbay.jetty"
|
|
||||||
name="jetty-util"
|
|
||||||
rev="${jetty-util.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="org.mortbay.jetty"
|
|
||||||
name="jsp-api-2.1"
|
|
||||||
rev="${jetty.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="org.mortbay.jetty"
|
|
||||||
name="jsp-2.1"
|
|
||||||
rev="${jetty.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="org.mortbay.jetty"
|
|
||||||
name="servlet-api-2.5"
|
|
||||||
rev="${servlet-api-2.5.version}"
|
|
||||||
conf="common->master"/>
|
|
||||||
<dependency org="commons-cli"
|
|
||||||
name="commons-cli"
|
|
||||||
rev="${commons-cli.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="org.apache.avro"
|
|
||||||
name="avro"
|
|
||||||
rev="${avro.version}"
|
|
||||||
conf="common->default">
|
|
||||||
<exclude module="ant"/>
|
|
||||||
<exclude module="jetty"/>
|
|
||||||
<exclude module="slf4j-simple"/>
|
|
||||||
</dependency>
|
|
||||||
<dependency org="org.codehaus.jackson"
|
|
||||||
name="jackson-mapper-asl"
|
|
||||||
rev="${jackson.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="org.codehaus.jackson"
|
|
||||||
name="jackson-core-asl"
|
|
||||||
rev="${jackson.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
<dependency org="com.thoughtworks.paranamer"
|
|
||||||
name="paranamer"
|
|
||||||
rev="${paranamer.version}"
|
|
||||||
conf="common->default"/>
|
|
||||||
|
|
||||||
<!-- Exclusions for transitive dependencies pulled in by log4j -->
|
|
||||||
<exclude org="com.sun.jdmk"/>
|
|
||||||
<exclude org="com.sun.jmx"/>
|
|
||||||
<exclude org="javax.jms"/>
|
|
||||||
<exclude org="javax.mail"/>
|
|
||||||
|
|
||||||
</dependencies>
|
|
||||||
</ivy-module>
|
|
|
@@ -1,18 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This properties file lists the versions of the various artifacts used by hadoop.
# It drives ivy and the generation of a maven POM
# These are the versions of our dependencies (in alphabetical order)
@@ -257,11 +257,6 @@
      <artifactId>hadoop-client</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs-raid</artifactId>
      <version>${project.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>