HDFS-15340. RBF: Implement BalanceProcedureScheduler basic framework. Contributed by Jinglun.
parent 0b7799bf6e
commit 1983eea62d
@@ -0,0 +1,361 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;

/**
 * A Job is a state machine consisting of many procedures. The procedures are
 * executed as a chain. Each procedure needs to specify the next procedure. If
 * there is no next procedure then the job is finished.
 */
public final class BalanceJob<T extends BalanceProcedure> implements Writable {

  private String id;
  private BalanceProcedureScheduler scheduler;
  private volatile boolean jobDone = false;
  private Exception error;
  public static final Logger LOG = LoggerFactory.getLogger(BalanceJob.class);
  private Map<String, T> procedureTable = new HashMap<>();
  private T firstProcedure;
  private T curProcedure;
  private T lastProcedure;
  private boolean removeAfterDone;

  static final String NEXT_PROCEDURE_NONE = "NONE";
  private static Set<String> reservedNames = new HashSet<>();

  static {
    reservedNames.add(NEXT_PROCEDURE_NONE);
  }

  public static class Builder<T extends BalanceProcedure> {

    private List<T> procedures = new ArrayList<>();
    private boolean removeAfterDone = false;

    /**
     * Append a procedure to the tail.
     */
    public Builder nextProcedure(T procedure) {
      int size = procedures.size();
      if (size > 0) {
        procedures.get(size - 1).setNextProcedure(procedure.name());
      }
      procedure.setNextProcedure(NEXT_PROCEDURE_NONE);
      procedures.add(procedure);
      return this;
    }

    /**
     * Automatically remove this job from the scheduler cache when the job is
     * done.
     */
    public Builder removeAfterDone(boolean remove) {
      removeAfterDone = remove;
      return this;
    }

    public BalanceJob build() throws IOException {
      BalanceJob job = new BalanceJob(procedures, removeAfterDone);
      for (BalanceProcedure<T> p : procedures) {
        p.setJob(job);
      }
      return job;
    }
  }

  private BalanceJob(Iterable<T> procedures, boolean remove)
      throws IOException {
    for (T p : procedures) {
      String taskName = p.name();
      if (reservedNames.contains(taskName)) {
        throw new IOException(taskName + " is reserved.");
      }
      procedureTable.put(p.name(), p);
      if (firstProcedure == null) {
        firstProcedure = p;
      }
    }
    removeAfterDone = remove;
    lastProcedure = null;
    curProcedure = firstProcedure;
  }

  /**
   * Run the state machine.
   */
  public void execute() {
    boolean quit = false;
    try {
      while (!jobDone && !quit && scheduler.isRunning()) {
        if (curProcedure == null) { // Job done.
          finish(null);
          quit = true;
        } else {
          if (curProcedure == firstProcedure || lastProcedure != curProcedure) {
            LOG.info("Start procedure {}, last procedure is {}",
                curProcedure.name(),
                lastProcedure == null ? null : lastProcedure.name());
          }
          if (curProcedure.execute()) {
            lastProcedure = curProcedure;
            curProcedure = next();
          }
          if (!scheduler.writeJournal(this)) {
            quit = true; // Write journal failed. Simply quit because this job
                         // has already been added to the recoverQueue.
            LOG.debug("Write journal failed. Quit and wait for recovery.");
          }
        }
      }
    } catch (BalanceProcedure.RetryException tre) {
      scheduler.delay(this, curProcedure.delayMillisBeforeRetry());
    } catch (Exception e) {
      finish(e);
    } catch (Throwable t) {
      IOException err = new IOException("Got throwable error.", t);
      finish(err);
    }
  }

  private T next() {
    if (curProcedure == null) {
      return firstProcedure;
    } else {
      return procedureTable.get(curProcedure.nextProcedure());
    }
  }

  /**
   * Job finishes. It could be either success or failure.
   * @param exception the exception that causes the job to fail. null indicates
   *                  the job is successful.
   */
  private synchronized void finish(Exception exception) {
    assert !jobDone;
    if (scheduler.jobDone(this)) {
      jobDone = true;
      error = exception;
      notifyAll();
    }
  }

  void setScheduler(BalanceProcedureScheduler scheduler) {
    this.scheduler = scheduler;
  }

  void setId(String id) {
    this.id = id;
  }

  /**
   * Get the uid of the job.
   */
  public String getId() {
    return this.id;
  }

  /**
   * Whether this job should be removed after it's done.
   */
  @VisibleForTesting
  public boolean shouldRemoveAfterDone() {
    return removeAfterDone;
  }

  @VisibleForTesting
  void setLastProcedure(T lastProcedure) {
    this.lastProcedure = lastProcedure;
  }

  @VisibleForTesting
  void setCurrentProcedure(T currentProcedure) {
    this.curProcedure = currentProcedure;
  }

  /**
   * Return true if the job has finished.
   */
  public boolean isJobDone() {
    return jobDone;
  }

  /**
   * Wait until the job is done.
   */
  public synchronized void waitJobDone() throws InterruptedException {
    while (!jobDone) {
      wait();
    }
  }

  /**
   * Return the error exception during the job execution. This should be called
   * after the job finishes.
   */
  public Exception getError() {
    return error;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    if (id == null) {
      throw new IOException("BalanceJob with id=null can not be serialized.");
    }
    Text.writeString(out, id);
    int taskTableSize = procedureTable.size();
    out.writeInt(taskTableSize);
    for (T p : procedureTable.values()) {
      Text.writeString(out, p.getClass().getName());
      p.write(out);
    }
    if (firstProcedure != null) {
      Text.writeString(out, firstProcedure.name());
    } else {
      Text.writeString(out, NEXT_PROCEDURE_NONE);
    }
    if (curProcedure != null) {
      Text.writeString(out, curProcedure.name());
    } else {
      Text.writeString(out, NEXT_PROCEDURE_NONE);
    }
    if (lastProcedure != null) {
      Text.writeString(out, lastProcedure.name());
    } else {
      Text.writeString(out, NEXT_PROCEDURE_NONE);
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    this.id = Text.readString(in);
    procedureTable = new HashMap<>();
    int taskTableSize = in.readInt();
    for (int i = 0; i < taskTableSize; i++) {
      String className = Text.readString(in);
      try {
        T p = (T) ReflectionUtils.newInstance(Class.forName(className), null);
        p.readFields(in);
        procedureTable.put(p.name(), p);
      } catch (Exception e) {
        LOG.error("Failed reading Procedure.", e);
        throw new IOException(e);
      }
    }
    String firstProcedureName = Text.readString(in);
    if (firstProcedureName.equals(NEXT_PROCEDURE_NONE)) {
      firstProcedure = null;
    } else {
      firstProcedure = procedureTable.get(firstProcedureName);
    }
    String currentProcedureName = Text.readString(in);
    if (currentProcedureName.equals(NEXT_PROCEDURE_NONE)) {
      curProcedure = null;
    } else {
      curProcedure = procedureTable.get(currentProcedureName);
    }
    String lastProcedureName = Text.readString(in);
    if (lastProcedureName.equals(NEXT_PROCEDURE_NONE)) {
      lastProcedure = null;
    } else {
      lastProcedure = procedureTable.get(lastProcedureName);
    }
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == null) {
      return false;
    }
    if (obj == this) {
      return true;
    }
    if (obj.getClass() != getClass()) {
      return false;
    }
    BalanceJob bj = (BalanceJob) obj;
    return new EqualsBuilder()
        .append(id, bj.id)
        .append(procedureTable, bj.procedureTable)
        .append(firstProcedure, bj.firstProcedure)
        .isEquals();
  }

  @Override
  public int hashCode() {
    return new HashCodeBuilder(17, 37)
        .append(id)
        .append(procedureTable)
        .toHashCode();
  }

  @Override
  public String toString() {
    return "{jobId=" + id + "}";
  }

  /**
   * Get the detail description of this job.
   */
  public String getDetailMessage() {
    StringBuilder builder = new StringBuilder();
    builder.append("id=").append(id);
    if (firstProcedure != null) {
      builder.append(",firstProcedure=").append(firstProcedure);
    }
    if (curProcedure != null) {
      builder.append(",currentProcedure=").append(curProcedure);
    }
    builder.append(",jobDone=").append(jobDone);
    if (error != null) {
      builder.append(",error=").append(error.getMessage());
    }
    return builder.toString();
  }

  boolean isSchedulerShutdown() {
    return !scheduler.isRunning();
  }

  @VisibleForTesting
  Map<String, T> getProcedureTable() {
    return procedureTable;
  }

  @VisibleForTesting
  T getCurProcedure() {
    return curProcedure;
  }
}
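The Builder above is the only public way to assemble a job, and the job itself round-trips through the Writable interface so a half-finished chain can be re-created from a journal. The following is a minimal sketch of that round trip (not part of the commit); the class name is made up, it must live in the same package because setId() and allocateJobId() are package-private, and RecordProcedure is the test helper added later in this commit.

package org.apache.hadoop.hdfs.procedure;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

/** Sketch only: serialize a job and read it back, as a journal implementation would. */
public final class BalanceJobRoundTripSketch {
  public static void main(String[] args) throws IOException {
    // Chain a single RecordProcedure (test helper from this commit).
    BalanceJob job = new BalanceJob.Builder<RecordProcedure>()
        .nextProcedure(new RecordProcedure("record", 1000L))
        .build();
    job.setId(BalanceProcedureScheduler.allocateJobId()); // write() rejects a null id.

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    job.write(new DataOutputStream(bytes)); // persist the current state.

    // An empty job is enough as a shell; readFields() rebuilds the procedure table.
    BalanceJob copy = new BalanceJob.Builder<>().build();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(copy.equals(job)); // true: id, procedure table and first procedure match.
  }
}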
@@ -0,0 +1,48 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configurable;

import java.io.IOException;

/**
 * The Journal of the state machine. It handles the job persistence and
 * recovery.
 */
public interface BalanceJournal extends Configurable {

  /**
   * Save journal of this job.
   */
  void saveJob(BalanceJob job) throws IOException;

  /**
   * Recover the job from journal.
   */
  void recoverJob(BalanceJob job) throws IOException;

  /**
   * List all unfinished jobs.
   */
  BalanceJob[] listAllJobs() throws IOException;

  /**
   * Clear all the journals of this job.
   */
  void clear(BalanceJob job) throws IOException;
}
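The interface is small enough that a memory-only implementation is easy to sketch, which is handy for unit tests that do not want a MiniDFSCluster. The class below is hypothetical (not part of the commit); it assumes same-package access because BalanceJob.setId() is package-private, and it keeps only the latest serialized form per job id.

package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/** Sketch: a memory-only journal, e.g. for unit tests. Not part of the commit. */
public class InMemoryBalanceJournal implements BalanceJournal {
  private final Map<String, byte[]> states = new ConcurrentHashMap<>();
  private Configuration conf;

  @Override
  public void saveJob(BalanceJob job) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    job.write(out); // keep only the latest journal entry per job.
    states.put(job.getId(), Arrays.copyOf(out.getData(), out.getLength()));
  }

  @Override
  public void recoverJob(BalanceJob job) throws IOException {
    byte[] data = states.get(job.getId());
    if (data == null) {
      throw new IOException("No journal for job " + job.getId());
    }
    DataInputBuffer in = new DataInputBuffer();
    in.reset(data, data.length);
    job.readFields(in); // restore the last persisted state.
  }

  @Override
  public BalanceJob[] listAllJobs() throws IOException {
    BalanceJob[] jobs = new BalanceJob[states.size()];
    int i = 0;
    for (String id : states.keySet()) {
      jobs[i] = new BalanceJob.Builder<>().build(); // empty shell, recovered later.
      jobs[i++].setId(id);
    }
    return jobs;
  }

  @Override
  public void clear(BalanceJob job) {
    states.remove(job.getId());
  }

  @Override
  public void setConf(Configuration c) {
    this.conf = c;
  }

  @Override
  public Configuration getConf() {
    return conf;
  }
}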
@@ -0,0 +1,203 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.SequentialNumber;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.SCHEDULER_JOURNAL_URI;
import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.TMP_TAIL;
import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.JOB_PREFIX;

/**
 * BalanceJournal based on HDFS. This class stores all the journals in the
 * HDFS. The jobs are persisted into the HDFS and recovered from the HDFS.
 */
public class BalanceJournalInfoHDFS implements BalanceJournal {

  public static final Logger LOG = LoggerFactory.getLogger(
      BalanceJournalInfoHDFS.class);

  public static class IdGenerator extends SequentialNumber {
    protected IdGenerator(long initialValue) {
      super(initialValue);
    }
  }

  private URI workUri;
  private Configuration conf;
  private IdGenerator generator;

  /**
   * Save job journal to HDFS.
   *
   * All the journals are saved in the path base-dir. Each job has an
   * individual directory named after the job id.
   * When a job is saved, a new journal file is created. The file's name
   * consists of a prefix 'JOB-' and an incremental sequential id. The file
   * with the largest id is the latest journal of this job.
   *
   * Layout:
   *   base-dir/
   *     /job-3f1da5e5-2a60-48de-8736-418d134edbe9/
   *       /JOB-0
   *       /JOB-3
   *       /JOB-5
   *     /job-ebc19478-2324-46c2-8d1a-2f8c4391dc09/
   *       /JOB-1
   *       /JOB-2
   *       /JOB-4
   */
  public void saveJob(BalanceJob job) throws IOException {
    Path jobFile = getNewStateJobPath(job);
    Path tmpJobFile = new Path(jobFile + TMP_TAIL);
    FSDataOutputStream out = null;
    try {
      FileSystem fs = FileSystem.get(workUri, conf);
      out = fs.create(tmpJobFile);
      job.write(new DataOutputStream(out));
      out.close();
      out = null;
      fs.rename(tmpJobFile, jobFile);
    } finally {
      IOUtils.closeStream(out);
    }
    LOG.debug("Save journal of job={}", job);
  }

  /**
   * Recover job from journal on HDFS.
   */
  public void recoverJob(BalanceJob job) throws IOException {
    FSDataInputStream in = null;
    try {
      Path logPath = getLatestStateJobPath(job);
      FileSystem fs = FileSystem.get(workUri, conf);
      in = fs.open(logPath);
      job.readFields(in);
      LOG.debug("Recover job={} from journal.", job);
    } finally {
      if (in != null) {
        in.close();
      }
    }
  }

  @Override
  public BalanceJob[] listAllJobs() throws IOException {
    FileSystem fs = FileSystem.get(workUri, conf);
    Path workPath = new Path(workUri.getPath());
    FileStatus[] statuses;
    try {
      statuses = fs.listStatus(workPath);
    } catch (FileNotFoundException e) {
      LOG.debug("Create work path {}", workPath);
      fs.mkdirs(workPath);
      return new BalanceJob[0];
    }
    BalanceJob[] jobs = new BalanceJob[statuses.length];
    StringBuilder builder = new StringBuilder();
    builder.append("List all jobs from journal [");
    for (int i = 0; i < statuses.length; i++) {
      if (statuses[i].isDirectory()) {
        jobs[i] = new BalanceJob.Builder<>().build();
        jobs[i].setId(statuses[i].getPath().getName());
        builder.append(jobs[i]);
        if (i < statuses.length - 1) {
          builder.append(", ");
        }
      }
    }
    builder.append("]");
    LOG.debug(builder.toString());
    return jobs;
  }

  @Override
  public void clear(BalanceJob job) throws IOException {
    Path jobBase = getJobBaseDir(job);
    FileSystem fs = FileSystem.get(workUri, conf);
    if (fs.exists(jobBase)) {
      fs.delete(jobBase, true);
    }
    LOG.debug("Clear journal of job=" + job);
  }

  @Override
  public void setConf(Configuration conf) {
    try {
      this.workUri = new URI(conf.get(SCHEDULER_JOURNAL_URI));
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException("URI resolution failed.", e);
    }
    this.conf = conf;
    this.generator = new IdGenerator(Time.monotonicNow());
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  private Path getJobBaseDir(BalanceJob job) {
    String jobId = job.getId();
    return new Path(workUri.getPath(), jobId);
  }

  private Path getNewStateJobPath(BalanceJob job) {
    Path basePath = getJobBaseDir(job);
    Path logPath = new Path(basePath, JOB_PREFIX + generator.nextValue());
    return logPath;
  }

  private Path getLatestStateJobPath(BalanceJob job) throws IOException {
    Path latestFile = null;
    Path basePath = getJobBaseDir(job);
    FileSystem fs = FileSystem.get(workUri, conf);
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(basePath, false);
    while (iterator.hasNext()) {
      FileStatus status = iterator.next();
      String fileName = status.getPath().getName();
      if (fileName.startsWith(JOB_PREFIX) && !fileName.contains(TMP_TAIL)) {
        if (latestFile == null) {
          latestFile = status.getPath();
        } else if (latestFile.getName().compareTo(fileName) <= 0) {
          latestFile = status.getPath();
        }
      }
    }
    return latestFile;
  }
}
@@ -0,0 +1,226 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import static org.apache.hadoop.hdfs.procedure.BalanceJob.NEXT_PROCEDURE_NONE;

/**
 * The basic components of the Job. Extend this class to implement different
 * job logic.
 */
public abstract class BalanceProcedure<T extends BalanceProcedure>
    implements Writable {

  public static final Logger LOG =
      LoggerFactory.getLogger(BalanceProcedure.class);
  private String nextProcedure; // the procedure after this procedure.
  private String name; // the name of this procedure.
  private long delayDuration; // this specifies how long this procedure will be
                              // delayed. The delay is triggered by throwing a
                              // RetryException.
  private BalanceJob job;

  public BalanceProcedure() {
  }

  /**
   * The constructor of BalanceProcedure.
   *
   * @param name the name of the procedure.
   * @param nextProcedure the name of the next procedure.
   * @param delayDuration the delay duration when this procedure is delayed.
   */
  public BalanceProcedure(String name, String nextProcedure,
      long delayDuration) {
    this();
    this.name = name;
    this.nextProcedure = nextProcedure;
    this.delayDuration = delayDuration;
  }

  public BalanceProcedure(String name, long delayDuration) {
    this(name, NEXT_PROCEDURE_NONE, delayDuration);
  }

  /**
   * The main process. This is called by the ProcedureScheduler.
   *
   * Make sure the process quits fast when it's interrupted and the scheduler
   * is shut down.
   *
   * One procedure may have many phases and all the phases share the same
   * member variables. Each time this method returns, the journal is saved.
   * User can serialize the current phase in write(DataOutput) so the job can
   * continue with the last unfinished phase after it is recovered.
   * The return value indicates whether the job should go to the next
   * procedure. Return true after all the phases finish.
   *
   * Example:
   * class ProcedureWithManyPhase extends BalanceProcedure {
   *
   *   enum PHASE {
   *     P1, P2, P3
   *   }
   *   PHASE phase;
   *
   *   public boolean execute(T lastProcedure) throws RetryException,
   *       IOException {
   *     switch (phase) {
   *     case P1:
   *       // do something.
   *       return false;
   *     case P2:
   *       // do something.
   *       return false;
   *     case P3:
   *       // do something.
   *       return true;
   *     default:
   *       throw new IOException("Unexpected phase " + phase);
   *     }
   *   }
   *
   *   public void write(DataOutput out) {
   *     out.writeInt(phase.ordinal());
   *   }
   *
   *   public void readFields(DataInput in) throws IOException {
   *     phase = PHASE.values()[in.readInt()];
   *   }
   * }
   *
   * @throws RetryException if this procedure needs to be delayed a while and
   *         then retried.
   * @return true if the procedure is done and the job will go to the next
   *         procedure, otherwise false.
   */
  public abstract boolean execute() throws RetryException, IOException;

  /**
   * The time in milliseconds the procedure should wait before retry.
   */
  public long delayMillisBeforeRetry() {
    return delayDuration;
  }

  /**
   * The active flag.
   */
  protected boolean isSchedulerShutdown() {
    return job.isSchedulerShutdown();
  }

  protected void setNextProcedure(String nextProcedure) {
    this.nextProcedure = nextProcedure;
  }

  void setJob(BalanceJob job) {
    this.job = job;
  }

  /**
   * Get the next procedure.
   */
  public String nextProcedure() {
    return nextProcedure;
  }

  /**
   * Get the procedure name.
   */
  public String name() {
    return name;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    if (nextProcedure == null) {
      Text.writeString(out, NEXT_PROCEDURE_NONE);
    } else {
      Text.writeString(out, nextProcedure);
    }
    Text.writeString(out, name);
    new LongWritable(delayDuration).write(out);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    nextProcedure = Text.readString(in);
    name = Text.readString(in);
    delayDuration = readLong(in);
  }

  private static long readLong(DataInput in) throws IOException {
    LongWritable delayWritable = new LongWritable();
    delayWritable.readFields(in);
    return delayWritable.get();
  }

  @Override
  public int hashCode() {
    return new HashCodeBuilder(17, 37)
        .append(nextProcedure)
        .append(name)
        .append(delayDuration)
        .toHashCode();
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == null) {
      return false;
    }
    if (obj == this) {
      return true;
    }
    if (obj.getClass() != getClass()) {
      return false;
    }
    BalanceProcedure rhs = (BalanceProcedure) obj;
    return new EqualsBuilder()
        .append(nextProcedure, rhs.nextProcedure)
        .append(name, rhs.name)
        .append(delayDuration, rhs.delayDuration)
        .build();
  }

  @Override
  public String toString() {
    return name + ":" + this.getClass().getName();
  }

  /**
   * The RetryException represents that the current procedure should be
   * delayed and then retried.
   */
  public static class RetryException extends Exception {
    public RetryException() {}
  }
}
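A subclass therefore has three ways to answer execute(): return false (phase finished, stay on this procedure and journal the progress), return true (procedure finished, move to the next one in the chain), or throw RetryException (re-queue after delayMillisBeforeRetry()). The sketch below is hypothetical (not part of the commit) and shows the retry path by polling for a marker path; it keeps the empty constructor and Writable methods that recovery via reflection relies on.

package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/** Sketch: waits until a marker path exists, retrying with the configured delay. */
public class WaitForPathProcedure extends BalanceProcedure {

  private Path marker;

  public WaitForPathProcedure() {} // required: recovery instantiates via reflection.

  public WaitForPathProcedure(String name, long delayMillis, Path marker) {
    super(name, delayMillis);
    this.marker = marker;
  }

  @Override
  public boolean execute() throws RetryException, IOException {
    if (isSchedulerShutdown()) {
      return false;                 // quit fast on shutdown, as the javadoc asks.
    }
    FileSystem fs = marker.getFileSystem(new Configuration());
    if (!fs.exists(marker)) {
      throw new RetryException();   // scheduler re-queues after delayMillisBeforeRetry().
    }
    return true;                    // done, move to the next procedure in the chain.
  }

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);               // name, next procedure and delay first.
    Text.writeString(out, marker.toString());
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    marker = new Path(Text.readString(in));
  }
}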
@@ -0,0 +1,41 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.classification.InterfaceAudience;

/**
 * This class contains constants for configuration keys and default values
 * used in hdfs procedure.
 */
@InterfaceAudience.Private
public final class BalanceProcedureConfigKeys {
  /* The worker threads number of the BalanceProcedureScheduler */
  public static final String WORK_THREAD_NUM =
      "hadoop.hdfs.procedure.work.thread.num";
  public static final int WORK_THREAD_NUM_DEFAULT = 10;
  /* The uri of the journal */
  public static final String SCHEDULER_JOURNAL_URI =
      "hadoop.hdfs.procedure.scheduler.journal.uri";
  public static final String JOB_PREFIX = "JOB-";
  public static final String TMP_TAIL = ".tmp";
  public static final String JOURNAL_CLASS =
      "hadoop.hdfs.procedure.journal.class";

  private BalanceProcedureConfigKeys() {}
}
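For orientation, this is how the keys would typically be wired up before the scheduler is created. The snippet is a fragment, assumes the usual org.apache.hadoop.conf.Configuration import, and the HDFS URI is only a placeholder.

// Sketch: wiring the scheduler through these keys. The URI below is a placeholder.
Configuration conf = new Configuration();
conf.set(BalanceProcedureConfigKeys.SCHEDULER_JOURNAL_URI,
    "hdfs://localhost:8020/tmp/procedure-journal");         // where journals are stored.
conf.setInt(BalanceProcedureConfigKeys.WORK_THREAD_NUM, 4); // default is 10.
conf.setClass(BalanceProcedureConfigKeys.JOURNAL_CLASS,
    BalanceJournalInfoHDFS.class, BalanceJournal.class);    // this is the default journal anyway.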
@@ -0,0 +1,450 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import com.google.common.annotations.VisibleForTesting;

import java.io.IOException;
import java.util.Collection;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.lang3.builder.CompareToBuilder;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.WORK_THREAD_NUM;
import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.WORK_THREAD_NUM_DEFAULT;
import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.JOURNAL_CLASS;

/**
 * The state machine framework consists of:
 *   Job:                The state machine. It implements the basic logic of
 *                       the state machine.
 *   Procedure:          The components of the job. It implements the custom
 *                       logic.
 *   ProcedureScheduler: The multi-thread model responsible for running,
 *                       recovering, handling errors and job persistence.
 *   Journal:            It handles the job persistence and recovery.
 *
 * Example:
 *   Job.Builder builder = new Job.Builder<>();
 *   builder.nextProcedure(new WaitProcedure("wait", 1000, 30 * 1000));
 *   Job job = builder.build();
 *
 *   ProcedureScheduler scheduler = new ProcedureScheduler(CONF);
 *   scheduler.init();
 *   scheduler.submit(job);
 *   scheduler.waitUntilDone(job);
 */
public class BalanceProcedureScheduler {
  public static final Logger LOG =
      LoggerFactory.getLogger(BalanceProcedureScheduler.class);
  // The set containing all the jobs, including submitted and recovered ones.
  private ConcurrentHashMap<BalanceJob, BalanceJob> jobSet;
  // Containing jobs pending for running.
  private LinkedBlockingQueue<BalanceJob> runningQueue;
  // Containing jobs pending for wake up.
  private DelayQueue<DelayWrapper> delayQueue;
  // Containing jobs pending for recovery.
  private LinkedBlockingQueue<BalanceJob> recoverQueue;
  private Configuration conf;
  private BalanceJournal journal; // handle jobs' journals.

  private Thread readerThread; // consume the runningQueue and send to workers.
  private ThreadPoolExecutor workersPool; // the real threads running the jobs.
  private Thread roosterThread; // wake up the jobs in the delayQueue.
  private Thread recoverThread; // recover the jobs in the recoverQueue.
  // The running state of this scheduler.
  private AtomicBoolean running = new AtomicBoolean(true);

  public BalanceProcedureScheduler(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Init the scheduler.
   *
   * @param recoverJobs whether to recover all the jobs from journal or not.
   */
  public synchronized void init(boolean recoverJobs) throws IOException {
    this.runningQueue = new LinkedBlockingQueue<>();
    this.delayQueue = new DelayQueue<>();
    this.recoverQueue = new LinkedBlockingQueue<>();
    this.jobSet = new ConcurrentHashMap<>();

    // start threads.
    this.roosterThread = new Rooster();
    this.roosterThread.setDaemon(true);
    roosterThread.start();
    this.recoverThread = new Recover();
    this.recoverThread.setDaemon(true);
    recoverThread.start();
    int workerNum = conf.getInt(WORK_THREAD_NUM, WORK_THREAD_NUM_DEFAULT);
    workersPool = new ThreadPoolExecutor(workerNum, workerNum * 2, 1,
        TimeUnit.MILLISECONDS, new LinkedBlockingDeque<>());
    this.readerThread = new Reader();
    this.readerThread.start();

    // init journal.
    Class<BalanceJournal> clazz = (Class<BalanceJournal>) conf
        .getClass(JOURNAL_CLASS, BalanceJournalInfoHDFS.class);
    journal = ReflectionUtils.newInstance(clazz, conf);

    if (recoverJobs) {
      recoverAllJobs();
    }
  }

  /**
   * Submit the job.
   */
  public synchronized void submit(BalanceJob job) throws IOException {
    if (!running.get()) {
      throw new IOException("Scheduler is shutdown.");
    }
    String jobId = allocateJobId();
    job.setId(jobId);
    job.setScheduler(this);
    journal.saveJob(job);
    jobSet.put(job, job);
    runningQueue.add(job);
    LOG.info("Add new job={}", job);
  }

  /**
   * Remove the job from scheduler if it finishes.
   */
  public BalanceJob remove(BalanceJob job) {
    BalanceJob inner = findJob(job);
    if (inner == null) {
      return null;
    } else if (job.isJobDone()) {
      synchronized (this) {
        return jobSet.remove(inner);
      }
    }
    return null;
  }

  /**
   * Find job in scheduler.
   *
   * @return the job in scheduler. Null if the scheduler has no job with the
   *         same id.
   */
  public BalanceJob findJob(BalanceJob job) {
    BalanceJob found = null;
    for (BalanceJob j : jobSet.keySet()) {
      if (j.getId().equals(job.getId())) {
        found = j;
        break;
      }
    }
    return found;
  }

  /**
   * Return all jobs in the scheduler.
   */
  public Collection<BalanceJob> getAllJobs() {
    return jobSet.values();
  }

  /**
   * Wait permanently until the job is done.
   */
  public void waitUntilDone(BalanceJob job) {
    BalanceJob found = findJob(job);
    if (found == null || found.isJobDone()) {
      return;
    }
    while (!found.isJobDone()) {
      try {
        found.waitJobDone();
      } catch (InterruptedException e) {
      }
    }
  }

  /**
   * Delay this job.
   */
  void delay(BalanceJob job, long delayInMilliseconds) {
    delayQueue.add(new DelayWrapper(job, delayInMilliseconds));
    LOG.info("Need delay {}ms. Add to delayQueue. job={}", delayInMilliseconds,
        job);
  }

  boolean jobDone(BalanceJob job) {
    try {
      journal.clear(job);
      if (job.shouldRemoveAfterDone()) {
        jobSet.remove(job);
      }
      return true;
    } catch (IOException e) {
      LOG.warn("Clear journal failed, add to recoverQueue. job=" + job, e);
      recoverQueue.add(job);
      return false;
    }
  }

  /**
   * Save current status to journal.
   */
  boolean writeJournal(BalanceJob job) {
    try {
      journal.saveJob(job);
      return true;
    } catch (Exception e) {
      LOG.warn("Save procedure failed, add to recoverQueue. job=" + job, e);
      recoverQueue.add(job);
      return false;
    }
  }

  /**
   * The running state of the scheduler.
   */
  public boolean isRunning() {
    return running.get();
  }

  /**
   * Shutdown the scheduler.
   */
  public synchronized void shutDown() {
    if (!running.get()) {
      return;
    }
    running.set(false);
    readerThread.interrupt();
    roosterThread.interrupt();
    recoverThread.interrupt();
    workersPool.shutdownNow();
  }

  /**
   * Shutdown scheduler and wait at most timeout seconds for procedures to
   * finish.
   * @param timeout Wait at most timeout seconds for procedures to finish.
   */
  public synchronized void shutDownAndWait(int timeout) {
    shutDown();
    while (readerThread.isAlive()) {
      try {
        readerThread.join();
      } catch (InterruptedException e) {
      }
    }
    while (roosterThread.isAlive()) {
      try {
        roosterThread.join();
      } catch (InterruptedException e) {
      }
    }
    while (recoverThread.isAlive()) {
      try {
        recoverThread.join();
      } catch (InterruptedException e) {
      }
    }
    while (!workersPool.isTerminated()) {
      try {
        workersPool.awaitTermination(timeout, TimeUnit.SECONDS);
      } catch (InterruptedException e) {
      }
    }
  }

  /**
   * Search all jobs and add them to recoverQueue. It's called once after the
   * scheduler starts.
   */
  private void recoverAllJobs() throws IOException {
    BalanceJob[] jobs = journal.listAllJobs();
    for (BalanceJob job : jobs) {
      recoverQueue.add(job);
      jobSet.put(job, job);
    }
  }

  @VisibleForTesting
  static String allocateJobId() {
    return "job-" + UUID.randomUUID();
  }

  @VisibleForTesting
  public void setJournal(BalanceJournal journal) {
    this.journal = journal;
  }

  /**
   * This thread consumes the delayQueue and moves the jobs to the
   * runningQueue.
   */
  class Rooster extends Thread {
    @Override
    public void run() {
      while (running.get()) {
        try {
          DelayWrapper dJob = delayQueue.take();
          runningQueue.add(dJob.getJob());
          LOG.info("Wake up job={}", dJob.getJob());
        } catch (InterruptedException e) {
          // ignore interrupt exception.
        }
      }
    }
  }

  /**
   * This thread consumes the runningQueue and gives the jobs to the workers.
   */
  class Reader extends Thread {
    @Override
    public void run() {
      while (running.get()) {
        try {
          final BalanceJob job = runningQueue.poll(500, TimeUnit.MILLISECONDS);
          if (job != null) {
            workersPool.submit(() -> {
              LOG.info("Start job. job_msg={}", job.getDetailMessage());
              job.execute();
              if (!running.get()) {
                return;
              }
              if (job.isJobDone()) {
                if (job.getError() == null) {
                  LOG.info("Job done. job={}", job);
                } else {
                  LOG.warn("Job failed. job=" + job, job.getError());
                }
              }
              return;
            });
          }
        } catch (InterruptedException e) {
          // ignore interrupt exception.
        }
      }
    }
  }

  /**
   * This thread consumes the recoverQueue, recovers the job then adds it to
   * the runningQueue.
   */
  class Recover extends Thread {
    @Override
    public void run() {
      while (running.get()) {
        BalanceJob job = null;
        try {
          job = recoverQueue.poll(500, TimeUnit.MILLISECONDS);
        } catch (InterruptedException ie) {
          // ignore interrupt exception.
        }
        if (job != null) {
          try {
            journal.recoverJob(job);
            job.setScheduler(BalanceProcedureScheduler.this);
            runningQueue.add(job);
            LOG.info("Recover success, add to runningQueue. job={}", job);
          } catch (IOException e) {
            LOG.warn("Recover failed, re-add to recoverQueue. job=" + job, e);
            recoverQueue.add(job);
          }
        }
      }
    }
  }

  /**
   * Wrap the delayed BalanceJob.
   */
  private static class DelayWrapper implements Delayed {
    private BalanceJob job;
    private long time;

    DelayWrapper(BalanceJob job, long delayInMilliseconds) {
      this.job = job;
      this.time = Time.monotonicNow() + delayInMilliseconds;
    }

    BalanceJob getJob() {
      return job;
    }

    @Override
    public long getDelay(TimeUnit unit) {
      long delay = time - Time.monotonicNow();
      if (delay < 0) {
        delay = 0;
      }
      return unit.convert(delay, TimeUnit.MILLISECONDS);
    }

    @Override
    public int compareTo(Delayed o) {
      DelayWrapper dw = (DelayWrapper) o;
      return new CompareToBuilder()
          .append(time, dw.time)
          .append(job, dw.job)
          .toComparison();
    }

    @Override
    public int hashCode() {
      return new HashCodeBuilder(17, 37)
          .append(time)
          .append(job)
          .toHashCode();
    }

    @Override
    public boolean equals(Object obj) {
      if (obj == null) {
        return false;
      }
      if (obj == this) {
        return true;
      }
      if (obj.getClass() != getClass()) {
        return false;
      }
      DelayWrapper dw = (DelayWrapper) obj;
      return new EqualsBuilder()
          .appendSuper(super.equals(obj))
          .append(time, dw.time)
          .append(job, dw.job)
          .build();
    }
  }
}
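The class javadoc above uses shortened names (Job, ProcedureScheduler, WaitProcedure). The sketch below restates the same end-to-end flow with the concrete class names added in this commit; it is not part of the commit, the wrapper class name is made up, and the journal URI is a placeholder.

package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configuration;

import java.io.IOException;

/** Sketch: the end-to-end flow using the concrete class names from this commit. */
public final class SchedulerUsageSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.set(BalanceProcedureConfigKeys.SCHEDULER_JOURNAL_URI,
        "hdfs://localhost:8020/tmp/procedure-journal");    // placeholder URI.

    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(conf);
    scheduler.init(true);                 // true: re-queue any unfinished jobs from the journal.

    BalanceJob job = new BalanceJob.Builder<RetryProcedure>()
        .nextProcedure(new RetryProcedure("retry-once", 1000L, 1))
        .removeAfterDone(true)            // drop it from the scheduler cache when finished.
        .build();

    scheduler.submit(job);                // assigns an id, journals it, queues it.
    scheduler.waitUntilDone(job);
    System.out.println("error=" + job.getError()); // null on success.

    scheduler.shutDownAndWait(60);        // stop threads, wait up to 60s for workers.
  }
}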
@@ -0,0 +1,29 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Classes under this package implement a state machine used for balancing data
 * across federation namespaces.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving

package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -0,0 +1,88 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * This simulates a procedure with many phases. This is used for test.
 */
public class MultiPhaseProcedure extends BalanceProcedure {

  private int totalPhase;
  private int currentPhase = 0;
  private Configuration conf;
  private FileSystem fs;
  private Path path;

  public MultiPhaseProcedure() {}

  public MultiPhaseProcedure(String name, long delay, int totalPhase,
      Configuration config, String spath) throws IOException {
    super(name, delay);
    this.totalPhase = totalPhase;
    this.conf = config;
    this.path = new Path(spath);
    this.fs = path.getFileSystem(config);
  }

  @Override
  public boolean execute() throws IOException {
    if (currentPhase < totalPhase) {
      LOG.info("Current phase {}", currentPhase);
      Path phase = new Path(path, "phase-" + currentPhase);
      if (!fs.exists(phase)) {
        fs.mkdirs(phase);
      }
      currentPhase++;
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
      }
      return false;
    }
    return true;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    out.writeInt(totalPhase);
    out.writeInt(currentPhase);
    conf.write(out);
    Text.writeString(out, path.toString());
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    totalPhase = in.readInt();
    currentPhase = in.readInt();
    conf = new Configuration(false);
    conf.readFields(in);
    path = new Path(Text.readString(in));
    fs = path.getFileSystem(conf);
  }
}
@@ -0,0 +1,45 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import java.util.ArrayList;
import java.util.List;

/**
 * This procedure records all the finished procedures. This is used for test.
 */
public class RecordProcedure extends BalanceProcedure<RecordProcedure> {

  private static List<RecordProcedure> finish = new ArrayList<>();

  public RecordProcedure() {}

  public RecordProcedure(String name, long delay) {
    super(name, delay);
  }

  @Override
  public boolean execute() throws RetryException {
    finish.add(this);
    return true;
  }

  public static List<RecordProcedure> getFinishList() {
    return finish;
  }
}
@@ -0,0 +1,66 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * This simulates a procedure that needs many retries. This is used for test.
 */
public class RetryProcedure extends BalanceProcedure {

  private int retryTime = 1;
  private int totalRetry = 0;

  public RetryProcedure() {}

  public RetryProcedure(String name, long delay, int retryTime) {
    super(name, delay);
    this.retryTime = retryTime;
  }

  @Override
  public boolean execute() throws RetryException {
    if (retryTime > 0) {
      retryTime--;
      totalRetry++;
      throw new RetryException();
    }
    return true;
  }

  public int getTotalRetry() {
    return totalRetry;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    out.writeInt(retryTime);
    out.writeInt(totalRetry);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    retryTime = in.readInt();
    totalRetry = in.readInt();
  }
}
|
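The retry contract is simple: execute() throws RetryException while it still wants to be re-run and returns true once it has succeeded. The loop below is an illustrative driver sketch, not the scheduler's actual logic (the real scheduler also honours the configured delay and journals state between attempts); it references RetryException as a nested type of BalanceProcedure, which is an assumption about how the exception is declared.

  // Sketch only: drive the procedure until it stops asking for a retry.
  static void driveUntilDone() throws Exception {
    RetryProcedure proc = new RetryProcedure("retry", 1000, 3);
    boolean done = false;
    while (!done) {
      try {
        done = proc.execute();  // true once the simulated work succeeds
      } catch (BalanceProcedure.RetryException e) {  // assumed nesting
        Thread.sleep(1000);     // back off before the next attempt
      }
    }
    // Three attempts threw RetryException; the fourth returned true.
    assert proc.getTotalRetry() == 3;
  }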
@ -0,0 +1,451 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Time;
import org.junit.BeforeClass;
import org.junit.AfterClass;
import org.junit.Test;
import org.mockito.Mockito;

import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.SCHEDULER_JOURNAL_URI;
import static org.apache.hadoop.hdfs.procedure.BalanceProcedureConfigKeys.WORK_THREAD_NUM;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotSame;
import static org.mockito.ArgumentMatchers.any;

/**
 * Test BalanceProcedureScheduler.
 */
public class TestBalanceProcedureScheduler {

  private static MiniDFSCluster cluster;
  private static final Configuration CONF = new Configuration();
  private static DistributedFileSystem fs;
  private static final int DEFAULT_BLOCK_SIZE = 512;

  @BeforeClass
  public static void setup() throws IOException {
    CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
        true);
    CONF.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, "hdfs:///");
    CONF.setBoolean(DFS_NAMENODE_ACLS_ENABLED_KEY, true);
    CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
    CONF.setInt(WORK_THREAD_NUM, 1);

    cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(3).build();
    cluster.waitClusterUp();
    cluster.waitActive();

    fs = cluster.getFileSystem();
    String workPath =
        "hdfs://" + cluster.getNameNode().getHostAndPort() + "/procedure";
    CONF.set(SCHEDULER_JOURNAL_URI, workPath);
    fs.mkdirs(new Path(workPath));
  }

  @AfterClass
  public static void close() {
    if (cluster != null) {
      cluster.shutdown();
    }
  }

  /**
   * Test that the scheduler can be shut down correctly.
   */
  @Test(timeout = 60000)
  public void testShutdownScheduler() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    // Construct the job.
    BalanceJob.Builder builder = new BalanceJob.Builder<>();
    builder.nextProcedure(new WaitProcedure("wait", 1000, 5 * 1000));
    BalanceJob job = builder.build();

    scheduler.submit(job);
    Thread.sleep(1000); // Wait for the job to be scheduled.
    scheduler.shutDownAndWait(30 * 1000);

    BalanceJournal journal =
        ReflectionUtils.newInstance(BalanceJournalInfoHDFS.class, CONF);
    journal.clear(job);
  }

  /**
   * Test a successful job.
   */
  @Test(timeout = 60000)
  public void testSuccessfulJob() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    try {
      // Construct the job.
      List<RecordProcedure> procedures = new ArrayList<>();
      BalanceJob.Builder builder = new BalanceJob.Builder<RecordProcedure>();
      for (int i = 0; i < 5; i++) {
        RecordProcedure r = new RecordProcedure("record-" + i, 1000L);
        builder.nextProcedure(r);
        procedures.add(r);
      }
      BalanceJob<RecordProcedure> job = builder.build();

      scheduler.submit(job);
      scheduler.waitUntilDone(job);
      assertNull(job.getError());
      // Verify the finish list.
      assertEquals(5, RecordProcedure.getFinishList().size());
      for (int i = 0; i < RecordProcedure.getFinishList().size(); i++) {
        assertEquals(procedures.get(i), RecordProcedure.getFinishList().get(i));
      }
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Test a failed job and verify its error can be retrieved.
   */
  @Test(timeout = 60000)
  public void testFailedJob() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    try {
      // Mock a bad procedure.
      BalanceProcedure badProcedure = Mockito.mock(BalanceProcedure.class);
      Mockito.doThrow(new IOException("Job failed exception."))
          .when(badProcedure).execute();
      Mockito.doReturn("bad-procedure").when(badProcedure).name();

      BalanceJob.Builder builder = new BalanceJob.Builder<>();
      builder.nextProcedure(badProcedure);
      BalanceJob job = builder.build();
      scheduler.submit(job);
      scheduler.waitUntilDone(job);
      GenericTestUtils
          .assertExceptionContains("Job failed exception", job.getError());
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Test recovering a job. After the job is recovered, it should start from
   * the last unfinished procedure, which is the first procedure without a
   * journal record.
   */
  @Test(timeout = 60000)
  public void testGetJobAfterRecover() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    try {
      // Construct the job.
      BalanceJob.Builder builder = new BalanceJob.Builder<>();
      String firstProcedure = "wait0";
      WaitProcedure[] procedures = new WaitProcedure[5];
      for (int i = 0; i < 5; i++) {
        WaitProcedure procedure = new WaitProcedure("wait" + i, 1000, 1000);
        builder.nextProcedure(procedure).removeAfterDone(false);
        procedures[i] = procedure;
      }
      BalanceJob job = builder.build();
      scheduler.submit(job);

      // Sleep for a random time, then shut down.
      long randomSleepTime = Math.abs(new Random().nextInt()) % 5 * 1000 + 1000;
      Thread.sleep(randomSleepTime);
      scheduler.shutDownAndWait(2);

      // The current procedure is the last unfinished procedure. It is also
      // the first procedure without a journal record.
      WaitProcedure recoverProcedure = (WaitProcedure) job.getCurProcedure();
      int recoverIndex = -1;
      for (int i = 0; i < procedures.length; i++) {
        if (procedures[i].name().equals(recoverProcedure.name())) {
          recoverIndex = i;
          break;
        }
      }

      // Restart the scheduler and recover the job.
      scheduler = new BalanceProcedureScheduler(CONF);
      scheduler.init(true);
      scheduler.waitUntilDone(job);

      // The job should be done successfully and the recoverJob should be
      // equal to the original job.
      BalanceJob recoverJob = scheduler.findJob(job);
      assertNull(recoverJob.getError());
      assertNotSame(job, recoverJob);
      assertEquals(job, recoverJob);
      // Verify whether the recovered job starts from the recoverProcedure.
      Map<String, WaitProcedure> pTable = recoverJob.getProcedureTable();
      List<WaitProcedure> recoveredProcedures =
          procedureTableToList(pTable, firstProcedure);
      for (int i = 0; i < recoverIndex; i++) {
        // All procedures before recoverProcedure shouldn't be executed.
        assertFalse(recoveredProcedures.get(i).getExecuted());
      }
      for (int i = recoverIndex; i < procedures.length; i++) {
        // All procedures from recoverProcedure onwards should be executed.
        assertTrue(recoveredProcedures.get(i).getExecuted());
      }
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Test that RetryException is handled correctly.
   */
  @Test(timeout = 60000)
  public void testRetry() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    try {
      // Construct the job.
      BalanceJob.Builder builder = new BalanceJob.Builder<>();
      RetryProcedure retryProcedure = new RetryProcedure("retry", 1000, 3);
      builder.nextProcedure(retryProcedure);
      BalanceJob job = builder.build();

      long start = Time.monotonicNow();
      scheduler.submit(job);
      scheduler.waitUntilDone(job);
      assertNull(job.getError());

      long duration = Time.monotonicNow() - start;
      assertTrue(duration > 1000 * 3);
      assertEquals(3, retryProcedure.getTotalRetry());
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Test scheduling an empty job.
   */
  @Test(timeout = 60000)
  public void testEmptyJob() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    try {
      BalanceJob job = new BalanceJob.Builder<>().build();
      scheduler.submit(job);
      scheduler.waitUntilDone(job);
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Test serialization and deserialization of a job.
   */
  @Test(timeout = 60000)
  public void testJobSerializeAndDeserialize() throws Exception {
    BalanceJob.Builder builder = new BalanceJob.Builder<RecordProcedure>();
    for (int i = 0; i < 5; i++) {
      RecordProcedure r = new RecordProcedure("record-" + i, 1000L);
      builder.nextProcedure(r);
    }
    builder.nextProcedure(new RetryProcedure("retry", 1000, 3));
    BalanceJob<RecordProcedure> job = builder.build();
    job.setId(BalanceProcedureScheduler.allocateJobId());
    // Serialize.
    ByteArrayOutputStream bao = new ByteArrayOutputStream();
    job.write(new DataOutputStream(bao));
    bao.flush();
    ByteArrayInputStream bai = new ByteArrayInputStream(bao.toByteArray());
    // Deserialize.
    BalanceJob newJob = new BalanceJob.Builder<>().build();
    newJob.readFields(new DataInputStream(bai));
    assertEquals(job, newJob);
  }

  /**
   * Test that the scheduler can crash and recover.
   */
  @Test(timeout = 60000)
  public void testSchedulerDownAndRecoverJob() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    Path parent = new Path("/testSchedulerDownAndRecoverJob");
    try {
      // Construct the job.
      BalanceJob.Builder builder = new BalanceJob.Builder<>();
      MultiPhaseProcedure multiPhaseProcedure =
          new MultiPhaseProcedure("retry", 1000, 10, CONF, parent.toString());
      builder.nextProcedure(multiPhaseProcedure);
      BalanceJob job = builder.build();

      scheduler.submit(job);
      Thread.sleep(500); // Wait for the procedure to be scheduled.
      scheduler.shutDownAndWait(2);

      assertFalse(job.isJobDone());
      int len = fs.listStatus(parent).length;
      assertTrue(len > 0 && len < 10);
      // Restart the scheduler and test recovering the job.
      scheduler = new BalanceProcedureScheduler(CONF);
      scheduler.init(true);
      scheduler.waitUntilDone(job);

      assertEquals(10, fs.listStatus(parent).length);
      for (int i = 0; i < 10; i++) {
        assertTrue(fs.exists(new Path(parent, "phase-" + i)));
      }

      BalanceJob recoverJob = scheduler.findJob(job);
      assertNull(recoverJob.getError());
      assertNotSame(job, recoverJob);
      assertEquals(job, recoverJob);
    } finally {
      if (fs.exists(parent)) {
        fs.delete(parent, true);
      }
      scheduler.shutDownAndWait(2);
    }
  }

  @Test(timeout = 60000)
  public void testRecoverJobFromJournal() throws Exception {
    BalanceJournal journal =
        ReflectionUtils.newInstance(BalanceJournalInfoHDFS.class, CONF);
    BalanceJob.Builder builder = new BalanceJob.Builder<RecordProcedure>();
    BalanceProcedure wait0 = new WaitProcedure("wait0", 1000, 5000);
    BalanceProcedure wait1 = new WaitProcedure("wait1", 1000, 1000);
    builder.nextProcedure(wait0).nextProcedure(wait1);

    BalanceJob job = builder.build();
    job.setId(BalanceProcedureScheduler.allocateJobId());
    job.setCurrentProcedure(wait1);
    job.setLastProcedure(null);
    journal.saveJob(job);

    long start = Time.monotonicNow();
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);
    try {
      scheduler.waitUntilDone(job);
      long duration = Time.monotonicNow() - start;
      assertTrue(duration >= 1000 && duration < 5000);
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  @Test(timeout = 60000)
  public void testClearJournalFail() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);

    BalanceJournal journal = Mockito.mock(BalanceJournal.class);
    AtomicInteger count = new AtomicInteger(0);
    Mockito.doAnswer(invocation -> {
      if (count.incrementAndGet() == 1) {
        throw new IOException("Mock clear failure");
      }
      return null;
    }).when(journal).clear(any(BalanceJob.class));
    scheduler.setJournal(journal);

    try {
      BalanceJob.Builder builder = new BalanceJob.Builder<>();
      builder.nextProcedure(new WaitProcedure("wait", 1000, 1000));
      BalanceJob job = builder.build();
      scheduler.submit(job);
      scheduler.waitUntilDone(job);
      assertEquals(2, count.get());
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Test that the job is recovered if writing the journal fails.
   */
  @Test(timeout = 60000)
  public void testJobRecoveryWhenWriteJournalFail() throws Exception {
    BalanceProcedureScheduler scheduler = new BalanceProcedureScheduler(CONF);
    scheduler.init(true);

    try {
      // Construct the job.
      AtomicBoolean recoverFlag = new AtomicBoolean(true);
      BalanceJob.Builder builder = new BalanceJob.Builder<>();
      builder.nextProcedure(new WaitProcedure("wait", 1000, 1000))
          .nextProcedure(
              new UnrecoverableProcedure("shutdown", 1000, () -> {
                cluster.restartNameNode(false);
                return true;
              })).nextProcedure(
              new UnrecoverableProcedure("recoverFlag", 1000, () -> {
                recoverFlag.set(false);
                return true;
              })).nextProcedure(new WaitProcedure("wait", 1000, 1000));

      BalanceJob job = builder.build();
      scheduler.submit(job);
      scheduler.waitUntilDone(job);
      assertTrue(job.isJobDone());
      assertNull(job.getError());
      assertTrue(recoverFlag.get());
    } finally {
      scheduler.shutDownAndWait(2);
    }
  }

  /**
   * Transform the procedure map into an ordered list based on the relations
   * specified by the map.
   */
  <T extends BalanceProcedure> List<T> procedureTableToList(
      Map<String, T> pTable, String first) {
    List<T> procedures = new ArrayList<>();
    T cur = pTable.get(first);
    while (cur != null) {
      procedures.add(cur);
      cur = pTable.get(cur.nextProcedure());
    }
    return procedures;
  }
}
@ -0,0 +1,56 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import java.io.IOException;

/**
 * This simulates a procedure that cannot be recovered. This is for test only.
 *
 * If the job is not recovered, the handler is called. Once the job is
 * recovered, the procedure does nothing. We can use this to verify whether
 * the job has been recovered.
 */
public class UnrecoverableProcedure extends BalanceProcedure {

  public interface Call {
    boolean execute() throws RetryException, IOException;
  }

  private Call handler;

  public UnrecoverableProcedure() {}

  /**
   * The handler will be lost if the procedure is recovered.
   */
  public UnrecoverableProcedure(String name, long delay, Call handler) {
    super(name, delay);
    this.handler = handler;
  }

  @Override
  public boolean execute() throws RetryException,
      IOException {
    if (handler != null) {
      return handler.execute();
    } else {
      return true;
    }
  }
}
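Because the Call handler lives only in memory, a copy of this procedure rebuilt from the journal runs with a null handler and silently succeeds; whether the handler's side effect happened therefore tells a test if recovery took place, which is the trick testJobRecoveryWhenWriteJournalFail above relies on. The helper below is a sketch only, not part of this commit, and assumes an already initialized scheduler plus the usual java.util.concurrent.atomic import.

  // Sketch only: the flag flips only when this in-memory instance (which still
  // holds its handler) executes; a copy rebuilt from the journal has no
  // handler and would leave the flag untouched.
  static boolean probeRanLocally(BalanceProcedureScheduler scheduler)
      throws Exception {
    AtomicBoolean ranLocally = new AtomicBoolean(false);
    BalanceJob.Builder builder = new BalanceJob.Builder<>();
    builder.nextProcedure(new UnrecoverableProcedure("probe", 1000, () -> {
      ranLocally.set(true);
      return true;
    }));
    BalanceJob job = builder.build();
    scheduler.submit(job);
    scheduler.waitUntilDone(job);
    return ranLocally.get();  // false would mean the job was recovered instead
  }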
@ -0,0 +1,77 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.procedure;

import org.apache.hadoop.util.Time;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * This procedure waits a specified period of time and then finishes. It
 * simulates the behaviour of blocking procedures.
 */
public class WaitProcedure extends BalanceProcedure {

  private long waitTime;
  private boolean executed = false;

  public WaitProcedure() {
  }

  public WaitProcedure(String name, long delay, long waitTime) {
    super(name, delay);
    this.waitTime = waitTime;
  }

  @Override
  public boolean execute() throws IOException {
    long startTime = Time.monotonicNow();
    long timeLeft = waitTime;
    while (timeLeft > 0) {
      try {
        Thread.sleep(timeLeft);
      } catch (InterruptedException e) {
        if (isSchedulerShutdown()) {
          return false;
        }
      } finally {
        timeLeft = waitTime - (Time.monotonicNow() - startTime);
      }
    }
    executed = true;
    return true;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    out.writeLong(waitTime);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    waitTime = in.readLong();
  }

  public boolean getExecuted() {
    return executed;
  }
}
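Note that the executed flag is deliberately not written by write(), so a WaitProcedure rebuilt from the journal reports getExecuted() == false even if an earlier incarnation ran; this is what lets testGetJobAfterRecover tell recovered procedures from re-executed ones. The helper below is a usage sketch only, not part of this commit, and assumes an already initialized scheduler.

  // Sketch only: one blocking step that waits a second before finishing.
  static void runSingleWait(BalanceProcedureScheduler scheduler)
      throws Exception {
    WaitProcedure wait = new WaitProcedure("wait", 1000, 1000);
    BalanceJob job = new BalanceJob.Builder<>().nextProcedure(wait).build();
    scheduler.submit(job);
    scheduler.waitUntilDone(job);
    assert wait.getExecuted();  // only this in-memory instance reports true
  }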