HBASE-22976 [HBCK2] Add RecoveredEditsPlayer (#2504)

Make it so WALPlayer can replay recovered.edits files.

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
 Allow for WAL files that do NOT have a start time in their name. Use the
 'generic' WAL-filename parser instead of the one that used to be local here.
 Implement support for the 'startTime' filter; previously it was simply not
 implemented.

hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
 Checkstyle.

hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java
 Use the new general WAL-name timestamp parser.

hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java
 Utility for parsing the timestamp from a WAL filename.

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRecoveredEdits.java
 Export attributes about the local recovered.edits test file so other tests can use it.

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
parent 627002d279, commit 45e10d43fe
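For orientation, here is a minimal sketch (not part of the patch) of the new capability driven from code, mirroring what TestWALPlayer#testPlayingRecoveredEdit in this commit does: point WALPlayer at a directory of recovered.edits files and run it. The edits-directory path below is a hypothetical example, and the class is placed in the test's package purely so the sketch can construct WALPlayer the same way the test does.

----
// Sketch only: mirrors TestWALPlayer#testPlayingRecoveredEdit further down.
package org.apache.hadoop.hbase.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

public class ReplayRecoveredEdits {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Hypothetical recovered.edits directory; any directory of WALs or
    // recovered.edits files works. With no <tables> argument, WALPlayer
    // imports edits for every table referenced by the files.
    String editsDir = "/hbase/data/default/t1/4823016d8fca70b25503ee07f4c6d79f/recovered.edits";
    int exitCode = ToolRunner.run(new WALPlayer(conf), new String[] { editsDir });
    System.exit(exitCode);
  }
}
----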
@@ -365,7 +365,8 @@ public final class CommonFSUtils {
if (!qualifiedWalDir.equals(rootDir)) {
if (qualifiedWalDir.toString().startsWith(rootDir.toString() + "/")) {
throw new IllegalStateException("Illegal WAL directory specified. " +
"WAL directories are not permitted to be under the root directory if set.");
"WAL directories are not permitted to be under root directory: rootDir=" +
rootDir.toString() + ", qualifiedWALDir=" + qualifiedWalDir);
}
}
return true;
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information

@@ -22,24 +22,21 @@ import java.io.DataOutput;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WAL.Entry;
import org.apache.hadoop.hbase.wal.WAL.Reader;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;

@@ -49,6 +46,9 @@ import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Simple {@link InputFormat} for {@link org.apache.hadoop.hbase.wal.WAL} files.

@@ -77,10 +77,6 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
* Represent an WALSplit, i.e. a single WAL file.
* Start- and EndTime are managed by the split, so that WAL files can be
* filtered before WALEdits are passed to the mapper(s).
* @param logFileName
* @param fileSize
* @param startTime
* @param endTime
*/
public WALSplit(String logFileName, long fileSize, long startTime, long endTime) {
this.logFileName = logFileName;
@@ -186,7 +182,9 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (reader == null) return false;
if (reader == null) {
return false;
}
this.currentPos = reader.getPosition();
Entry temp;
long i = -1;

@@ -204,7 +202,9 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
} while (temp != null && temp.getKey().getWriteTime() < startTime);

if (temp == null) {
if (i > 0) LOG.info("Skipped " + i + " entries.");
if (i > 0) {
LOG.info("Skipped " + i + " entries.");
}
LOG.info("Reached end of file.");
return false;
} else if (i > 0) {

@@ -242,7 +242,9 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
@Override
public void close() throws IOException {
LOG.info("Closing reader");
if (reader != null) this.reader.close();
if (reader != null) {
this.reader.close();
}
}
}
@@ -301,40 +303,56 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
inpDirs.split(conf.get(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ",")));
}

/**
* @param startTime If file looks like it has a timestamp in its name, we'll check if newer
* or equal to this value else we will filter out the file. If name does not
* seem to have a timestamp, we will just return it w/o filtering.
* @param endTime If file looks like it has a timestamp in its name, we'll check if older or equal
* to this value else we will filter out the file. If name does not seem to
* have a timestamp, we will just return it w/o filtering.
*/
private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime)
throws IOException {
List<FileStatus> result = new ArrayList<>();
LOG.debug("Scanning " + dir.toString() + " for WAL files");

RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir);
if (!iter.hasNext()) return Collections.emptyList();
if (!iter.hasNext()) {
return Collections.emptyList();
}
while (iter.hasNext()) {
LocatedFileStatus file = iter.next();
if (file.isDirectory()) {
// recurse into sub directories
// Recurse into sub directories
result.addAll(getFiles(fs, file.getPath(), startTime, endTime));
} else {
String name = file.getPath().toString();
int idx = name.lastIndexOf('.');
if (idx > 0) {
try {
long fileStartTime = Long.parseLong(name.substring(idx+1));
if (fileStartTime <= endTime) {
LOG.info("Found: " + file);
result.add(file);
}
} catch (NumberFormatException x) {
idx = 0;
}
}
if (idx == 0) {
LOG.warn("File " + name + " does not appear to be an WAL file. Skipping...");
}
addFile(result, file, startTime, endTime);
}
}
// TODO: These results should be sorted? Results could be content of recovered.edits directory
// -- null padded increasing numeric -- or a WAL file w/ timestamp suffix or timestamp and
// then meta suffix. See AbstractFSWALProvider#WALStartTimeComparator
return result;
}

static void addFile(List<FileStatus> result, LocatedFileStatus lfs, long startTime,
long endTime) {
long timestamp = WAL.getTimestamp(lfs.getPath().getName());
if (timestamp > 0) {
// Looks like a valid timestamp.
if (timestamp <= endTime && timestamp >= startTime) {
LOG.info("Found {}", lfs.getPath());
result.add(lfs);
} else {
LOG.info("Skipped {}, outside range [{}/{} - {}/{}]", lfs.getPath(),
startTime, Instant.ofEpochMilli(startTime), endTime, Instant.ofEpochMilli(endTime));
}
} else {
// If no timestamp, add it regardless.
LOG.info("Found (no-timestamp!) {}", lfs);
result.add(lfs);
}
}

@Override
public RecordReader<WALKey, WALEdit> createRecordReader(InputSplit split,
TaskAttemptContext context) throws IOException, InterruptedException {
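A small, self-contained sketch (assumes hbase-server on the classpath; not shipped in this patch) of the keep/skip rule addFile implements above: a name with a parseable trailing timestamp is kept only when that timestamp falls inside the inclusive [startTime, endTime] range, while a name carrying no timestamp, such as a recovered.edits sequence-id name, is always kept.

----
import org.apache.hadoop.hbase.wal.WAL;

public class WalFileFilterSketch {
  // Mirrors WALInputFormat#addFile: range-check the timestamp when there is one,
  // otherwise keep the file unconditionally.
  static boolean keep(String fileName, long startTime, long endTime) {
    long ts = WAL.getTimestamp(fileName); // -1 when the name carries no timestamp
    return ts <= 0 || (ts >= startTime && ts <= endTime);
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    System.out.println(keep("10.20.20.171%3A60020." + now, now, now));         // true: in range
    System.out.println(keep("10.20.20.171%3A60020." + now, now + 1, now + 2)); // false: too old
    System.out.println(keep("0000000000000016310", now + 1, now + 2));         // true: no timestamp
  }
}
----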
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information

@@ -22,7 +22,6 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;

@@ -279,6 +278,7 @@ public class WALPlayer extends Configured implements Tool {
super.cleanup(context);
}

@SuppressWarnings("checkstyle:EmptyBlock")
@Override
public void setup(Context context) throws IOException {
String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);

@@ -409,17 +409,21 @@ public class WALPlayer extends Configured implements Tool {
System.err.println(" <WAL inputdir> directory of WALs to replay.");
System.err.println(" <tables> comma separated list of tables. If no tables specified,");
System.err.println(" all are imported (even hbase:meta if present).");
System.err.println(" <tableMappings> WAL entries can be mapped to a new set of tables by passing");
System.err.println(" <tableMappings>, a comma separated list of target tables.");
System.err.println(" If specified, each table in <tables> must have a mapping.");
System.err.println(" <tableMappings> WAL entries can be mapped to a new set of tables by " +
"passing");
System.err.println(" <tableMappings>, a comma separated list of target " +
"tables.");
System.err.println(" If specified, each table in <tables> must have a " +
"mapping.");
System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:");
System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
System.err.println(" Only one table can be specified, and no mapping allowed!");
System.err.println("To specify a time range, pass:");
System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
System.err.println(" The start and the end date of timerange. The dates can be expressed");
System.err.println(" in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.");
System.err.println(" The start and the end date of timerange (inclusive). The dates can be");
System.err.println(" expressed in milliseconds-since-epoch or yyyy-MM-dd'T'HH:mm:ss.SS " +
"format.");
System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12");
System.err.println("Other options:");
System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName");
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;

@Category({ MapReduceTests.class, SmallTests.class})
public class TestWALInputFormat {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestWALInputFormat.class);

/**
* Test the primitive start/end time filtering.
*/
@Test
public void testAddFile() {
List<FileStatus> lfss = new ArrayList<>();
LocatedFileStatus lfs = Mockito.mock(LocatedFileStatus.class);
long now = System.currentTimeMillis();
Mockito.when(lfs.getPath()).thenReturn(new Path("/name." + now));
WALInputFormat.addFile(lfss, lfs, now, now);
assertEquals(1, lfss.size());
WALInputFormat.addFile(lfss, lfs, now - 1, now - 1);
assertEquals(1, lfss.size());
WALInputFormat.addFile(lfss, lfs, now - 2, now - 1);
assertEquals(1, lfss.size());
WALInputFormat.addFile(lfss, lfs, now - 2, now);
assertEquals(2, lfss.size());
WALInputFormat.addFile(lfss, lfs, Long.MIN_VALUE, now);
assertEquals(3, lfss.size());
WALInputFormat.addFile(lfss, lfs, Long.MIN_VALUE, Long.MAX_VALUE);
assertEquals(4, lfss.size());
WALInputFormat.addFile(lfss, lfs, now, now + 2);
assertEquals(5, lfss.size());
WALInputFormat.addFile(lfss, lfs, now + 1, now + 2);
assertEquals(5, lfss.size());
Mockito.when(lfs.getPath()).thenReturn(new Path("/name"));
WALInputFormat.addFile(lfss, lfs, Long.MIN_VALUE, Long.MAX_VALUE);
assertEquals(6, lfss.size());
Mockito.when(lfs.getPath()).thenReturn(new Path("/name.123"));
WALInputFormat.addFile(lfss, lfs, Long.MIN_VALUE, Long.MAX_VALUE);
assertEquals(7, lfss.size());
Mockito.when(lfs.getPath()).thenReturn(new Path("/name." + now + ".meta"));
WALInputFormat.addFile(lfss, lfs, now, now);
assertEquals(8, lfss.size());
}
}
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information

@@ -24,8 +24,8 @@ import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;

@@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.WALPlayer.WALKeyValueMapper;
import org.apache.hadoop.hbase.regionserver.TestRecoveredEdits;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;

@@ -73,7 +74,6 @@ import org.mockito.stubbing.Answer;
@Category({MapReduceTests.class, LargeTests.class})
//TODO : Remove this in 3.0
public class TestWALPlayer {

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestWALPlayer.class);

@@ -91,7 +91,7 @@ public class TestWALPlayer {

@BeforeClass
public static void beforeClass() throws Exception {
conf= TEST_UTIL.getConfiguration();
conf = TEST_UTIL.getConfiguration();
rootDir = TEST_UTIL.createRootDir();
walRootDir = TEST_UTIL.createWALRootDir();
fs = CommonFSUtils.getRootDirFileSystem(conf);
@@ -106,9 +106,32 @@ public class TestWALPlayer {
logFs.delete(walRootDir, true);
}

/**
* Test that WALPlayer can replay recovered.edits files.
*/
@Test
public void testPlayingRecoveredEdit() throws Exception {
TableName tn = TableName.valueOf(TestRecoveredEdits.RECOVEREDEDITS_TABLENAME);
TEST_UTIL.createTable(tn, TestRecoveredEdits.RECOVEREDEDITS_COLUMNFAMILY);
// Copy testing recovered.edits file that is over under hbase-server test resources
// up into a dir in our little hdfs cluster here.
String hbaseServerTestResourcesEdits = System.getProperty("test.build.classes") +
"/../../../hbase-server/src/test/resources/" +
TestRecoveredEdits.RECOVEREDEDITS_PATH.getName();
assertTrue(new File(hbaseServerTestResourcesEdits).exists());
FileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem();
// Target dir.
Path targetDir = new Path("edits").makeQualified(dfs.getUri(), dfs.getHomeDirectory());
assertTrue(dfs.mkdirs(targetDir));
dfs.copyFromLocalFile(new Path(hbaseServerTestResourcesEdits), targetDir);
assertEquals(0,
ToolRunner.run(new WALPlayer(this.conf), new String [] {targetDir.toString()}));
// I don't know how many edits are in this file for this table... so just check more than 1.
assertTrue(TEST_UTIL.countRows(tn) > 0);
}

/**
* Simple end-to-end test
* @throws Exception
*/
@Test
public void testWALPlayer() throws Exception {
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information

@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;

@@ -42,6 +41,8 @@ import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALFactory;

@@ -122,8 +123,7 @@ public class TestWALRecordReader {
}

/**
* Test partial reads from the log based on passed time range
* @throws Exception
* Test partial reads from the WALs based on passed time range.
*/
@Test
public void testPartialRead() throws Exception {

@@ -139,6 +139,7 @@ public class TestWALRecordReader {
edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
log.appendData(info, getWalKeyImpl(ts+1, scopes), edit);
log.sync();
Threads.sleep(10);
LOG.info("Before 1st WAL roll " + log.toString());
log.rollWriter();
LOG.info("Past 1st WAL roll " + log.toString());
@@ -163,26 +164,29 @@ public class TestWALRecordReader {
jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);

// only 1st file is considered, and only its 1st entry is used
// Only 1st file is considered, and only its 1st entry is in-range.
List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));

assertEquals(1, splits.size());
testSplit(splits.get(0), Bytes.toBytes("1"));

jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
// both files need to be considered
assertEquals(2, splits.size());
// only the 2nd entry from the 1st file is used
testSplit(splits.get(0), Bytes.toBytes("2"));
// only the 1nd entry from the 2nd file is used
// Both entries from first file are in-range.
testSplit(splits.get(0), Bytes.toBytes("1"), Bytes.toBytes("2"));
// Only the 1st entry from the 2nd file is in-range.
testSplit(splits.get(1), Bytes.toBytes("3"));

jobConf.setLong(WALInputFormat.START_TIME_KEY, ts + 1);
jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1 + 1);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
assertEquals(1, splits.size());
// Only the 1st entry from the 2nd file is in-range.
testSplit(splits.get(0), Bytes.toBytes("3"));
}

/**
* Test basic functionality
* @throws Exception
*/
@Test
public void testWALRecordReader() throws Exception {

@@ -233,11 +237,7 @@ public class TestWALRecordReader {
jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
// both logs need to be considered
assertEquals(2, splits.size());
// but both readers skip all edits
testSplit(splits.get(0));
testSplit(splits.get(1));
assertTrue(splits.isEmpty());
}

protected WALKeyImpl getWalKeyImpl(final long time, NavigableMap<byte[], Integer> scopes) {
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hbase.wal;

import static org.apache.commons.lang3.StringUtils.isNumeric;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;

@@ -440,17 +439,12 @@ public abstract class AbstractFSWALProvider<T extends AbstractFSWAL<?>> implemen
* @return start time
*/
private static long getTS(Path p) {
String name = p.getName();
String [] splits = name.split("\\.");
String ts = splits[splits.length - 1];
if (!isNumeric(ts)) {
// Its a '.meta' or a '.syncrep' suffix.
ts = splits[splits.length - 2];
}
return Long.parseLong(ts);
return WAL.getTimestamp(p.getName());
}
}

public static boolean isArchivedLogFile(Path p) {
String oldLog = Path.SEPARATOR + HConstants.HREGION_OLDLOGDIR_NAME + Path.SEPARATOR;
return p.toString().contains(oldLog);
@@ -33,6 +33,7 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import static org.apache.commons.lang3.StringUtils.isNumeric;

/**
* A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides

@@ -312,4 +313,32 @@ public interface WAL extends Closeable, WALFileLengthProvider {
return this.key + "=" + this.edit;
}
}

/**
* Split a WAL filename to get a start time. WALs usually have the time we start writing to them
* as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it
* is a WAL for the meta table. For example, WALs might look like this
* <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the
* timestamp. Could also be a meta WAL which adds a '.meta' suffix or a
* synchronous replication WAL which adds a '.syncrep' suffix. Check for these. File also may have
* no timestamp on it. For example the recovered.edits files are WALs but are named in ascending
* order. Here is an example: 0000000000000016310. Allow for this.
* @param name Name of the WAL file.
* @return Timestamp or -1.
*/
public static long getTimestamp(String name) {
String [] splits = name.split("\\.");
if (splits.length <= 1) {
return -1;
}
String timestamp = splits[splits.length - 1];
if (!isNumeric(timestamp)) {
// Its a '.meta' or a '.syncrep' suffix.
timestamp = splits[splits.length - 2];
if (!isNumeric(timestamp)) {
return -1;
}
}
return Long.parseLong(timestamp);
}
}
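For illustration only, the snippet below shows what the new WAL.getTimestamp utility returns for the filename shapes its javadoc describes; the names are examples, not files from this commit.

----
import org.apache.hadoop.hbase.wal.WAL;

public class WalTimestampExamples {
  public static void main(String[] args) {
    // Plain WAL name: the trailing suffix is the start-time timestamp.
    System.out.println(WAL.getTimestamp("10.20.20.171%3A60020.1277499063250"));      // 1277499063250
    // Meta WAL: an extra '.meta' suffix follows the timestamp.
    System.out.println(WAL.getTimestamp("10.20.20.171%3A60020.1277499063250.meta")); // 1277499063250
    // recovered.edits file: a sequence-id name with no timestamp at all.
    System.out.println(WAL.getTimestamp("0000000000000016310"));                     // -1
  }
}
----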
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information

@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;

@@ -32,12 +31,13 @@ import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
@@ -78,6 +78,32 @@ public class TestRecoveredEdits {

@Rule public TestName testName = new TestName();

/**
* Path to a recovered.edits file in hbase-server test resources folder.
* This is a little fragile getting this path to a file of 10M of edits.
*/
@SuppressWarnings("checkstyle:VisibilityModifier")
public static final Path RECOVEREDEDITS_PATH = new Path(
System.getProperty("test.build.classes", "target/test-classes"),
"0000000000000016310");

/**
* Name of table referenced by edits in the recovered.edits file.
*/
public static final String RECOVEREDEDITS_TABLENAME = "IntegrationTestBigLinkedList";

/**
* Column family referenced by edits in the recovered.edits file.
*/
public static final byte [] RECOVEREDEDITS_COLUMNFAMILY = Bytes.toBytes("meta");
public static final byte[][] RECOVEREDITS_COLUMNFAMILY_ARRAY =
new byte[][] {RECOVEREDEDITS_COLUMNFAMILY};
public static final ColumnFamilyDescriptor RECOVEREDEDITS_CFD =
ColumnFamilyDescriptorBuilder.newBuilder(RECOVEREDEDITS_COLUMNFAMILY).build();

/**
* Name of table mentioned edits from recovered.edits
*/
@BeforeClass
public static void setUpBeforeClass() throws Exception {
blockCache = BlockCacheFactory.createBlockCache(TEST_UTIL.getConfiguration());
@@ -88,7 +114,6 @@ public class TestRecoveredEdits {
* Create a region. Close it. Then copy into place a file to replay, one that is bigger than
* configured flush size so we bring on lots of flushes. Then reopen and confirm all edits
* made it in.
* @throws IOException
*/
@Test
public void testReplayWorksThoughLotsOfFlushing() throws

@@ -104,33 +129,11 @@ public class TestRecoveredEdits {
// Set it so we flush every 1M or so. Thats a lot.
conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024*1024);
conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, String.valueOf(policy).toLowerCase());
// The file of recovered edits has a column family of 'meta'. Also has an encoded regionname
// of 4823016d8fca70b25503ee07f4c6d79f which needs to match on replay.
final String encodedRegionName = "4823016d8fca70b25503ee07f4c6d79f";
final String columnFamily = "meta";
byte [][] columnFamilyAsByteArray = new byte [][] {Bytes.toBytes(columnFamily)};
TableDescriptor tableDescriptor =
TableDescriptorBuilder.newBuilder(TableName.valueOf(testName.getMethodName()))
.setColumnFamily(
ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)).build())
.build();
RegionInfo hri = new HRegionInfo(tableDescriptor.getTableName()) {
@Override
public synchronized String getEncodedName() {
return encodedRegionName;
}

// Cache the name because lots of lookups.
private byte[] encodedRegionNameAsBytes = null;

@Override
public synchronized byte[] getEncodedNameAsBytes() {
if (encodedRegionNameAsBytes == null) {
this.encodedRegionNameAsBytes = Bytes.toBytes(getEncodedName());
}
return this.encodedRegionNameAsBytes;
}
};
TableDescriptor tableDescriptor = TableDescriptorBuilder.
newBuilder(TableName.valueOf(testName.getMethodName())).
setColumnFamily(RECOVEREDEDITS_CFD) .build();
RegionInfo hri = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
final String encodedRegionName = hri.getEncodedName();
Path hbaseRootDir = TEST_UTIL.getDataTestDir();
FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
Path tableDir = CommonFSUtils.getTableDir(hbaseRootDir, tableDescriptor.getTableName());
@@ -143,24 +146,20 @@ public class TestRecoveredEdits {
HRegion region = HBaseTestingUtility
.createRegionAndWAL(hri, hbaseRootDir, conf, tableDescriptor, blockCache);
assertEquals(encodedRegionName, region.getRegionInfo().getEncodedName());
List<String> storeFiles = region.getStoreFileList(columnFamilyAsByteArray);
List<String> storeFiles = region.getStoreFileList(RECOVEREDITS_COLUMNFAMILY_ARRAY);
// There should be no store files.
assertTrue(storeFiles.isEmpty());
region.close();
Path regionDir = FSUtils.getRegionDirFromRootDir(hbaseRootDir, hri);
Path recoveredEditsDir = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir);
// This is a little fragile getting this path to a file of 10M of edits.
Path recoveredEditsFile = new Path(
System.getProperty("test.build.classes", "target/test-classes"),
"0000000000000016310");
// Copy this file under the region's recovered.edits dir so it is replayed on reopen.
Path destination = new Path(recoveredEditsDir, recoveredEditsFile.getName());
fs.copyToLocalFile(recoveredEditsFile, destination);
Path destination = new Path(recoveredEditsDir, RECOVEREDEDITS_PATH.getName());
fs.copyToLocalFile(RECOVEREDEDITS_PATH, destination);
assertTrue(fs.exists(destination));
// Now the file 0000000000000016310 is under recovered.edits, reopen the region to replay.
region = HRegion.openHRegion(region, null);
assertEquals(encodedRegionName, region.getRegionInfo().getEncodedName());
storeFiles = region.getStoreFileList(columnFamilyAsByteArray);
storeFiles = region.getStoreFileList(RECOVEREDITS_COLUMNFAMILY_ARRAY);
// Our 0000000000000016310 is 10MB. Most of the edits are for one region. Lets assume that if
// we flush at 1MB, that there are at least 3 flushed files that are there because of the
// replay of edits.

@@ -170,19 +169,16 @@ public class TestRecoveredEdits {
assertTrue("Files count=" + storeFiles.size(), storeFiles.size() > 10);
}
// Now verify all edits made it into the region.
int count = verifyAllEditsMadeItIn(fs, conf, recoveredEditsFile, region);
int count = verifyAllEditsMadeItIn(fs, conf, RECOVEREDEDITS_PATH, region);
assertTrue(count > 0);
LOG.info("Checked " + count + " edits made it in");
}

/**
* @param fs
* @param conf
* @param edits
* @param region
* @return Return how many edits seen.
* @throws IOException
*/
private int verifyAllEditsMadeItIn(final FileSystem fs, final Configuration conf,
// Used by TestWALPlayer over in hbase-mapreduce too.
public static int verifyAllEditsMadeItIn(final FileSystem fs, final Configuration conf,
final Path edits, final HRegion region) throws IOException {
int count = 0;
// Read all cells from recover edits
@@ -424,32 +424,11 @@ See <<hfile_tool>>.
For bulk replaying WAL files or _recovered.edits_ files, see
<<walplayer>>. For reading/verifying individual files, read on.

[[hlog_tool]]
==== FSHLog tool

The main method on `FSHLog` offers manual split and dump facilities.
Pass it WALs or the product of a split, the content of the _recovered.edits_.
directory.

You can get a textual dump of a WAL file content by doing the following:

----
$ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --dump hdfs://example.org:8020/hbase/WALs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012
----

The return code will be non-zero if there are any issues with the file so you can test wholesomeness of file by redirecting `STDOUT` to `/dev/null` and testing the program return.

Similarly you can force a split of a log file directory by doing:

----
$ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --split hdfs://example.org:8020/hbase/WALs/example.org,60020,1283516293161/
----

[[hlog_tool.prettyprint]]
===== WALPrettyPrinter
==== WALPrettyPrinter

The `WALPrettyPrinter` is a tool with configurable options to print the contents of a WAL.
You can invoke it via the HBase cli with the 'wal' command.
The `WALPrettyPrinter` is a tool with configurable options to print the contents of a WAL
or a _recovered.edits_ file. You can invoke it via the HBase cli with the 'wal' command.

----
$ ./bin/hbase wal hdfs://example.org:8020/hbase/WALs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012
@@ -904,7 +883,10 @@ The output can optionally be mapped to another set of tables.

WALPlayer can also generate HFiles for later bulk importing, in that case only a single table and no mapping can be specified.

.WALPrettyPrinter/FSHLog Tool
Finally, you can use WALPlayer to replay the content of a Regions `recovered.edits` directory (the files under
`recovered.edits` directory have the same format as WAL files).

.WALPrettyPrinter
[NOTE]
====
To read or verify single WAL files or _recovered.edits_ files, since they share the WAL format,

@@ -945,8 +927,8 @@ To generate HFiles to bulk load instead of loading HBase directly, pass:
To specify a time range, pass:
-Dwal.start.time=[date|ms]
-Dwal.end.time=[date|ms]
The start and the end date of timerange. The dates can be expressed
in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.
The start and the end date of timerange (inclusive). The dates can be
expressed in milliseconds-since-epoch or yyyy-MM-dd'T'HH:mm:ss.SS format.
E.g. 1234567890120 or 2009-02-13T23:32:30.12
Other options:
-Dmapreduce.job.name=jobName