HBASE-26700 The way we bypass broken track file is not enough in StoreFileListFile (#4055)

Signed-off-by: Wellington Ramos Chevreuil <wchevreuil@apache.org>
This commit is contained in:
Duo Zhang 2022-01-25 14:51:11 +08:00 committed by GitHub
parent d77ede4a29
commit 8251bd566a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 216 additions and 25 deletions

View File

@ -17,8 +17,10 @@
*/ */
package org.apache.hadoop.hbase.regionserver.storefiletracker; package org.apache.hadoop.hbase.regionserver.storefiletracker;
import java.io.EOFException;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.zip.CRC32;
import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -29,9 +31,6 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.io.ByteStreams;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList; import org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList;
/** /**
@ -42,18 +41,27 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.
* other file. * other file.
* <p/> * <p/>
* So in this way, we could avoid listing when we want to load the store file list file. * So in this way, we could avoid listing when we want to load the store file list file.
* <p/>
* To prevent loading a partial file, we use the first 4 bytes as the file length, and also append
* a 4-byte crc32 checksum at the end. This is because the protobuf message parser can sometimes
* return without error on partial bytes if you stop at some special points, but the returned
* message will have incorrect field values. We should try our best to prevent this from happening
* because loading an incorrect store file list file usually leads to data loss.
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
class StoreFileListFile { class StoreFileListFile {
private static final Logger LOG = LoggerFactory.getLogger(StoreFileListFile.class); private static final Logger LOG = LoggerFactory.getLogger(StoreFileListFile.class);
private static final String TRACK_FILE_DIR = ".filelist"; static final String TRACK_FILE_DIR = ".filelist";
private static final String TRACK_FILE = "f1"; private static final String TRACK_FILE = "f1";
private static final String TRACK_FILE_ROTATE = "f2"; private static final String TRACK_FILE_ROTATE = "f2";
// 16 MB, which is big enough for a tracker file
private static final int MAX_FILE_SIZE = 16 * 1024 * 1024;
private final StoreContext ctx; private final StoreContext ctx;
private final Path trackFileDir; private final Path trackFileDir;
@ -74,16 +82,26 @@ class StoreFileListFile {
private StoreFileList load(Path path) throws IOException { private StoreFileList load(Path path) throws IOException {
FileSystem fs = ctx.getRegionFileSystem().getFileSystem(); FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
byte[] bytes; byte[] data;
int expectedChecksum;
try (FSDataInputStream in = fs.open(path)) { try (FSDataInputStream in = fs.open(path)) {
bytes = ByteStreams.toByteArray(in); int length = in.readInt();
if (length <= 0 || length > MAX_FILE_SIZE) {
throw new IOException("Invalid file length " + length +
", either less than 0 or greater then max allowed size " + MAX_FILE_SIZE);
}
data = new byte[length];
in.readFully(data);
expectedChecksum = in.readInt();
} }
// Read all the bytes and then parse it, so we will only throw InvalidProtocolBufferException CRC32 crc32 = new CRC32();
// here. This is very important for upper layer to determine whether this is the normal case, crc32.update(data);
// where the file does not exist or is incomplete. If there is another type of exception, the int calculatedChecksum = (int) crc32.getValue();
// upper layer should throw it out instead of just ignoring it, otherwise it will lead to data if (expectedChecksum != calculatedChecksum) {
// loss. throw new IOException(
return StoreFileList.parseFrom(bytes); "Checksum mismatch, expected " + expectedChecksum + ", actual " + calculatedChecksum);
}
return StoreFileList.parseFrom(data);
} }
private int select(StoreFileList[] lists) { private int select(StoreFileList[] lists) {
@ -101,9 +119,9 @@ class StoreFileListFile {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
try { try {
lists[i] = load(trackFiles[i]); lists[i] = load(trackFiles[i]);
} catch (FileNotFoundException | InvalidProtocolBufferException e) { } catch (FileNotFoundException | EOFException e) {
// this is normal case, so use info and do not log stacktrace // this is normal case, so use info and do not log stacktrace
LOG.info("Failed to load track file {}: {}", trackFiles[i], e); LOG.info("Failed to load track file {}: {}", trackFiles[i], e.toString());
} }
} }
int winnerIndex = select(lists); int winnerIndex = select(lists);
@ -124,10 +142,17 @@ class StoreFileListFile {
// we need to call load first to load the prevTimestamp and also the next file // we need to call load first to load the prevTimestamp and also the next file
load(); load();
} }
FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
long timestamp = Math.max(prevTimestamp + 1, EnvironmentEdgeManager.currentTime()); long timestamp = Math.max(prevTimestamp + 1, EnvironmentEdgeManager.currentTime());
byte[] actualData = builder.setTimestamp(timestamp).build().toByteArray();
CRC32 crc32 = new CRC32();
crc32.update(actualData);
int checksum = (int) crc32.getValue();
// 4 bytes length at the beginning, plus 4 bytes checksum
FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
try (FSDataOutputStream out = fs.create(trackFiles[nextTrackFile], true)) { try (FSDataOutputStream out = fs.create(trackFiles[nextTrackFile], true)) {
builder.setTimestamp(timestamp).build().writeTo(out); out.writeInt(actualData.length);
out.write(actualData);
out.writeInt(checksum);
} }
// record timestamp // record timestamp
prevTimestamp = timestamp; prevTimestamp = timestamp;

View File

@ -40,7 +40,7 @@ import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.regionserver.storefiletracker.TestStoreFileTracker; import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerForTest;
import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
@ -96,7 +96,7 @@ public class TestCreateTableProcedure extends TestTableDDLProcedureBase {
final TableName tableName = TableName.valueOf(name.getMethodName()); final TableName tableName = TableName.valueOf(name.getMethodName());
ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
TableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, F1); TableDescriptor htd = MasterProcedureTestingUtility.createHTD(tableName, F1);
String trackerName = TestStoreFileTracker.class.getName(); String trackerName = StoreFileTrackerForTest.class.getName();
htd = TableDescriptorBuilder.newBuilder(htd).setValue(TRACKER_IMPL, trackerName).build(); htd = TableDescriptorBuilder.newBuilder(htd).setValue(TRACKER_IMPL, trackerName).build();
RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null); RegionInfo[] regions = ModifyRegionUtils.createRegionInfos(htd, null);
long procId = ProcedureTestingUtility.submitAndWait(procExec, long procId = ProcedureTestingUtility.submitAndWait(procExec,

View File

@ -43,7 +43,7 @@ import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.regionserver.storefiletracker.TestStoreFileTracker; import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerForTest;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
@ -86,13 +86,13 @@ public class TestMergesSplitsAddToTracker {
@Before @Before
public void setup(){ public void setup(){
TestStoreFileTracker.clear(); StoreFileTrackerForTest.clear();
} }
private TableName createTable(byte[] splitKey) throws IOException { private TableName createTable(byte[] splitKey) throws IOException {
TableDescriptor td = TableDescriptorBuilder.newBuilder(name.getTableName()) TableDescriptor td = TableDescriptorBuilder.newBuilder(name.getTableName())
.setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY_NAME)) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY_NAME))
.setValue(TRACKER_IMPL, TestStoreFileTracker.class.getName()).build(); .setValue(TRACKER_IMPL, StoreFileTrackerForTest.class.getName()).build();
if (splitKey != null) { if (splitKey != null) {
TEST_UTIL.getAdmin().createTable(td, new byte[][] { splitKey }); TEST_UTIL.getAdmin().createTable(td, new byte[][] { splitKey });
} else { } else {
@ -241,7 +241,8 @@ public class TestMergesSplitsAddToTracker {
private void verifyFilesAreTracked(Path regionDir, FileSystem fs) throws Exception { private void verifyFilesAreTracked(Path regionDir, FileSystem fs) throws Exception {
for (FileStatus f : fs.listStatus(new Path(regionDir, FAMILY_NAME_STR))) { for (FileStatus f : fs.listStatus(new Path(regionDir, FAMILY_NAME_STR))) {
assertTrue(TestStoreFileTracker.tracked(regionDir.getName(), FAMILY_NAME_STR, f.getPath())); assertTrue(
StoreFileTrackerForTest.tracked(regionDir.getName(), FAMILY_NAME_STR, f.getPath()));
} }
} }

View File

@ -32,14 +32,14 @@ import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
public class TestStoreFileTracker extends DefaultStoreFileTracker { public class StoreFileTrackerForTest extends DefaultStoreFileTracker {
private static final Logger LOG = LoggerFactory.getLogger(TestStoreFileTracker.class); private static final Logger LOG = LoggerFactory.getLogger(StoreFileTrackerForTest.class);
private static ConcurrentMap<String, BlockingQueue<StoreFileInfo>> trackedFiles = private static ConcurrentMap<String, BlockingQueue<StoreFileInfo>> trackedFiles =
new ConcurrentHashMap<>(); new ConcurrentHashMap<>();
private String storeId; private String storeId;
public TestStoreFileTracker(Configuration conf, boolean isPrimaryReplica, StoreContext ctx) { public StoreFileTrackerForTest(Configuration conf, boolean isPrimaryReplica, StoreContext ctx) {
super(conf, isPrimaryReplica, ctx); super(conf, isPrimaryReplica, ctx);
if (ctx != null && ctx.getRegionFileSystem() != null) { if (ctx != null && ctx.getRegionFileSystem() != null) {
this.storeId = ctx.getRegionInfo().getEncodedName() + "-" + ctx.getFamily().getNameAsString(); this.storeId = ctx.getRegionInfo().getEncodedName() + "-" + ctx.getFamily().getNameAsString();

View File

@ -0,0 +1,165 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver.storefiletracker;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtil;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreContext;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.io.ByteStreams;
import org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList;
@Category({ RegionServerTests.class, SmallTests.class })
public class TestStoreFileListFile {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestStoreFileListFile.class);
private static final Logger LOG = LoggerFactory.getLogger(TestStoreFileListFile.class);
private static final HBaseCommonTestingUtil UTIL = new HBaseCommonTestingUtil();
private Path testDir;
private StoreFileListFile storeFileListFile;
@Rule
public TestName name = new TestName();
@Before
public void setUp() throws IOException {
testDir = UTIL.getDataTestDir(name.getMethodName());
HRegionFileSystem hfs = mock(HRegionFileSystem.class);
when(hfs.getFileSystem()).thenReturn(FileSystem.get(UTIL.getConfiguration()));
StoreContext ctx = StoreContext.getBuilder().withFamilyStoreDirectoryPath(testDir)
.withRegionFileSystem(hfs).build();
storeFileListFile = new StoreFileListFile(ctx);
}
@AfterClass
public static void tearDown() {
UTIL.cleanupTestDir();
}
@Test
public void testEmptyLoad() throws IOException {
assertNull(storeFileListFile.load());
}
private FileStatus getOnlyTrackerFile(FileSystem fs) throws IOException {
return fs.listStatus(new Path(testDir, StoreFileListFile.TRACK_FILE_DIR))[0];
}
private byte[] readAll(FileSystem fs, Path file) throws IOException {
try (FSDataInputStream in = fs.open(file)) {
return ByteStreams.toByteArray(in);
}
}
private void write(FileSystem fs, Path file, byte[] buf, int off, int len) throws IOException {
try (FSDataOutputStream out = fs.create(file, true)) {
out.write(buf, off, len);
}
}
@Test
public void testLoadPartial() throws IOException {
StoreFileList.Builder builder = StoreFileList.newBuilder();
storeFileListFile.update(builder);
FileSystem fs = FileSystem.get(UTIL.getConfiguration());
FileStatus trackerFileStatus = getOnlyTrackerFile(fs);
// truncate it so we do not have enough data
LOG.info("Truncate file {} with size {} to {}", trackerFileStatus.getPath(),
trackerFileStatus.getLen(), trackerFileStatus.getLen() / 2);
byte[] content = readAll(fs, trackerFileStatus.getPath());
write(fs, trackerFileStatus.getPath(), content, 0, content.length / 2);
assertNull(storeFileListFile.load());
}
private void writeInt(byte[] buf, int off, int value) {
byte[] b = Bytes.toBytes(value);
for (int i = 0; i < 4; i++) {
buf[off + i] = b[i];
}
}
@Test
public void testZeroFileLength() throws IOException {
StoreFileList.Builder builder = StoreFileList.newBuilder();
storeFileListFile.update(builder);
FileSystem fs = FileSystem.get(UTIL.getConfiguration());
FileStatus trackerFileStatus = getOnlyTrackerFile(fs);
// write a zero length
byte[] content = readAll(fs, trackerFileStatus.getPath());
writeInt(content, 0, 0);
write(fs, trackerFileStatus.getPath(), content, 0, content.length);
assertThrows(IOException.class, () -> storeFileListFile.load());
}
@Test
public void testBigFileLength() throws IOException {
StoreFileList.Builder builder = StoreFileList.newBuilder();
storeFileListFile.update(builder);
FileSystem fs = FileSystem.get(UTIL.getConfiguration());
FileStatus trackerFileStatus = getOnlyTrackerFile(fs);
// write a large length
byte[] content = readAll(fs, trackerFileStatus.getPath());
writeInt(content, 0, 128 * 1024 * 1024);
write(fs, trackerFileStatus.getPath(), content, 0, content.length);
assertThrows(IOException.class, () -> storeFileListFile.load());
}
@Test
public void testChecksumMismatch() throws IOException {
StoreFileList.Builder builder = StoreFileList.newBuilder();
storeFileListFile.update(builder);
FileSystem fs = FileSystem.get(UTIL.getConfiguration());
FileStatus trackerFileStatus = getOnlyTrackerFile(fs);
// flip one byte
byte[] content = readAll(fs, trackerFileStatus.getPath());
content[5] = (byte) ~content[5];
write(fs, trackerFileStatus.getPath(), content, 0, content.length);
assertThrows(IOException.class, () -> storeFileListFile.load());
}
}