HDFS-9533. seen_txid in the shared edits directory is modified during bootstrapping. Contributed by Kihwal Lee.
(cherry picked from commit 5cb1e0118b)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
This commit is contained in:
parent
6d2914a697
commit
3c0ae5914c
|
@ -1677,6 +1677,9 @@ Release 2.7.3 - UNRELEASED
|
||||||
HDFS-9516. Truncate file fails with data dirs on multiple disks.
|
HDFS-9516. Truncate file fails with data dirs on multiple disks.
|
||||||
(Plamen Jeliazkov via shv)
|
(Plamen Jeliazkov via shv)
|
||||||
|
|
||||||
|
HDFS-9533. seen_txid in the shared edits directory is modified during
|
||||||
|
bootstrapping (kihwal)
|
||||||
|
|
||||||
Release 2.7.2 - UNRELEASED
|
Release 2.7.2 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -482,8 +482,24 @@ public class NNStorage extends Storage implements Closeable,
|
||||||
* @param txid the txid that has been reached
|
* @param txid the txid that has been reached
|
||||||
*/
|
*/
|
||||||
public void writeTransactionIdFileToStorage(long txid) {
|
public void writeTransactionIdFileToStorage(long txid) {
|
||||||
|
writeTransactionIdFileToStorage(txid, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a small file in all available storage directories that
|
||||||
|
* indicates that the namespace has reached some given transaction ID.
|
||||||
|
*
|
||||||
|
* This is used when the image is loaded to avoid accidental rollbacks
|
||||||
|
* in the case where an edit log is fully deleted but there is no
|
||||||
|
* checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
|
||||||
|
* @param txid the txid that has been reached
|
||||||
|
* @param type the type of directory
|
||||||
|
*/
|
||||||
|
public void writeTransactionIdFileToStorage(long txid,
|
||||||
|
NameNodeDirType type) {
|
||||||
// Write txid marker in all storage directories
|
// Write txid marker in all storage directories
|
||||||
for (StorageDirectory sd : storageDirs) {
|
for (Iterator<StorageDirectory> it = dirIterator(type); it.hasNext();) {
|
||||||
|
StorageDirectory sd = it.next();
|
||||||
try {
|
try {
|
||||||
writeTransactionIdFile(sd, txid);
|
writeTransactionIdFile(sd, txid);
|
||||||
} catch(IOException e) {
|
} catch(IOException e) {
|
||||||
|
|
|
@ -52,6 +52,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImage;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNUpgradeUtil;
|
import org.apache.hadoop.hdfs.server.namenode.NNUpgradeUtil;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
|
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
|
||||||
|
@ -313,13 +314,14 @@ public class BootstrapStandby implements Tool, Configurable {
|
||||||
return ERR_CODE_LOGS_UNAVAILABLE;
|
return ERR_CODE_LOGS_UNAVAILABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
image.getStorage().writeTransactionIdFileToStorage(curTxId);
|
|
||||||
|
|
||||||
// Download that checkpoint into our storage directories.
|
// Download that checkpoint into our storage directories.
|
||||||
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
||||||
otherHttpAddr, imageTxId, storage, true, true);
|
otherHttpAddr, imageTxId, storage, true, true);
|
||||||
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
|
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
|
||||||
hash);
|
hash);
|
||||||
|
|
||||||
|
// Write seen_txid to the formatted image directories.
|
||||||
|
storage.writeTransactionIdFileToStorage(imageTxId, NameNodeDirType.IMAGE);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
throw ioe;
|
throw ioe;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -568,4 +568,11 @@ public abstract class FSImageTestUtil {
|
||||||
FSImageTestUtil.assertParallelFilesAreIdentical(curDirs,
|
FSImageTestUtil.assertParallelFilesAreIdentical(curDirs,
|
||||||
ignoredFiles);
|
ignoredFiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static long getStorageTxId(NameNode node, URI storageUri)
|
||||||
|
throws IOException {
|
||||||
|
StorageDirectory sDir = getFSImage(node).getStorage().
|
||||||
|
getStorageDirectory(storageUri);
|
||||||
|
return NNStorage.readTransactionIdFile(sDir);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,6 +135,13 @@ public class TestBootstrapStandby {
|
||||||
.getFSImage().getMostRecentCheckpointTxId();
|
.getFSImage().getMostRecentCheckpointTxId();
|
||||||
assertEquals(6, expectedCheckpointTxId);
|
assertEquals(6, expectedCheckpointTxId);
|
||||||
|
|
||||||
|
// advance the current txid
|
||||||
|
cluster.getFileSystem(0).create(new Path("/test_txid"), (short)1).close();
|
||||||
|
|
||||||
|
// obtain the content of seen_txid
|
||||||
|
URI editsUri = cluster.getSharedEditsDir(0, 1);
|
||||||
|
long seen_txid_shared = FSImageTestUtil.getStorageTxId(nn0, editsUri);
|
||||||
|
|
||||||
int rc = BootstrapStandby.run(
|
int rc = BootstrapStandby.run(
|
||||||
new String[]{"-force"},
|
new String[]{"-force"},
|
||||||
cluster.getConfiguration(1));
|
cluster.getConfiguration(1));
|
||||||
|
@ -145,6 +152,10 @@ public class TestBootstrapStandby {
|
||||||
ImmutableList.of((int)expectedCheckpointTxId));
|
ImmutableList.of((int)expectedCheckpointTxId));
|
||||||
FSImageTestUtil.assertNNFilesMatch(cluster);
|
FSImageTestUtil.assertNNFilesMatch(cluster);
|
||||||
|
|
||||||
|
// Make sure the seen_txid was not modified by the standby
|
||||||
|
assertEquals(seen_txid_shared,
|
||||||
|
FSImageTestUtil.getStorageTxId(nn0, editsUri));
|
||||||
|
|
||||||
// We should now be able to start the standby successfully.
|
// We should now be able to start the standby successfully.
|
||||||
cluster.restartNameNode(1);
|
cluster.restartNameNode(1);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue