SOLR-9859: replication.properties cannot be updated after being written and neither replication.properties nor index.properties are durable in the face of a crash.

This commit is contained in:
markrmiller 2016-12-28 17:40:03 -05:00
parent dc6dcdda80
commit 96ed221fb6
6 changed files with 69 additions and 12 deletions

View File

@ -287,6 +287,9 @@ Bug Fixes
* SOLR-9699,SOLR-4668: fix exception from core status in parallel with core reload (Mikhail Khludnev)
* SOLR-9859: replication.properties cannot be updated after being written and neither replication.properties or
index.properties are durable in the face of a crash. (Pushkar Raste, Chris de Kok, Cao Manh Dat, Mark Miller)
Other Changes
----------------------

View File

@ -19,6 +19,7 @@ package org.apache.solr.core;
import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.NoSuchFileException;
import java.util.Collection;
@ -184,6 +185,20 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
fromDir.deleteFile(fileName);
}
// Sub classes perform an atomic rename if possible, otherwise fall back to delete + rename.
// This is important to support index roll over durability after crashes.

/**
 * Renames {@code fileName} to {@code toName} within {@code dir}, overwriting any existing
 * file named {@code toName}. This default implementation is NOT atomic: it deletes the
 * destination first and then renames, so a crash between the two steps can leave neither
 * file in place. Subclasses should override with a truly atomic rename where the
 * underlying storage supports one.
 *
 * @param dir the directory containing both files
 * @param fileName the current name of the file
 * @param toName the new name for the file
 * @throws IOException if the rename itself fails
 */
public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException {
  try {
    dir.deleteFile(toName);
  } catch (FileNotFoundException | NoSuchFileException e) {
    // Destination does not exist yet (e.g. the very first roll over) - nothing to delete.
    // NIO-backed Directory impls throw NoSuchFileException rather than FileNotFoundException,
    // so catch both to avoid spurious ERROR logging for this expected case.
  } catch (Exception e) {
    // Best effort: log and continue, the rename below may still succeed.
    log.error("Exception deleting file", e);
  }
  dir.rename(fileName, toName);
}
/**
* Returns the Directory for a given path, using the specified rawLockType.
* Will return the same Directory instance for the same path.

View File

@ -29,8 +29,10 @@ import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.security.UserGroupInformation;
@ -568,4 +570,11 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol
}
}
}
// Perform an atomic rename if possible.

/**
 * Atomically renames {@code fileName} to {@code toName} inside {@code dir}, replacing any
 * existing destination. HDFS supports an atomic overwriting rename through
 * {@link FileContext}, so no delete-then-rename fallback is required here.
 */
public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException {
  String dirPath = getPath(dir);
  Path source = new Path(dirPath + "/" + fileName);
  Path target = new Path(dirPath + "/" + toName);
  FileContext.getFileContext(getConf()).rename(source, target, Options.Rename.OVERWRITE);
}
}

View File

@ -18,6 +18,11 @@ package org.apache.solr.core;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Locale;
import org.apache.commons.io.FileUtils;
@ -151,5 +156,22 @@ public class StandardDirectoryFactory extends CachingDirectoryFactory {
return baseDir;
}
// Perform an atomic rename if possible.

/**
 * Renames {@code fileName} to {@code toName} inside {@code dir}, replacing any existing
 * destination file. For filesystem-backed directories this attempts an atomic move first
 * and falls back to a plain (non-atomic) overwriting move only when the underlying
 * filesystem does not support atomic moves; other directory types delegate to the
 * delete-then-rename default in the superclass.
 *
 * @throws IOException if the move fails
 */
public void renameWithOverwrite(Directory dir, String fileName, String toName) throws IOException {
  Directory baseDir = getBaseDir(dir);
  if (baseDir instanceof FSDirectory) {
    Path dirPath = ((FSDirectory) baseDir).getDirectory().toAbsolutePath();
    // Resolve both paths once instead of rebuilding each via FileSystems.getDefault().getPath.
    Path source = dirPath.resolve(fileName);
    Path target = dirPath.resolve(toName);
    try {
      Files.move(source, target, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
    } catch (AtomicMoveNotSupportedException e) {
      // Filesystem cannot do an atomic replace; fall back to a best-effort overwrite move.
      Files.move(source, target, StandardCopyOption.REPLACE_EXISTING);
    }
  } else {
    super.renameWithOverwrite(dir, fileName, toName);
  }
}
}

View File

@ -685,15 +685,19 @@ public class IndexFetcher {
sb = readToStringBuilder(replicationTime, props.getProperty(REPLICATION_FAILED_AT_LIST));
props.setProperty(REPLICATION_FAILED_AT_LIST, sb.toString());
}
final IndexOutput out = dir.createOutput(REPLICATION_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
String tmpFileName = REPLICATION_PROPERTIES + "." + System.nanoTime();
final IndexOutput out = dir.createOutput(tmpFileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
Writer outFile = new OutputStreamWriter(new PropertiesOutputStream(out), StandardCharsets.UTF_8);
try {
props.store(outFile, "Replication details");
dir.sync(Collections.singleton(REPLICATION_PROPERTIES));
dir.sync(Collections.singleton(tmpFileName));
} finally {
IOUtils.closeQuietly(outFile);
}
solrCore.getDirectoryFactory().renameWithOverwrite(dir, tmpFileName, REPLICATION_PROPERTIES);
} catch (Exception e) {
LOG.warn("Exception while updating statistics", e);
} finally {
@ -1206,24 +1210,23 @@ public class IndexFetcher {
IOUtils.closeQuietly(is);
}
}
try {
dir.deleteFile(IndexFetcher.INDEX_PROPERTIES);
} catch (IOException e) {
// no problem
}
final IndexOutput out = dir.createOutput(IndexFetcher.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
String tmpFileName = IndexFetcher.INDEX_PROPERTIES + "." + System.nanoTime();
final IndexOutput out = dir.createOutput(tmpFileName, DirectoryFactory.IOCONTEXT_NO_CACHE);
p.put("index", tmpIdxDirName);
Writer os = null;
try {
os = new OutputStreamWriter(new PropertiesOutputStream(out), StandardCharsets.UTF_8);
p.store(os, IndexFetcher.INDEX_PROPERTIES);
dir.sync(Collections.singleton(INDEX_PROPERTIES));
p.store(os, tmpFileName);
dir.sync(Collections.singleton(tmpFileName));
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to write " + IndexFetcher.INDEX_PROPERTIES, e);
} finally {
IOUtils.closeQuietly(os);
}
solrCore.getDirectoryFactory().renameWithOverwrite(dir, tmpFileName, IndexFetcher.INDEX_PROPERTIES);
return true;
} catch (IOException e1) {

View File

@ -35,6 +35,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@ -304,7 +305,11 @@ public class TestReplicationHandler extends SolrTestCaseJ4 {
// check details on the slave a couple of times before & after fetching
for (int i = 0; i < 3; i++) {
NamedList<Object> details = getDetails(slaveClient);
List replicatedAtCount = (List) ((NamedList) details.get("slave")).get("indexReplicatedAtList");
if (i > 0) {
assertEquals(i, replicatedAtCount.size());
}
assertEquals("slave isMaster?",
"false", details.get("isMaster"));
assertEquals("slave isSlave?",