SOLR-11920: Differential file copy for IndexFetcher

This commit is contained in:
Ishan Chattopadhyaya 2018-03-15 19:31:15 +05:30
parent 759b0d092f
commit fbdf7fa736
3 changed files with 42 additions and 15 deletions

View File

@ -33,6 +33,7 @@ grant {
permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete";
permission java.io.FilePermission "${clover.db.dir}${/}-", "read,execute,write,delete";
permission java.io.FilePermission "${tests.linedocsfile}", "read";
permission java.nio.file.LinkPermission "hard";
// all possibilities of accepting/binding connections on localhost with ports >=1024:
permission java.net.SocketPermission "localhost:1024-", "accept,listen";

View File

@ -62,6 +62,10 @@ Bug Fixes
Optimizations
----------------------
* SOLR-11920: IndexFetcher now fetches only those files (from master/leader) that differ from the local
copies. This differential fetching speeds up recovery when a full index replication is needed but only
a few segments have diverged. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
Other Changes
----------------------

View File

@ -60,6 +60,8 @@ import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@ -360,7 +362,7 @@ public class IndexFetcher {
boolean successfulInstall = false;
markReplicationStart();
Directory tmpIndexDir = null;
String tmpIndex;
String tmpIndexDirPath;
Directory indexDir = null;
String indexDirPath;
boolean deleteTmpIdxDir = true;
@ -496,9 +498,9 @@ public class IndexFetcher {
String timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
String tmpIdxDirName = "index." + timestamp;
tmpIndex = solrCore.getDataDir() + tmpIdxDirName;
tmpIndexDirPath = solrCore.getDataDir() + tmpIdxDirName;
tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
// tmp dir for tlog files
if (tlogFilesToDownload != null) {
@ -511,8 +513,9 @@ public class IndexFetcher {
try {
//We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
//in the metadata. If there is a mismatch for the same index file then we download the entire index again.
// We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
// in the metadata. If there is a mismatch for the same index file then we download the entire index again,
// except for files that can be reused through differential copy.
if (!isFullCopyNeeded && isIndexStale(indexDir)) {
isFullCopyNeeded = true;
}
@ -563,7 +566,8 @@ public class IndexFetcher {
LOG.info("Starting download (fullCopy={}) to {}", isFullCopyNeeded, tmpIndexDir);
successfulInstall = false;
long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir,
tmpIndexDir, indexDirPath, tmpIndexDirPath, latestGeneration);
if (tlogFilesToDownload != null) {
bytesDownloaded += downloadTlogFiles(tmpTlogDir, latestGeneration);
reloadCore = true; // reload update log
@ -983,18 +987,26 @@ public class IndexFetcher {
* Download the index files. If a new index is needed, download all the files.
*
* @param downloadCompleteIndex is it a fresh index copy
* @param tmpIndexDir the directory to which files need to be downloadeed to
* @param indexDir the indexDir to be merged to
* @param tmpIndexDir the directory to which files need to be downloaded to
* @param indexDirPath the path of indexDir
* @param tmpIndexDirPath the path of tmpIndexDir
* @param latestGeneration the version number
*
* @return number of bytes downloaded
*/
private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir,
String indexDirPath, String tmpIndexDirPath, long latestGeneration)
throws Exception {
if (LOG.isDebugEnabled()) {
LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
}
long bytesDownloaded = 0;
long bytesSkippedCopying = 0;
boolean doDifferentialCopy = (indexDir instanceof FSDirectory ||
(indexDir instanceof FilterDirectory && FilterDirectory.unwrap(indexDir) instanceof FSDirectory))
&& (tmpIndexDir instanceof FSDirectory ||
(tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory));
for (Map<String,Object> file : filesToDownload) {
String filename = (String) file.get(NAME);
long size = (Long) file.get(SIZE);
@ -1002,17 +1014,27 @@ public class IndexFetcher {
boolean alwaysDownload = filesToAlwaysDownloadIfNoChecksums(filename, size, compareResult);
LOG.debug("Downloading file={} size={} checksum={} alwaysDownload={}", filename, size, file.get(CHECKSUM), alwaysDownload);
if (!compareResult.equal || downloadCompleteIndex || alwaysDownload) {
if (downloadCompleteIndex && doDifferentialCopy && compareResult.equal && compareResult.checkSummed) {
File localFile = new File(indexDirPath, filename);
LOG.info("Don't need to download this file. Local file's path is: {}, checksum is: {}",
localFile.getAbsolutePath(), file.get(CHECKSUM));
// A hard link here should survive the eventual directory move, and should be more space efficient as
// compared to a file copy. TODO: Maybe we could do a move safely here?
Files.createLink(new File(tmpIndexDirPath, filename).toPath(), localFile.toPath());
bytesSkippedCopying += localFile.length();
} else {
dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
(String) file.get(NAME), FILE, latestGeneration);
currentFile = file;
dirFileFetcher.fetchFile();
bytesDownloaded += dirFileFetcher.getBytesDownloaded();
}
filesDownloaded.add(new HashMap<>(file));
} else {
LOG.info("Skipping download for " + file.get(NAME)
+ " because it already exists");
LOG.info("Skipping download for {} because it already exists", file.get(NAME));
}
}
LOG.info("Bytes downloaded: {}, Bytes skipped downloading: {}", bytesDownloaded, bytesSkippedCopying);
return bytesDownloaded;
}