mirror of https://github.com/apache/lucene.git
SOLR-11920: Differential file copy for IndexFetcher
This commit is contained in:
parent
759b0d092f
commit
fbdf7fa736
|
@ -33,6 +33,7 @@ grant {
|
|||
permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete";
|
||||
permission java.io.FilePermission "${clover.db.dir}${/}-", "read,execute,write,delete";
|
||||
permission java.io.FilePermission "${tests.linedocsfile}", "read";
|
||||
permission java.nio.file.LinkPermission "hard";
|
||||
|
||||
// all possibilities of accepting/binding connections on localhost with ports >=1024:
|
||||
permission java.net.SocketPermission "localhost:1024-", "accept,listen";
|
||||
|
|
|
@ -62,6 +62,10 @@ Bug Fixes
|
|||
Optimizations
|
||||
----------------------
|
||||
|
||||
* SOLR-11920: IndexFetcher now fetches only those files (from master/leader) that are different. This
|
||||
differential fetching now speeds up recovery times when full index replication is needed, but only
|
||||
a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -60,6 +60,8 @@ import org.apache.lucene.index.IndexCommit;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.FilterDirectory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
@ -360,7 +362,7 @@ public class IndexFetcher {
|
|||
boolean successfulInstall = false;
|
||||
markReplicationStart();
|
||||
Directory tmpIndexDir = null;
|
||||
String tmpIndex;
|
||||
String tmpIndexDirPath;
|
||||
Directory indexDir = null;
|
||||
String indexDirPath;
|
||||
boolean deleteTmpIdxDir = true;
|
||||
|
@ -496,9 +498,9 @@ public class IndexFetcher {
|
|||
|
||||
String timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
|
||||
String tmpIdxDirName = "index." + timestamp;
|
||||
tmpIndex = solrCore.getDataDir() + tmpIdxDirName;
|
||||
tmpIndexDirPath = solrCore.getDataDir() + tmpIdxDirName;
|
||||
|
||||
tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
// tmp dir for tlog files
|
||||
if (tlogFilesToDownload != null) {
|
||||
|
@ -511,8 +513,9 @@ public class IndexFetcher {
|
|||
|
||||
try {
|
||||
|
||||
//We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
|
||||
//in the metadata. If there is a mismatch for the same index file then we download the entire index again.
|
||||
// We will compare all the index files from the master vs the index files on disk to see if there is a mismatch
|
||||
// in the metadata. If there is a mismatch for the same index file then we download the entire index
|
||||
// (except when differential copy is applicable) again.
|
||||
if (!isFullCopyNeeded && isIndexStale(indexDir)) {
|
||||
isFullCopyNeeded = true;
|
||||
}
|
||||
|
@ -563,7 +566,8 @@ public class IndexFetcher {
|
|||
LOG.info("Starting download (fullCopy={}) to {}", isFullCopyNeeded, tmpIndexDir);
|
||||
successfulInstall = false;
|
||||
|
||||
long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
|
||||
long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir,
|
||||
tmpIndexDir, indexDirPath, tmpIndexDirPath, latestGeneration);
|
||||
if (tlogFilesToDownload != null) {
|
||||
bytesDownloaded += downloadTlogFiles(tmpTlogDir, latestGeneration);
|
||||
reloadCore = true; // reload update log
|
||||
|
@ -983,18 +987,26 @@ public class IndexFetcher {
|
|||
* Download the index files. If a new index is needed, download all the files.
|
||||
*
|
||||
* @param downloadCompleteIndex is it a fresh index copy
|
||||
* @param tmpIndexDir the directory to which files need to be downloadeed to
|
||||
* @param indexDir the indexDir to be merged to
|
||||
* @param tmpIndexDir the directory to which files need to be downloaded to
|
||||
* @param indexDirPath the path of indexDir
|
||||
* @param latestGeneration the version number
|
||||
*
|
||||
* @return number of bytes downloaded
|
||||
*/
|
||||
private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
|
||||
private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir,
|
||||
String indexDirPath, String tmpIndexDirPath, long latestGeneration)
|
||||
throws Exception {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
|
||||
}
|
||||
long bytesDownloaded = 0;
|
||||
long bytesSkippedCopying = 0;
|
||||
boolean doDifferentialCopy = (indexDir instanceof FSDirectory ||
|
||||
(indexDir instanceof FilterDirectory && FilterDirectory.unwrap(indexDir) instanceof FSDirectory))
|
||||
&& (tmpIndexDir instanceof FSDirectory ||
|
||||
(tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory));
|
||||
|
||||
for (Map<String,Object> file : filesToDownload) {
|
||||
String filename = (String) file.get(NAME);
|
||||
long size = (Long) file.get(SIZE);
|
||||
|
@ -1002,17 +1014,27 @@ public class IndexFetcher {
|
|||
boolean alwaysDownload = filesToAlwaysDownloadIfNoChecksums(filename, size, compareResult);
|
||||
LOG.debug("Downloading file={} size={} checksum={} alwaysDownload={}", filename, size, file.get(CHECKSUM), alwaysDownload);
|
||||
if (!compareResult.equal || downloadCompleteIndex || alwaysDownload) {
|
||||
dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
|
||||
(String) file.get(NAME), FILE, latestGeneration);
|
||||
currentFile = file;
|
||||
dirFileFetcher.fetchFile();
|
||||
bytesDownloaded += dirFileFetcher.getBytesDownloaded();
|
||||
if (downloadCompleteIndex && doDifferentialCopy && compareResult.equal && compareResult.checkSummed) {
|
||||
File localFile = new File(indexDirPath, filename);
|
||||
LOG.info("Don't need to download this file. Local file's path is: {}, checksum is: {}",
|
||||
localFile.getAbsolutePath(), file.get(CHECKSUM));
|
||||
// A hard link here should survive the eventual directory move, and should be more space efficient as
|
||||
// compared to a file copy. TODO: Maybe we could do a move safely here?
|
||||
Files.createLink(new File(tmpIndexDirPath, filename).toPath(), localFile.toPath());
|
||||
bytesSkippedCopying += localFile.length();
|
||||
} else {
|
||||
dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
|
||||
(String) file.get(NAME), FILE, latestGeneration);
|
||||
currentFile = file;
|
||||
dirFileFetcher.fetchFile();
|
||||
bytesDownloaded += dirFileFetcher.getBytesDownloaded();
|
||||
}
|
||||
filesDownloaded.add(new HashMap<>(file));
|
||||
} else {
|
||||
LOG.info("Skipping download for " + file.get(NAME)
|
||||
+ " because it already exists");
|
||||
LOG.info("Skipping download for {} because it already exists", file.get(NAME));
|
||||
}
|
||||
}
|
||||
LOG.info("Bytes downloaded: {}, Bytes skipped downloading: {}", bytesDownloaded, bytesSkippedCopying);
|
||||
return bytesDownloaded;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue