From fbdf7fa73684230d4865de8ef8ec524e66f4e838 Mon Sep 17 00:00:00 2001 From: Ishan Chattopadhyaya Date: Thu, 15 Mar 2018 19:31:15 +0530 Subject: [PATCH] SOLR-11920: Differential file copy for IndexFetcher --- lucene/tools/junit4/solr-tests.policy | 1 + solr/CHANGES.txt | 4 ++ .../org/apache/solr/handler/IndexFetcher.java | 52 +++++++++++++------ 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/lucene/tools/junit4/solr-tests.policy b/lucene/tools/junit4/solr-tests.policy index 9c8a39f9a1f..1c46a78eaa9 100644 --- a/lucene/tools/junit4/solr-tests.policy +++ b/lucene/tools/junit4/solr-tests.policy @@ -33,6 +33,7 @@ grant { permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete"; permission java.io.FilePermission "${clover.db.dir}${/}-", "read,execute,write,delete"; permission java.io.FilePermission "${tests.linedocsfile}", "read"; + permission java.nio.file.LinkPermission "hard"; // all possibilities of accepting/binding connections on localhost with ports >=1024: permission java.net.SocketPermission "localhost:1024-", "accept,listen"; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 853859c3233..31f441654e2 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -62,6 +62,10 @@ Bug Fixes Optimizations ---------------------- +* SOLR-11920: IndexFetcher now fetches only those files (from master/leader) that are different. This + differential fetching now speeds up recovery times when full index replication is needed, but only + a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java index 7837bdccca7..2476a81f0af 100644 --- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java +++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java @@ -60,6 +60,8 @@ import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -360,7 +362,7 @@ public class IndexFetcher { boolean successfulInstall = false; markReplicationStart(); Directory tmpIndexDir = null; - String tmpIndex; + String tmpIndexDirPath; Directory indexDir = null; String indexDirPath; boolean deleteTmpIdxDir = true; @@ -496,9 +498,9 @@ public class IndexFetcher { String timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date()); String tmpIdxDirName = "index." + timestamp; - tmpIndex = solrCore.getDataDir() + tmpIdxDirName; + tmpIndexDirPath = solrCore.getDataDir() + tmpIdxDirName; - tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); + tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); // tmp dir for tlog files if (tlogFilesToDownload != null) { @@ -511,8 +513,9 @@ public class IndexFetcher { try { - //We will compare all the index files from the master vs the index files on disk to see if there is a mismatch - //in the metadata. If there is a mismatch for the same index file then we download the entire index again. + // We will compare all the index files from the master vs the index files on disk to see if there is a mismatch + // in the metadata. If there is a mismatch for the same index file then we download the entire index + // (except when differential copy is applicable) again. if (!isFullCopyNeeded && isIndexStale(indexDir)) { isFullCopyNeeded = true; } @@ -563,7 +566,8 @@ public class IndexFetcher { LOG.info("Starting download (fullCopy={}) to {}", isFullCopyNeeded, tmpIndexDir); successfulInstall = false; - long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration); + long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, + tmpIndexDir, indexDirPath, tmpIndexDirPath, latestGeneration); if (tlogFilesToDownload != null) { bytesDownloaded += downloadTlogFiles(tmpTlogDir, latestGeneration); reloadCore = true; // reload update log @@ -983,18 +987,26 @@ public class IndexFetcher { * Download the index files. If a new index is needed, download all the files. * * @param downloadCompleteIndex is it a fresh index copy - * @param tmpIndexDir the directory to which files need to be downloadeed to * @param indexDir the indexDir to be merged to + * @param tmpIndexDir the directory to which files need to be downloaded to + * @param indexDirPath the path of indexDir * @param latestGeneration the version number * * @return number of bytes downloaded */ - private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration) + private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, + String indexDirPath, String tmpIndexDirPath, long latestGeneration) throws Exception { if (LOG.isDebugEnabled()) { LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll())); } long bytesDownloaded = 0; + long bytesSkippedCopying = 0; + boolean doDifferentialCopy = (indexDir instanceof FSDirectory || + (indexDir instanceof FilterDirectory && FilterDirectory.unwrap(indexDir) instanceof FSDirectory)) + && (tmpIndexDir instanceof FSDirectory || + (tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory)); + for (Map file : filesToDownload) { String filename = (String) file.get(NAME); long size = (Long) file.get(SIZE); @@ -1002,17 +1014,27 @@ public class IndexFetcher { boolean alwaysDownload = filesToAlwaysDownloadIfNoChecksums(filename, size, compareResult); LOG.debug("Downloading file={} size={} checksum={} alwaysDownload={}", filename, size, file.get(CHECKSUM), alwaysDownload); if (!compareResult.equal || downloadCompleteIndex || alwaysDownload) { - dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file, - (String) file.get(NAME), FILE, latestGeneration); - currentFile = file; - dirFileFetcher.fetchFile(); - bytesDownloaded += dirFileFetcher.getBytesDownloaded(); + if (downloadCompleteIndex && doDifferentialCopy && compareResult.equal && compareResult.checkSummed) { + File localFile = new File(indexDirPath, filename); + LOG.info("Don't need to download this file. Local file's path is: {}, checksum is: {}", + localFile.getAbsolutePath(), file.get(CHECKSUM)); + // A hard link here should survive the eventual directory move, and should be more space efficient as + // compared to a file copy. TODO: Maybe we could do a move safely here? + Files.createLink(new File(tmpIndexDirPath, filename).toPath(), localFile.toPath()); + bytesSkippedCopying += localFile.length(); + } else { + dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file, + (String) file.get(NAME), FILE, latestGeneration); + currentFile = file; + dirFileFetcher.fetchFile(); + bytesDownloaded += dirFileFetcher.getBytesDownloaded(); + } filesDownloaded.add(new HashMap<>(file)); } else { - LOG.info("Skipping download for " + file.get(NAME) - + " because it already exists"); + LOG.info("Skipping download for {} because it already exists", file.get(NAME)); } } + LOG.info("Bytes downloaded: {}, Bytes skipped downloading: {}", bytesDownloaded, bytesSkippedCopying); return bytesDownloaded; }