SOLR-1068 -- Use a single threaded executor to fsync replicated index and configuration files before moving them

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@761577 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2009-04-03 09:08:37 +00:00
parent c7c9ef6a63
commit 6639925275
3 changed files with 73 additions and 37 deletions

View File

@ -369,7 +369,7 @@ Other Changes
21. Upgraded to Lucene 2.9-dev r752164 (shalin)
22. SOLR-1068: Use fsync on replicated index and configuration files (yonik, shalin)
22. SOLR-1068: Use fsync on replicated index and configuration files (yonik, Noble Paul, shalin)
23. SOLR-952: Cleanup duplicated code in deprecated HighlightingUtils (hossman)

View File

@ -20,6 +20,7 @@ package org.apache.solr.common.util;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.FileNotFoundException;
/**
* @version $Id$
@ -50,6 +51,9 @@ public class FileUtils {
* @throws IOException if the file could not be synced
*/
public static void sync(File fullFile) throws IOException {
if (fullFile == null || !fullFile.exists())
throw new FileNotFoundException("File does not exist " + fullFile);
boolean success = false;
int retryCount = 0;
IOException exc = null;

View File

@ -38,9 +38,7 @@ import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -87,6 +85,8 @@ public class SnapPuller {
private volatile FileFetcher fileFetcher;
private volatile ExecutorService fsyncService;
private volatile boolean stop = false;
private boolean useInternal = false;
@ -247,6 +247,8 @@ public class SnapPuller {
fetchFileList(latestVersion);
LOG.info("Number of files in latest snapshot in master: " + filesToDownload.size());
// Create the sync service
fsyncService = Executors.newSingleThreadExecutor();
// use a synchronized list because the list is read by other threads (to show details)
filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
// if the generateion of master is older than that of the slave , it means they are not compatible to be copied
@ -261,7 +263,7 @@ public class SnapPuller {
downloadIndexFiles(isSnapNeeded, tmpIndexDir, latestVersion);
LOG.info("Total time taken for download : " + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
if (modifiedConfFiles != null && !modifiedConfFiles.isEmpty()) {
if (!modifiedConfFiles.isEmpty()) {
downloadConfFiles(confFilesToDownload, latestVersion);
if (isSnapNeeded) {
modifyIndexProps(tmpIndexDir.getName());
@ -274,6 +276,7 @@ public class SnapPuller {
reloadCore();
}
} else {
terminateAndWaitFsyncService();
LOG.info("Conf files are not downloaded or are in sync");
if (isSnapNeeded) {
modifyIndexProps(tmpIndexDir.getName());
@ -302,10 +305,29 @@ public class SnapPuller {
filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null;
replicationStartTime = 0;
fileFetcher = null;
fsyncService = null;
stop = false;
fsyncException = null;
}
}
private volatile Exception fsyncException;
/**
* terminate the fsync service and wait for all the tasks to complete. If it is already terminated
*
* @throws Exception
*/
private void terminateAndWaitFsyncService() throws Exception {
if (fsyncService.isTerminated()) return;
fsyncService.shutdown();
// give a long wait say 1 hr
fsyncService.awaitTermination(3600, TimeUnit.SECONDS);
// if any fsync failed, throw that exception back
Exception fsyncExceptionCopy = fsyncException;
if (fsyncExceptionCopy != null) throw fsyncExceptionCopy;
}
/**
* Helper method to record the last replication's details so that we can show them on the statistics page across
* restarts.
@ -394,19 +416,26 @@ public class SnapPuller {
LOG.info("Starting download of configuration files from master: " + confFilesToDownload);
confFilesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
File tmpconfDir = new File(solrCore.getResourceLoader().getConfigDir(), "conf." + getDateAsStr(new Date()));
boolean status = tmpconfDir.mkdirs();
if (!status) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Failed to create temporary config folder: " + tmpconfDir.getName());
try {
boolean status = tmpconfDir.mkdirs();
if (!status) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Failed to create temporary config folder: " + tmpconfDir.getName());
}
for (Map<String, Object> file : confFilesToDownload) {
String saveAs = (String) (file.get(ALIAS) == null ? file.get(NAME) : file.get(ALIAS));
fileFetcher = new FileFetcher(tmpconfDir, file, saveAs, true, latestVersion);
currentFile = file;
fileFetcher.fetchFile();
confFilesDownloaded.add(new HashMap<String, Object>(file));
}
// this is called before copying the files to the original conf dir
// so that if there is an exception avoid corrupting the original files.
terminateAndWaitFsyncService();
copyTmpConfFiles2Conf(tmpconfDir);
} finally {
delTree(tmpconfDir);
}
for (Map<String, Object> file : confFilesToDownload) {
String saveAs = (String) (file.get(ALIAS) == null ? file.get(NAME) : file.get(ALIAS));
fileFetcher = new FileFetcher(tmpconfDir, file, saveAs, true, latestVersion);
currentFile = file;
fileFetcher.fetchFile();
confFilesDownloaded.add(new HashMap<String, Object>(file));
}
copyTmpConfFiles2Conf(tmpconfDir);
}
/**
@ -491,13 +520,11 @@ public class SnapPuller {
continue;
}
if (!copyAFile(snapDir, indexDir, fname, copiedfiles)) return false;
FileUtils.sync(new File(indexDir, fname));
copiedfiles.add(fname);
}
//copy the segments file last
if (segmentsFile != null) {
if (!copyAFile(snapDir, indexDir, segmentsFile, copiedfiles)) return false;
FileUtils.sync(new File(indexDir, segmentsFile));
}
return true;
}
@ -507,27 +534,22 @@ public class SnapPuller {
*/
private void copyTmpConfFiles2Conf(File tmpconfDir) throws IOException {
File confDir = new File(solrCore.getResourceLoader().getConfigDir());
try {
for (File file : tmpconfDir.listFiles()) {
File oldFile = new File(confDir, file.getName());
if (oldFile.exists()) {
File backupFile = new File(confDir, oldFile.getName() + "." + getDateAsStr(new Date(oldFile.lastModified())));
boolean status = oldFile.renameTo(backupFile);
if (!status) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to rename: " + oldFile + " to: " + backupFile);
}
}
boolean status = file.renameTo(oldFile);
if (status) {
FileUtils.sync(oldFile);
} else {
for (File file : tmpconfDir.listFiles()) {
File oldFile = new File(confDir, file.getName());
if (oldFile.exists()) {
File backupFile = new File(confDir, oldFile.getName() + "." + getDateAsStr(new Date(oldFile.lastModified())));
boolean status = oldFile.renameTo(backupFile);
if (!status) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to rename: " + file + " to: " + oldFile);
"Unable to rename: " + oldFile + " to: " + backupFile);
}
}
} finally {
delTree(tmpconfDir);
boolean status = file.renameTo(oldFile);
if (status) {
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to rename: " + file + " to: " + oldFile);
}
}
}
@ -786,6 +808,16 @@ public class SnapPuller {
}
} finally {
cleanup();
//if cleanup suceeds . The file is downloaded fully. do an fsync
fsyncService.submit(new Runnable(){
public void run() {
try {
FileUtils.sync(file);
} catch (IOException e) {
fsyncException = e;
}
}
});
}
}