mirror of https://github.com/apache/lucene.git
SOLR-6920, SOLR-6640: A replicated index can end up corrupted when small files end up with the same file name and size.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1657969 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bd3753e965
commit
dfeca5ca16
|
@ -616,6 +616,9 @@ Bug Fixes
|
|||
* olap.* in AnalyticsComponent
|
||||
(Alexandre Rafalovitch & hossman)
|
||||
|
||||
* SOLR-6920: A replicated index can end up corrupted when small files end up with the same
|
||||
file name and size. (Varun Thacker, Mark Miller)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
@ -45,10 +44,13 @@ import java.util.zip.Checksum;
|
|||
import java.util.zip.DeflaterOutputStream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SegmentCommitInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -425,27 +427,49 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
List<Map<String, Object>> result = new ArrayList<>();
|
||||
Directory dir = null;
|
||||
try {
|
||||
// get all the files in the commit
|
||||
// use a set to workaround possible Lucene bug which returns same file
|
||||
// name multiple times
|
||||
Collection<String> files = new HashSet<>(commit.getFileNames());
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
try {
|
||||
|
||||
for (String fileName : files) {
|
||||
if (fileName.endsWith(".lock")) continue;
|
||||
SegmentInfos infos = SegmentInfos.readCommit(dir, commit.getSegmentsFileName());
|
||||
for (SegmentCommitInfo commitInfo : infos) {
|
||||
for (String file : commitInfo.files()) {
|
||||
Map<String,Object> fileMeta = new HashMap<>();
|
||||
fileMeta.put(NAME, fileName);
|
||||
fileMeta.put(SIZE, dir.fileLength(fileName));
|
||||
fileMeta.put(NAME, file);
|
||||
fileMeta.put(SIZE, dir.fileLength(file));
|
||||
|
||||
try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) {
|
||||
try {
|
||||
long checksum = CodecUtil.retrieveChecksum(in);
|
||||
fileMeta.put(CHECKSUM, checksum);
|
||||
} catch(Exception e) {
|
||||
LOG.warn("Could not read checksum from index file.", e);
|
||||
}
|
||||
}
|
||||
|
||||
result.add(fileMeta);
|
||||
}
|
||||
} finally {
|
||||
core.getDirectoryFactory().release(dir);
|
||||
}
|
||||
|
||||
// add the segments_N file
|
||||
Map<String,Object> fileMeta = new HashMap<>();
|
||||
fileMeta.put(NAME, infos.getSegmentsFileName());
|
||||
fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName()));
|
||||
if (infos.getId() != null) {
|
||||
try (final IndexInput in = dir.openInput(infos.getSegmentsFileName(), IOContext.READONCE)) {
|
||||
fileMeta.put(CHECKSUM, CodecUtil.retrieveChecksum(in));
|
||||
}
|
||||
}
|
||||
result.add(fileMeta);
|
||||
} catch (IOException e) {
|
||||
rsp.add("status", "unable to get file names for given index generation");
|
||||
rsp.add("exception", e);
|
||||
LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
|
||||
} finally {
|
||||
if (dir != null) {
|
||||
try {
|
||||
core.getDirectoryFactory().release(dir);
|
||||
} catch (IOException e) {
|
||||
SolrException.log(LOG, "Could not release directory after fetching file list", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
rsp.add(CMD_GET_FILE_LIST, result);
|
||||
if (confFileNameAlias.size() < 1 || core.getCoreDescriptor().getCoreContainer().isZooKeeperAware())
|
||||
|
|
|
@ -16,43 +16,25 @@
|
|||
*/
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SegmentCommitInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.HttpClientUtil;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.ExecutorUtil;
|
||||
import org.apache.solr.common.util.FastInputStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.ReplicationHandler.FileInfo;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.update.CommitUpdateCommand;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.solr.util.FileUtils;
|
||||
import org.apache.solr.util.PropertiesInputStream;
|
||||
import org.apache.solr.util.PropertiesOutputStream;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import static org.apache.solr.handler.ReplicationHandler.ALIAS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMMAND;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
|
||||
import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.GENERATION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.NAME;
|
||||
import static org.apache.solr.handler.ReplicationHandler.OFFSET;
|
||||
import static org.apache.solr.handler.ReplicationHandler.SIZE;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
@ -94,25 +76,43 @@ import java.util.zip.Adler32;
|
|||
import java.util.zip.Checksum;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
|
||||
import static org.apache.solr.handler.ReplicationHandler.ALIAS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMMAND;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
|
||||
import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.GENERATION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.NAME;
|
||||
import static org.apache.solr.handler.ReplicationHandler.OFFSET;
|
||||
import static org.apache.solr.handler.ReplicationHandler.SIZE;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.HttpClientUtil;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.ExecutorUtil;
|
||||
import org.apache.solr.common.util.FastInputStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.ReplicationHandler.FileInfo;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.update.CommitUpdateCommand;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.solr.util.FileUtils;
|
||||
import org.apache.solr.util.PropertiesInputStream;
|
||||
import org.apache.solr.util.PropertiesOutputStream;
|
||||
import org.apache.solr.util.RefCounted;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* <p/> Provides functionality of downloading changed index files as well as config files and a timer for scheduling fetches from the
|
||||
|
@ -445,8 +445,7 @@ public class SnapPuller {
|
|||
+ isFullCopyNeeded);
|
||||
successfulInstall = false;
|
||||
|
||||
downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir,
|
||||
latestGeneration);
|
||||
downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration);
|
||||
LOG.info("Total time taken for download : "
|
||||
+ ((System.currentTimeMillis() - replicationStartTime) / 1000)
|
||||
+ " secs");
|
||||
|
@ -796,15 +795,17 @@ public class SnapPuller {
|
|||
* @param indexDir the indexDir to be merged to
|
||||
* @param latestGeneration the version number
|
||||
*/
|
||||
private void downloadIndexFiles(boolean downloadCompleteIndex,
|
||||
Directory indexDir, Directory tmpIndexDir, long latestGeneration)
|
||||
private void downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration)
|
||||
throws Exception {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll()));
|
||||
}
|
||||
for (Map<String,Object> file : filesToDownload) {
|
||||
if (!slowFileExists(indexDir, (String) file.get(NAME))
|
||||
|| downloadCompleteIndex) {
|
||||
String filename = (String) file.get(NAME);
|
||||
CompareResult compareResult = compareFile(indexDir, filename, (Long) file.get(SIZE), (Long) file.get(CHECKSUM));
|
||||
if (!compareResult.equal || downloadCompleteIndex
|
||||
|| (!compareResult.checkSummed && (filename.endsWith(".si") || filename.endsWith(".liv")
|
||||
|| filename.startsWith("segments_")))) {
|
||||
dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file,
|
||||
(String) file.get(NAME), false, latestGeneration);
|
||||
currentFile = file;
|
||||
|
@ -817,6 +818,54 @@ public class SnapPuller {
|
|||
}
|
||||
}
|
||||
|
||||
static class CompareResult {
|
||||
boolean equal = false;
|
||||
boolean checkSummed = false;
|
||||
}
|
||||
|
||||
private CompareResult compareFile(Directory indexDir, String filename, Long backupIndexFileLen, Long backupIndexFileChecksum) {
|
||||
CompareResult compareResult = new CompareResult();
|
||||
try {
|
||||
try (final IndexInput indexInput = indexDir.openInput(filename, IOContext.READONCE)) {
|
||||
long indexFileLen = indexInput.length();
|
||||
long indexFileChecksum = 0;
|
||||
|
||||
try {
|
||||
indexFileChecksum = CodecUtil.retrieveChecksum(indexInput);
|
||||
compareResult.checkSummed = true;
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Could not retrieve checksum from file.", e);
|
||||
|
||||
if (indexFileLen == backupIndexFileLen) {
|
||||
compareResult.equal = true;
|
||||
return compareResult;
|
||||
} else {
|
||||
LOG.warn("File {} did not match. expected checksum is {} and actual is checksum {}. " +
|
||||
"expected length is {} and actual length is {}", filename, backupIndexFileChecksum, indexFileChecksum,
|
||||
backupIndexFileLen, indexFileLen);
|
||||
compareResult.equal = false;
|
||||
return compareResult;
|
||||
}
|
||||
}
|
||||
|
||||
if (indexFileLen == backupIndexFileLen && indexFileChecksum == backupIndexFileChecksum) {
|
||||
compareResult.equal = true;
|
||||
return compareResult;
|
||||
} else {
|
||||
LOG.warn("File {} did not match. expected checksum is {} and actual is checksum {}. " +
|
||||
"expected length is {} and actual length is {}", filename, backupIndexFileChecksum, indexFileChecksum,
|
||||
backupIndexFileLen, indexFileLen);
|
||||
compareResult.equal = false;
|
||||
return compareResult;
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.error("Could not read file " + filename + ". Downloading it again", e);
|
||||
compareResult.equal = false;
|
||||
return compareResult;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if the file exists (can be opened), false
|
||||
* if it cannot be opened, and (unlike Java's
|
||||
* File.exists) throws IOException if there's some
|
||||
|
@ -839,14 +888,23 @@ public class SnapPuller {
|
|||
*/
|
||||
private boolean isIndexStale(Directory dir) throws IOException {
|
||||
for (Map<String, Object> file : filesToDownload) {
|
||||
if (slowFileExists(dir, (String) file.get(NAME))
|
||||
&& dir.fileLength((String) file.get(NAME)) != (Long) file.get(SIZE)) {
|
||||
LOG.warn("File " + file.get(NAME) + " expected to be " + file.get(SIZE)
|
||||
+ " while it is " + dir.fileLength((String) file.get(NAME)));
|
||||
// file exists and size is different, therefore we must assume
|
||||
// corrupted index
|
||||
String filename = (String) file.get(NAME);
|
||||
Long length = (Long) file.get(SIZE);
|
||||
Long checksum = (Long) file.get(CHECKSUM);
|
||||
if (slowFileExists(dir, filename)) {
|
||||
if (checksum != null) {
|
||||
if (!(compareFile(dir, filename, length, checksum).equal)) {
|
||||
// file exists and size or checksum is different, therefore we must download it again
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (length != dir.fileLength(filename)) {
|
||||
LOG.warn("File {} did not match. expected length is {} and actual length is {}",
|
||||
filename, length, dir.fileLength(filename));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -138,7 +138,7 @@ public class HdfsTestUtil {
|
|||
String dir = uri.toString()
|
||||
+ "/"
|
||||
+ new File(dataDir).toString().replaceAll(":", "_")
|
||||
.replaceAll("/", "_");
|
||||
.replaceAll("/", "_").replaceAll(" ", "_");
|
||||
return dir;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue