mirror of https://github.com/apache/lucene.git
LUCENE-9260: Verify checksums of CFS files. (#1311)
This commit is contained in:
parent
aa605b3c70
commit
0aa4ba7ccb
|
@ -283,6 +283,9 @@ Optimizations
|
|||
|
||||
* LUCENE-9237: Faster UniformSplit intersect TermsEnum. (Bruno Roustant)
|
||||
|
||||
* LUCENE-9260: LeafReader#checkIntegrity verifies checksums of CFS files.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-9068: FuzzyQuery builds its Automaton up-front (Alan Woodward, Mike Drob)
|
||||
|
||||
* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)
|
||||
|
|
|
@ -24,11 +24,11 @@ import java.text.DecimalFormat;
|
|||
import java.text.DecimalFormatSymbols;
|
||||
import java.text.ParseException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -37,7 +37,6 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
@ -55,7 +54,7 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
|
||||
final IndexInput in = dir.openInput(dataFile, context);
|
||||
|
||||
|
@ -103,7 +102,7 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
|
|||
endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND));
|
||||
}
|
||||
|
||||
return new Directory() {
|
||||
return new CompoundDirectory() {
|
||||
|
||||
private int getIndex(String name) throws IOException {
|
||||
int index = Arrays.binarySearch(fileNames, name);
|
||||
|
@ -143,28 +142,10 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
|
|||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
// write methods: disabled
|
||||
|
||||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void deleteFile(String name) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void rename(String source, String dest) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void syncMetaData() { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public Lock obtainLock(String name) { throw new UnsupportedOperationException(); }
|
||||
public void checkIntegrity() throws IOException {
|
||||
// No checksums for SimpleText
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -37,4 +37,9 @@ public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase {
|
|||
public void testMissingCodecHeadersAreCaught() {
|
||||
// SimpleText does not catch broken sub-files in CFS!
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testCheckIntegrity() {
|
||||
// SimpleText does not catch broken sub-files in CFS!
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
|
||||
/**
|
||||
* A read-only {@link Directory} that consists of a view over a compound file.
|
||||
* @see CompoundFormat
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class CompoundDirectory extends Directory {
|
||||
|
||||
/** Sole constructor. */
|
||||
protected CompoundDirectory() {}
|
||||
|
||||
/**
|
||||
* Checks consistency of this directory.
|
||||
* <p>
|
||||
* Note that this may be costly in terms of I/O, e.g.
|
||||
* may involve computing a checksum value against large data files.
|
||||
*/
|
||||
public abstract void checkIntegrity() throws IOException;
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException always: not supported by CFS */
|
||||
@Override
|
||||
public final void deleteFile(String name) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException always: not supported by CFS */
|
||||
@Override
|
||||
public final void rename(String from, String to) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void syncMetaData() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public final IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void sync(Collection<String> names) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Lock obtainLock(String name) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
}
|
|
@ -40,8 +40,8 @@ public abstract class CompoundFormat {
|
|||
/**
|
||||
* Returns a Directory view (read-only) for the compound files in this segment
|
||||
*/
|
||||
public abstract Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException;
|
||||
|
||||
public abstract CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Packs the provided segment's files into a compound format. All files referenced
|
||||
* by the provided {@link SegmentInfo} must have {@link CodecUtil#writeIndexHeader}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.codecs.lucene50;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
|
@ -66,7 +67,7 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
return new Lucene50CompoundReader(dir, si, context);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,13 +19,13 @@ package org.apache.lucene.codecs.lucene50;
|
|||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
|
@ -33,8 +33,6 @@ import org.apache.lucene.store.ChecksumIndexInput;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
@ -43,7 +41,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
* Directory methods that would normally modify data throw an exception.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
final class Lucene50CompoundReader extends Directory {
|
||||
final class Lucene50CompoundReader extends CompoundDirectory {
|
||||
|
||||
/** Offset/Length for a slice inside of a compound file */
|
||||
public static final class FileEntry {
|
||||
|
@ -160,24 +158,6 @@ final class Lucene50CompoundReader extends Directory {
|
|||
return res;
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException always: not supported by CFS */
|
||||
@Override
|
||||
public void deleteFile(String name) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException always: not supported by CFS */
|
||||
@Override
|
||||
public void rename(String from, String to) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void syncMetaData() {
|
||||
}
|
||||
|
||||
/** Returns the length of a file in the directory.
|
||||
* @throws IOException if the file does not exist */
|
||||
@Override
|
||||
|
@ -188,26 +168,6 @@ final class Lucene50CompoundReader extends Directory {
|
|||
throw new FileNotFoundException(name);
|
||||
return e.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Lock obtainLock(String name) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -218,4 +178,9 @@ final class Lucene50CompoundReader extends Directory {
|
|||
public Set<String> getPendingDeletions() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
CodecUtil.checksumEntireFile(handle);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.Set;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PointsReader;
|
||||
|
@ -60,7 +61,7 @@ final class SegmentCoreReaders {
|
|||
final StoredFieldsReader fieldsReaderOrig;
|
||||
final TermVectorsReader termVectorsReaderOrig;
|
||||
final PointsReader pointsReader;
|
||||
final Directory cfsReader;
|
||||
final CompoundDirectory cfsReader;
|
||||
final String segment;
|
||||
/**
|
||||
* fieldinfos for this core: means gen=-1.
|
||||
|
|
|
@ -366,4 +366,12 @@ public final class SegmentReader extends CodecReader {
|
|||
public Bits getHardLiveDocs() {
|
||||
return hardLiveDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
super.checkIntegrity();
|
||||
if (core.cfsReader != null) {
|
||||
core.cfsReader.checkIntegrity();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.cranky;
|
|||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -34,7 +35,7 @@ class CrankyCompoundFormat extends CompoundFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
return delegate.getCompoundReader(dir, si, context);
|
||||
}
|
||||
|
||||
|
|
|
@ -21,13 +21,17 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.CompoundDirectory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FilterDirectory;
|
||||
import org.apache.lucene.store.FlushInfo;
|
||||
|
@ -36,6 +40,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.store.NRTCachingDirectory;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -821,4 +826,41 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
assertTrue(e.getMessage().contains("checksum failed (hardware problem?)"));
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testCheckIntegrity() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
String subFile = "_123.xyz";
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
|
||||
CodecUtil.writeIndexHeader(os, "Foo", 0, si.getId(), "suffix");
|
||||
for (int i = 0; i < 1024; i++) {
|
||||
os.writeByte((byte) i);
|
||||
}
|
||||
os.writeInt(CodecUtil.FOOTER_MAGIC);
|
||||
os.writeInt(0);
|
||||
long checksum = os.getChecksum();
|
||||
os.writeLong(checksum);
|
||||
}
|
||||
|
||||
si.setFiles(Collections.singletonList(subFile));
|
||||
|
||||
FileTrackingDirectoryWrapper writeTrackingDir = new FileTrackingDirectoryWrapper(dir);
|
||||
si.getCodec().compoundFormat().write(writeTrackingDir, si, IOContext.DEFAULT);
|
||||
final Set<String> createdFiles = writeTrackingDir.getFiles();
|
||||
|
||||
ReadBytesDirectoryWrapper readTrackingDir = new ReadBytesDirectoryWrapper(dir);
|
||||
CompoundDirectory compoundDir = si.getCodec().compoundFormat().getCompoundReader(readTrackingDir, si, IOContext.READ);
|
||||
compoundDir.checkIntegrity();
|
||||
Map<String,FixedBitSet> readBytes = readTrackingDir.getReadBytes();
|
||||
assertEquals(createdFiles, readBytes.keySet());
|
||||
for (Map.Entry<String, FixedBitSet> entry : readBytes.entrySet()) {
|
||||
final String file = entry.getKey();
|
||||
final FixedBitSet set = entry.getValue().clone();
|
||||
set.flip(0, set.length());
|
||||
final int next = set.nextSetBit(0);
|
||||
assertEquals("Byte at offset " + next + " of " + file + " was not read", DocIdSetIterator.NO_MORE_DOCS, next);
|
||||
}
|
||||
compoundDir.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -728,15 +728,20 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
|
|||
return r;
|
||||
}
|
||||
|
||||
private static class FileTrackingDirectoryWrapper extends FilterDirectory {
|
||||
/**
|
||||
* A directory that tracks created files that haven't been deleted.
|
||||
*/
|
||||
protected static class FileTrackingDirectoryWrapper extends FilterDirectory {
|
||||
|
||||
private final Set<String> files = Collections.newSetFromMap(new ConcurrentHashMap<String,Boolean>());
|
||||
|
||||
/** Sole constructor. */
|
||||
FileTrackingDirectoryWrapper(Directory in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
Set<String> getFiles() {
|
||||
/** Get the set of created files. */
|
||||
public Set<String> getFiles() {
|
||||
return Set.copyOf(files);
|
||||
}
|
||||
|
||||
|
@ -820,15 +825,18 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
|
|||
|
||||
}
|
||||
|
||||
private static class ReadBytesDirectoryWrapper extends FilterDirectory {
|
||||
/** A directory that tracks read bytes. */
|
||||
protected static class ReadBytesDirectoryWrapper extends FilterDirectory {
|
||||
|
||||
ReadBytesDirectoryWrapper(Directory in) {
|
||||
/** Sole constructor. */
|
||||
public ReadBytesDirectoryWrapper(Directory in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
private final Map<String, FixedBitSet> readBytes = new ConcurrentHashMap<>();
|
||||
|
||||
Map<String, FixedBitSet> getReadBytes() {
|
||||
/** Get information about which bytes have been read. */
|
||||
public Map<String, FixedBitSet> getReadBytes() {
|
||||
return Map.copyOf(readBytes);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue