mirror of https://github.com/apache/lucene.git
LUCENE-3728: remove separateFiles mess, remove CFX from IndexFileNames, preflex codec handles this hair itself
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237820 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 3cab173f7c
commit 4e620c58da
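The change in one picture: Codec previously exposed two file-enumeration hooks, files() for files that may be packed into the compound file (CFS) and separateFiles() for files that must always live beside it; after this commit there is only files(), and each codec reports its complete file set. A minimal sketch of the API shape before and after (simplified stand-ins, not the actual org.apache.lucene sources):

import java.io.IOException;
import java.util.Set;

class SegmentInfo {} // stand-in for org.apache.lucene.index.SegmentInfo

// Before LUCENE-3728: two hooks, and every caller had to remember to invoke both.
abstract class CodecBefore {
  // files that may be packed into the per-segment compound file (.cfs)
  abstract void files(SegmentInfo info, Set<String> files) throws IOException;
  // files that must always stay outside the .cfs (live docs, separate norms, ...)
  abstract void separateFiles(SegmentInfo info, Set<String> files) throws IOException;
}

// After: a single hook; each codec appends everything it owns, including
// files that can never be packed into the .cfs (e.g. generational deletes).
abstract class CodecAfter {
  abstract void files(SegmentInfo info, Set<String> files) throws IOException;
}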
@@ -60,14 +60,8 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
       docValuesFormat().files(info, files);
       normsFormat().files(info, files);
     }
-  }
-  
-  /** Populates <code>files</code> with any filenames that are
-   * stored outside of CFS for the <code>info</code> segment.
-   */
-  // TODO: can we somehow totally remove this?
-  public void separateFiles(SegmentInfo info, Set<String> files) throws IOException {
-    liveDocsFormat().separateFiles(info, files);
+    // never inside CFS
+    liveDocsFormat().files(info, files);
   }
   
   /** Encodes/decodes postings */
@@ -37,5 +37,5 @@ public abstract class LiveDocsFormat {
   public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException;
   /** writes bits to a file */
   public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException;
-  public abstract void separateFiles(SegmentInfo info, Set<String> files) throws IOException;
+  public abstract void files(SegmentInfo info, Set<String> files) throws IOException;
 }
@@ -61,6 +61,9 @@ public class Lucene3xCodec extends Codec {
   
   private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat();
   
+  /** Extension of compound file for doc store files*/
+  static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
+  
   // TODO: this should really be a different impl
   private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() {
     @Override
@@ -125,31 +128,30 @@ public class Lucene3xCodec extends Codec {
     return liveDocsFormat;
   }
   
-  // overrides the default implementation in codec.java to handle CFS without CFE
+  // overrides the default implementation in codec.java to handle CFS without CFE,
+  // shared doc stores, compound doc stores, separate norms, etc
   @Override
   public void files(SegmentInfo info, Set<String> files) throws IOException {
     if (info.getUseCompoundFile()) {
       files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
       // NOTE: we don't add the CFE extension: because 3.x format doesn't use it.
     } else {
-      super.files(info, files);
+      postingsFormat().files(info, "", files);
+      storedFieldsFormat().files(info, files);
+      termVectorsFormat().files(info, files);
+      fieldInfosFormat().files(info, files);
+      // TODO: segmentInfosFormat should be allowed to declare additional files
+      // if it wants, in addition to segments_N
+      docValuesFormat().files(info, files);
+      normsFormat().files(info, files);
     }
-  }
-  
-  // override the default implementation in codec.java to handle separate norms files, and shared compound docstores
-  @Override
-  public void separateFiles(SegmentInfo info, Set<String> files) throws IOException {
-    super.separateFiles(info, files);
+    // never inside CFS
+    liveDocsFormat().files(info, files);
     normsFormat().separateFiles(info, files);
     
     // shared docstores: these guys check the hair
     if (info.getDocStoreOffset() != -1) {
       // We are sharing doc stores (stored fields, term
       // vectors) with other segments
       assert info.getDocStoreSegment() != null;
       if (info.getDocStoreIsCompoundFile()) {
         files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
       }
       // otherwise, if its not a compound docstore, storedfieldsformat/termvectorsformat are each adding their relevant files
       storedFieldsFormat().files(info, files);
       termVectorsFormat().files(info, files);
     }
   }
   
 }
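Throughout these hunks the .cfx name is produced by IndexFileNames.segmentFileName from the shared doc store segment's name, so a doc store shared by several segments yields a single "_0.cfx"-style file. A minimal re-implementation of that naming rule, for illustration only (the real logic lives in org.apache.lucene.index.IndexFileNames):

// Illustration only; simplified from IndexFileNames.segmentFileName.
static String segmentFileName(String segmentName, String segmentSuffix, String ext) {
  StringBuilder sb = new StringBuilder(segmentName);
  if (segmentSuffix.length() > 0) {
    sb.append('_').append(segmentSuffix); // per-format suffix, unused here
  }
  if (ext.length() > 0) {
    sb.append('.').append(ext);
  }
  return sb.toString();
}
// segmentFileName("_0", "", "cfx") -> "_0.cfx"  (compound doc store)
// segmentFileName("_0", "", "cfs") -> "_0.cfs"  (regular compound file)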
@@ -58,7 +58,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
       if (si.getDocStoreIsCompoundFile()) {
         dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
             si.getDocStoreSegment(), "",
-            IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false);
+            Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
       }
     } else if (si.getUseCompoundFile()) {
       dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
@@ -144,7 +144,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
     if (docStoreOffset != -1) {
       storesSegment = docStoreSegment;
       storeIsCompoundFile = docStoreIsCompoundFile;
-      ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
+      ext = Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION;
     } else {
       storesSegment = name;
       storeIsCompoundFile = isCompoundFile;
@@ -147,7 +147,7 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme
     try {
       if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) {
         d = storeCFSReader = new CompoundFileDirectory(si.dir,
-            IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false);
+            IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
       } else {
         storeCFSReader = null;
       }
@@ -327,14 +327,18 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme
     return fieldsStream;
   }
   
+  // note: if there are shared docstores, we are also called by Lucene3xCodec even in
+  // the CFS case. so logic here must handle this.
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
     if (info.getDocStoreOffset() != -1) {
       assert info.getDocStoreSegment() != null;
-      if (!info.getDocStoreIsCompoundFile()) {
+      if (info.getDocStoreIsCompoundFile()) {
+        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
+      } else {
         files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
         files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
       }
-    } else {
+    } else if (!info.getUseCompoundFile()) {
       files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
       files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
     }
@@ -123,7 +123,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
     try {
       if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) {
         d = storeCFSReader = new CompoundFileDirectory(si.dir,
-            IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false);
+            IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
       } else {
         storeCFSReader = null;
       }
@@ -690,16 +690,20 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
     return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format);
   }
   
+  // note: if there are shared docstores, we are also called by Lucene3xCodec even in
+  // the CFS case. so logic here must handle this.
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
     if (info.getHasVectors()) {
       if (info.getDocStoreOffset() != -1) {
         assert info.getDocStoreSegment() != null;
-        if (!info.getDocStoreIsCompoundFile()) {
+        if (info.getDocStoreIsCompoundFile()) {
+          files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
+        } else {
           files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION));
           files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION));
           files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION));
         }
-      } else {
+      } else if (!info.getUseCompoundFile()) {
         files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION));
         files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION));
         files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION));
@@ -48,7 +48,7 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat {
   }
   
   @Override
-  public void separateFiles(SegmentInfo info, Set<String> files) throws IOException {
+  public void files(SegmentInfo info, Set<String> files) throws IOException {
     if (info.hasDeletions()) {
       files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()));
     }
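Live docs are "never inside CFS" because deletes arrive after a segment (and its compound file) has already been written: each round of deletes bumps a generation that is encoded into the filename, so the file must sit beside the sealed .cfs. A simplified illustration of the generation-stamped names produced by IndexFileNames.fileNameFromGeneration (sketch only, not the verbatim implementation):

// Simplified from IndexFileNames.fileNameFromGeneration; illustration only.
static String fileNameFromGeneration(String base, String ext, long gen) {
  if (gen == -1) {
    return null;                   // segment has no deletes yet
  } else if (gen == 0) {
    return base + "." + ext;       // first generation, no stamp
  } else {
    // generation rendered in Character.MAX_RADIX, e.g. "_0_1.del", "_0_2.del"
    return base + "_" + Long.toString(gen, Character.MAX_RADIX) + "." + ext;
  }
}
// fileNameFromGeneration("_0", "del", 2) -> "_0_2.del"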
@@ -138,7 +138,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
   }
   
   @Override
-  public void separateFiles(SegmentInfo info, Set<String> files) throws IOException {
+  public void files(SegmentInfo info, Set<String> files) throws IOException {
     if (info.hasDeletions()) {
       files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen()));
     }
@@ -54,9 +54,6 @@ public final class IndexFileNames {
   /** Extension of compound file entries */
   public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe";
   
-  /** Extension of compound file for doc store files*/
-  public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
-  
   /**
    * This array contains all filename extensions used by
    * Lucene's index files, with one exception, namely the
@@ -68,7 +65,6 @@ public final class IndexFileNames {
     COMPOUND_FILE_EXTENSION,
     COMPOUND_FILE_ENTRIES_EXTENSION,
     GEN_EXTENSION,
-    COMPOUND_FILE_STORE_EXTENSION,
   };
   
   /**
@@ -2565,7 +2565,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       // Copy the segment files
       for (String file: info.files()) {
         final String newFileName;
-        if (codecDocStoreFiles.contains(file) || file.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)) {
+        if (codecDocStoreFiles.contains(file)) {
          newFileName = newDsName + IndexFileNames.stripSegmentName(file);
          if (dsFilesCopied.contains(newFileName)) {
            continue;
@@ -4070,12 +4070,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
    */
   static final Collection<String> createCompoundFile(Directory directory, String fileName, CheckAbort checkAbort, final SegmentInfo info, IOContext context)
       throws IOException {
-  
+    assert info.getDocStoreOffset() == -1;
     // Now merge all added files
     Collection<String> files = info.files();
     CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true);
     try {
-      assert assertNoSeparateFiles(files, directory, info);
+      // nocommit: we could make a crappy regex like before...
+      // assert assertNoSeparateFiles(files, directory, info);
       for (String file : files) {
         directory.copy(cfsDir, file, file, context);
         checkAbort.work(directory.fileLength(file));
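The nocommit above disables the pre-CFS sanity check because Codec.separateFiles(), which enumerated the files to reject, was removed in the first hunk (the helper itself is deleted in the next hunk). The "crappy regex" alternative the comment alludes to might look something like this; a purely hypothetical sketch, not code from this commit:

import java.util.Collection;
import java.util.regex.Pattern;

class SeparateFileCheck {
  // Hypothetical: generational deletes ("_0_1.del") and 3.x separate norms
  // ("_0_1.s0") must never be packed into a .cfs.
  private static final Pattern SEPARATE_FILE_PATTERN =
      Pattern.compile(".*\\.(del|s\\d+)$");

  static boolean assertNoSeparateFiles(Collection<String> files) {
    for (String file : files) {
      assert !SEPARATE_FILE_PATTERN.matcher(file).matches()
          : file + " should not go in CFS!";
    }
    return true;
  }
}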
@@ -4086,20 +4087,4 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
     
     return files;
   }
-  
-  
-  /**
-   * used only by assert: checks that filenames about to be put in cfs belong.
-   */
-  private static boolean assertNoSeparateFiles(Collection<String> files,
-      Directory dir, SegmentInfo info) throws IOException {
-    // maybe this is overkill, but codec naming clashes would be bad.
-    Set<String> separateFiles = new HashSet<String>();
-    info.getCodec().separateFiles(info, separateFiles);
-    
-    for (String file : files) {
-      assert !separateFiles.contains(file) : file + " should not go in CFS!";
-    }
-    return true;
-  }
 }
@@ -422,9 +422,6 @@ public final class SegmentInfo implements Cloneable {
     
     codec.files(this, fileSet);
     
-    // regardless of compound file setting: these files are always in the directory
-    codec.separateFiles(this, fileSet);
-    
     files = new ArrayList<String>(fileSet);
     
     return files;
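With separateFiles() gone, SegmentInfo needs only the single codec callback to build its cached file list. A condensed sketch of the resulting method, reconstructed from the context lines above (not verbatim source; caching details in the real class may differ):

// Reconstruction for illustration only.
public List<String> files() throws IOException {
  if (files == null) {
    Set<String> fileSet = new HashSet<String>();
    // one call now yields everything: CFS members and the always-separate
    // files (live docs, separate norms, shared doc stores) alike
    codec.files(this, fileSet);
    files = new ArrayList<String>(fileSet);
  }
  return files;
}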
@@ -117,14 +117,13 @@ public class PreFlexRWCodec extends Lucene3xCodec {
     if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) {
       // because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files.
       // so we must check segment version here to see if its a "real" 3.x segment or a "fake"
-      // one that we wrote with a 4.x-format CFS+CFE
-      files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+      // one that we wrote with a 4.x-format CFS+CFE, in this case we must add the .CFE
       String version = info.getVersion();
       if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) {
         files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
       }
-    } else {
-      super.files(info, files);
     }
+    
+    super.files(info, files);
   }
 }
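The guard reads a little backwards: compare("4.0", version) <= 0 is true exactly when the segment's version is 4.0 or newer, i.e. the "fake" 3.x segments that PreFlexRW wrote with a 4.x-format CFS+CFE. A usage sketch of the comparator's ordering (assuming the 3.x-era org.apache.lucene.util.StringHelper API):

import java.util.Comparator;
import org.apache.lucene.util.StringHelper;

class VersionCheckDemo {
  public static void main(String[] args) {
    Comparator<String> cmp = StringHelper.getVersionComparator();
    // version-aware, not lexicographic: "3.10" is newer than "3.6"
    assert cmp.compare("3.10", "3.6") > 0;
    assert cmp.compare("4.0", "3.6") > 0;
    // hence compare("4.0", version) <= 0  <=>  version is 4.0 or later
    assert cmp.compare("4.0", "4.0") == 0;
  }
}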
@@ -135,34 +135,4 @@ public class TestSegmentMerger extends LuceneTestCase {
     TestSegmentReader.checkNorms(mergedReader);
     mergedReader.close();
   }
-  
-  // LUCENE-3143
-  public void testInvalidFilesToCreateCompound() throws Exception {
-    Directory dir = newDirectory();
-    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
-    IndexWriter w = new IndexWriter(dir, iwc);
-    
-    // Create an index w/ .del file
-    w.addDocument(new Document());
-    Document doc = new Document();
-    doc.add(new TextField("c", "test"));
-    w.addDocument(doc);
-    w.commit();
-    w.deleteDocuments(new Term("c", "test"));
-    w.close();
-    
-    // Assert that SM fails if .del exists
-    SegmentMerger sm = new SegmentMerger(InfoStream.getDefault(), dir, 1, "a", MergeState.CheckAbort.NONE, null, null, Codec.getDefault(), newIOContext(random));
-    boolean doFail = false;
-    try {
-      IndexWriter.createCompoundFile(dir, "b1", MergeState.CheckAbort.NONE, w.segmentInfos.info(0), newIOContext(random));
-      doFail = true; // should never get here
-    } catch (AssertionError e) {
-      // expected
-    }
-    assertFalse("should not have been able to create a .cfs with .del and .s* files", doFail);
-    
-    dir.close();
-  }
-  
 }