LUCENE-3728: remove separateFiles mess, remove CFX from IndexFileNames, preflex codec handles this hair itself

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237820 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-01-30 17:33:52 +00:00
parent 3cab173f7c
commit 4e620c58da
13 changed files with 48 additions and 97 deletions

View File

@@ -60,14 +60,8 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
docValuesFormat().files(info, files); docValuesFormat().files(info, files);
normsFormat().files(info, files); normsFormat().files(info, files);
} }
} // never inside CFS
liveDocsFormat().files(info, files);
/** Populates <code>files</code> with any filenames that are
* stored outside of CFS for the <code>info</code> segment.
*/
// TODO: can we somehow totally remove this?
public void separateFiles(SegmentInfo info, Set<String> files) throws IOException {
liveDocsFormat().separateFiles(info, files);
} }
/** Encodes/decodes postings */ /** Encodes/decodes postings */

View File

@@ -37,5 +37,5 @@ public abstract class LiveDocsFormat {
public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException;
/** writes bits to a file */ /** writes bits to a file */
public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException;
public abstract void separateFiles(SegmentInfo info, Set<String> files) throws IOException; public abstract void files(SegmentInfo info, Set<String> files) throws IOException;
} }

View File

@@ -61,6 +61,9 @@ public class Lucene3xCodec extends Codec {
private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat(); private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat();
/** Extension of compound file for doc store files*/
static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
// TODO: this should really be a different impl // TODO: this should really be a different impl
private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() { private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() {
@Override @Override
@@ -125,31 +128,30 @@ public class Lucene3xCodec extends Codec {
return liveDocsFormat; return liveDocsFormat;
} }
// overrides the default implementation in codec.java to handle CFS without CFE // overrides the default implementation in codec.java to handle CFS without CFE,
// shared doc stores, compound doc stores, separate norms, etc
@Override @Override
public void files(SegmentInfo info, Set<String> files) throws IOException { public void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.getUseCompoundFile()) { if (info.getUseCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
// NOTE: we don't add the CFE extension: because 3.x format doesn't use it.
} else { } else {
super.files(info, files); postingsFormat().files(info, "", files);
storedFieldsFormat().files(info, files);
termVectorsFormat().files(info, files);
fieldInfosFormat().files(info, files);
// TODO: segmentInfosFormat should be allowed to declare additional files
// if it wants, in addition to segments_N
docValuesFormat().files(info, files);
normsFormat().files(info, files);
} }
} // never inside CFS
liveDocsFormat().files(info, files);
// override the default implementation in codec.java to handle separate norms files, and shared compound docstores
@Override
public void separateFiles(SegmentInfo info, Set<String> files) throws IOException {
super.separateFiles(info, files);
normsFormat().separateFiles(info, files); normsFormat().separateFiles(info, files);
// shared docstores: these guys check the hair
if (info.getDocStoreOffset() != -1) { if (info.getDocStoreOffset() != -1) {
// We are sharing doc stores (stored fields, term storedFieldsFormat().files(info, files);
// vectors) with other segments termVectorsFormat().files(info, files);
assert info.getDocStoreSegment() != null;
if (info.getDocStoreIsCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
}
// otherwise, if its not a compound docstore, storedfieldsformat/termvectorsformat are each adding their relevant files
} }
} }
} }

View File

@@ -58,7 +58,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
if (si.getDocStoreIsCompoundFile()) { if (si.getDocStoreIsCompoundFile()) {
dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
si.getDocStoreSegment(), "", si.getDocStoreSegment(), "",
IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
} }
} else if (si.getUseCompoundFile()) { } else if (si.getUseCompoundFile()) {
dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(
@@ -144,7 +144,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
if (docStoreOffset != -1) { if (docStoreOffset != -1) {
storesSegment = docStoreSegment; storesSegment = docStoreSegment;
storeIsCompoundFile = docStoreIsCompoundFile; storeIsCompoundFile = docStoreIsCompoundFile;
ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; ext = Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION;
} else { } else {
storesSegment = name; storesSegment = name;
storeIsCompoundFile = isCompoundFile; storeIsCompoundFile = isCompoundFile;

View File

@@ -147,7 +147,7 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme
try { try {
if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) {
d = storeCFSReader = new CompoundFileDirectory(si.dir, d = storeCFSReader = new CompoundFileDirectory(si.dir,
IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
} else { } else {
storeCFSReader = null; storeCFSReader = null;
} }
@@ -327,14 +327,18 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme
return fieldsStream; return fieldsStream;
} }
// note: if there are shared docstores, we are also called by Lucene3xCodec even in
// the CFS case. so logic here must handle this.
public static void files(SegmentInfo info, Set<String> files) throws IOException { public static void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.getDocStoreOffset() != -1) { if (info.getDocStoreOffset() != -1) {
assert info.getDocStoreSegment() != null; assert info.getDocStoreSegment() != null;
if (!info.getDocStoreIsCompoundFile()) { if (info.getDocStoreIsCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
} else {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
} }
} else { } else if (!info.getUseCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
} }

View File

@@ -123,7 +123,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
try { try {
if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) {
d = storeCFSReader = new CompoundFileDirectory(si.dir, d = storeCFSReader = new CompoundFileDirectory(si.dir,
IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
} else { } else {
storeCFSReader = null; storeCFSReader = null;
} }
@@ -690,16 +690,20 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader {
return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format); return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format);
} }
// note: if there are shared docstores, we are also called by Lucene3xCodec even in
// the CFS case. so logic here must handle this.
public static void files(SegmentInfo info, Set<String> files) throws IOException { public static void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.getHasVectors()) { if (info.getHasVectors()) {
if (info.getDocStoreOffset() != -1) { if (info.getDocStoreOffset() != -1) {
assert info.getDocStoreSegment() != null; assert info.getDocStoreSegment() != null;
if (!info.getDocStoreIsCompoundFile()) { if (info.getDocStoreIsCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION));
} else {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION));
} }
} else { } else if (!info.getUseCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION));

View File

@@ -48,7 +48,7 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat {
} }
@Override @Override
public void separateFiles(SegmentInfo info, Set<String> files) throws IOException { public void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.hasDeletions()) { if (info.hasDeletions()) {
files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen())); files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen()));
} }

View File

@@ -138,7 +138,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat {
} }
@Override @Override
public void separateFiles(SegmentInfo info, Set<String> files) throws IOException { public void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.hasDeletions()) { if (info.hasDeletions()) {
files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen())); files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen()));
} }

View File

@@ -54,9 +54,6 @@ public final class IndexFileNames {
/** Extension of compound file entries */ /** Extension of compound file entries */
public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe"; public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe";
/** Extension of compound file for doc store files*/
public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
/** /**
* This array contains all filename extensions used by * This array contains all filename extensions used by
* Lucene's index files, with one exception, namely the * Lucene's index files, with one exception, namely the
@@ -68,7 +65,6 @@ public final class IndexFileNames {
COMPOUND_FILE_EXTENSION, COMPOUND_FILE_EXTENSION,
COMPOUND_FILE_ENTRIES_EXTENSION, COMPOUND_FILE_ENTRIES_EXTENSION,
GEN_EXTENSION, GEN_EXTENSION,
COMPOUND_FILE_STORE_EXTENSION,
}; };
/** /**

View File

@@ -2565,7 +2565,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// Copy the segment files // Copy the segment files
for (String file: info.files()) { for (String file: info.files()) {
final String newFileName; final String newFileName;
if (codecDocStoreFiles.contains(file) || file.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)) { if (codecDocStoreFiles.contains(file)) {
newFileName = newDsName + IndexFileNames.stripSegmentName(file); newFileName = newDsName + IndexFileNames.stripSegmentName(file);
if (dsFilesCopied.contains(newFileName)) { if (dsFilesCopied.contains(newFileName)) {
continue; continue;
@@ -4070,12 +4070,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*/ */
static final Collection<String> createCompoundFile(Directory directory, String fileName, CheckAbort checkAbort, final SegmentInfo info, IOContext context) static final Collection<String> createCompoundFile(Directory directory, String fileName, CheckAbort checkAbort, final SegmentInfo info, IOContext context)
throws IOException { throws IOException {
assert info.getDocStoreOffset() == -1;
// Now merge all added files // Now merge all added files
Collection<String> files = info.files(); Collection<String> files = info.files();
CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true);
try { try {
assert assertNoSeparateFiles(files, directory, info); // nocommit: we could make a crappy regex like before...
// assert assertNoSeparateFiles(files, directory, info);
for (String file : files) { for (String file : files) {
directory.copy(cfsDir, file, file, context); directory.copy(cfsDir, file, file, context);
checkAbort.work(directory.fileLength(file)); checkAbort.work(directory.fileLength(file));
@@ -4086,20 +4087,4 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
return files; return files;
} }
/**
* used only by assert: checks that filenames about to be put in cfs belong.
*/
private static boolean assertNoSeparateFiles(Collection<String> files,
Directory dir, SegmentInfo info) throws IOException {
// maybe this is overkill, but codec naming clashes would be bad.
Set<String> separateFiles = new HashSet<String>();
info.getCodec().separateFiles(info, separateFiles);
for (String file : files) {
assert !separateFiles.contains(file) : file + " should not go in CFS!";
}
return true;
}
} }

View File

@@ -421,9 +421,6 @@ public final class SegmentInfo implements Cloneable {
final Set<String> fileSet = new HashSet<String>(); final Set<String> fileSet = new HashSet<String>();
codec.files(this, fileSet); codec.files(this, fileSet);
// regardless of compound file setting: these files are always in the directory
codec.separateFiles(this, fileSet);
files = new ArrayList<String>(fileSet); files = new ArrayList<String>(fileSet);

View File

@@ -117,14 +117,13 @@ public class PreFlexRWCodec extends Lucene3xCodec {
if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) {
// because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files. // because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files.
// so we must check segment version here to see if its a "real" 3.x segment or a "fake" // so we must check segment version here to see if its a "real" 3.x segment or a "fake"
// one that we wrote with a 4.x-format CFS+CFE // one that we wrote with a 4.x-format CFS+CFE, in this case we must add the .CFE
files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
String version = info.getVersion(); String version = info.getVersion();
if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) {
files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
} }
} else {
super.files(info, files);
} }
super.files(info, files);
} }
} }

View File

@@ -135,34 +135,4 @@ public class TestSegmentMerger extends LuceneTestCase {
TestSegmentReader.checkNorms(mergedReader); TestSegmentReader.checkNorms(mergedReader);
mergedReader.close(); mergedReader.close();
} }
// LUCENE-3143
public void testInvalidFilesToCreateCompound() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
IndexWriter w = new IndexWriter(dir, iwc);
// Create an index w/ .del file
w.addDocument(new Document());
Document doc = new Document();
doc.add(new TextField("c", "test"));
w.addDocument(doc);
w.commit();
w.deleteDocuments(new Term("c", "test"));
w.close();
// Assert that SM fails if .del exists
SegmentMerger sm = new SegmentMerger(InfoStream.getDefault(), dir, 1, "a", MergeState.CheckAbort.NONE, null, null, Codec.getDefault(), newIOContext(random));
boolean doFail = false;
try {
IndexWriter.createCompoundFile(dir, "b1", MergeState.CheckAbort.NONE, w.segmentInfos.info(0), newIOContext(random));
doFail = true; // should never get here
} catch (AssertionError e) {
// expected
}
assertFalse("should not have been able to create a .cfs with .del and .s* files", doFail);
dir.close();
}
} }