From 4e620c58da0230c333fe33f4df2e5981dfe3a298 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 17:33:52 +0000 Subject: [PATCH] LUCENE-3728: remove separateFiles mess, remove CFX from IndexFileNames, preflex codec handles this hair itself git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237820 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/codecs/Codec.java | 10 +---- .../apache/lucene/codecs/LiveDocsFormat.java | 2 +- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 38 ++++++++++--------- .../lucene3x/Lucene3xSegmentInfosReader.java | 4 +- .../lucene3x/Lucene3xStoredFieldsReader.java | 10 +++-- .../lucene3x/Lucene3xTermVectorsReader.java | 10 +++-- .../lucene40/Lucene40LiveDocsFormat.java | 2 +- .../simpletext/SimpleTextLiveDocsFormat.java | 2 +- .../apache/lucene/index/IndexFileNames.java | 4 -- .../org/apache/lucene/index/IndexWriter.java | 23 ++--------- .../org/apache/lucene/index/SegmentInfo.java | 3 -- .../codecs/preflexrw/PreFlexRWCodec.java | 7 ++-- .../lucene/index/TestSegmentMerger.java | 30 --------------- 13 files changed, 48 insertions(+), 97 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index bc8e626f049..288d6489557 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -60,14 +60,8 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { docValuesFormat().files(info, files); normsFormat().files(info, files); } - } - - /** Populates files with any filenames that are - * stored outside of CFS for the info segment. - */ - // TODO: can we somehow totally remove this? - public void separateFiles(SegmentInfo info, Set files) throws IOException { - liveDocsFormat().separateFiles(info, files); + // never inside CFS + liveDocsFormat().files(info, files); } /** Encodes/decodes postings */ diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java index 7b0e7e9fd6f..6c8ee7bb9e7 100644 --- a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -37,5 +37,5 @@ public abstract class LiveDocsFormat { public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; /** writes bits to a file */ public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; - public abstract void separateFiles(SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index f636b2956ff..888c1b8b666 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -61,6 +61,9 @@ public class Lucene3xCodec extends Codec { private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat(); + /** Extension of compound file for doc store files*/ + static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; + // TODO: this should really be a different impl private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() { @Override @@ -125,31 +128,30 @@ public class Lucene3xCodec extends Codec { return liveDocsFormat; } - // overrides the default implementation in codec.java to handle CFS without CFE + // overrides the default implementation in codec.java to handle CFS without CFE, + // shared doc stores, compound doc stores, separate norms, etc @Override public void files(SegmentInfo info, Set files) throws IOException { if (info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); - // NOTE: we don't add the CFE extension: because 3.x format doesn't use it. } else { - super.files(info, files); + postingsFormat().files(info, "", files); + storedFieldsFormat().files(info, files); + termVectorsFormat().files(info, files); + fieldInfosFormat().files(info, files); + // TODO: segmentInfosFormat should be allowed to declare additional files + // if it wants, in addition to segments_N + docValuesFormat().files(info, files); + normsFormat().files(info, files); } - } - - // override the default implementation in codec.java to handle separate norms files, and shared compound docstores - @Override - public void separateFiles(SegmentInfo info, Set files) throws IOException { - super.separateFiles(info, files); + // never inside CFS + liveDocsFormat().files(info, files); normsFormat().separateFiles(info, files); + + // shared docstores: these guys check the hair if (info.getDocStoreOffset() != -1) { - // We are sharing doc stores (stored fields, term - // vectors) with other segments - assert info.getDocStoreSegment() != null; - if (info.getDocStoreIsCompoundFile()) { - files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)); - } - // otherwise, if its not a compound docstore, storedfieldsformat/termvectorsformat are each adding their relevant files + storedFieldsFormat().files(info, files); + termVectorsFormat().files(info, files); } - } - + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java index 53e2f3efb44..624dcddc043 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java @@ -58,7 +58,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader { if (si.getDocStoreIsCompoundFile()) { dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( si.getDocStoreSegment(), "", - IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } } else if (si.getUseCompoundFile()) { dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( @@ -144,7 +144,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader { if (docStoreOffset != -1) { storesSegment = docStoreSegment; storeIsCompoundFile = docStoreIsCompoundFile; - ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; + ext = Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION; } else { storesSegment = name; storeIsCompoundFile = isCompoundFile; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java index 9b96f1a457a..c3e5026310b 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java @@ -147,7 +147,7 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme try { if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { d = storeCFSReader = new CompoundFileDirectory(si.dir, - IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } @@ -327,14 +327,18 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme return fieldsStream; } + // note: if there are shared docstores, we are also called by Lucene3xCodec even in + // the CFS case. so logic here must handle this. public static void files(SegmentInfo info, Set files) throws IOException { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; - if (!info.getDocStoreIsCompoundFile()) { + if (info.getDocStoreIsCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); + } else { files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION)); } - } else { + } else if (!info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java index 0df4bb94b30..b60d36cf4d3 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java @@ -123,7 +123,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { try { if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { d = storeCFSReader = new CompoundFileDirectory(si.dir, - IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } @@ -690,16 +690,20 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format); } + // note: if there are shared docstores, we are also called by Lucene3xCodec even in + // the CFS case. so logic here must handle this. public static void files(SegmentInfo info, Set files) throws IOException { if (info.getHasVectors()) { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; - if (!info.getDocStoreIsCompoundFile()) { + if (info.getDocStoreIsCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); + } else { files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION)); } - } else { + } else if (!info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION)); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 1023cd3e357..89710ea9a29 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -48,7 +48,7 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { } @Override - public void separateFiles(SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen())); } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index c779c2a132d..fab1fa381f7 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -138,7 +138,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { } @Override - public void separateFiles(SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen())); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index 1bcb493b1d8..b7b5044e3cd 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -54,9 +54,6 @@ public final class IndexFileNames { /** Extension of compound file entries */ public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe"; - /** Extension of compound file for doc store files*/ - public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; - /** * This array contains all filename extensions used by * Lucene's index files, with one exception, namely the @@ -68,7 +65,6 @@ public final class IndexFileNames { COMPOUND_FILE_EXTENSION, COMPOUND_FILE_ENTRIES_EXTENSION, GEN_EXTENSION, - COMPOUND_FILE_STORE_EXTENSION, }; /** diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 52c483b849d..8be30d9fcfe 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -2565,7 +2565,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // Copy the segment files for (String file: info.files()) { final String newFileName; - if (codecDocStoreFiles.contains(file) || file.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)) { + if (codecDocStoreFiles.contains(file)) { newFileName = newDsName + IndexFileNames.stripSegmentName(file); if (dsFilesCopied.contains(newFileName)) { continue; @@ -4070,12 +4070,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { */ static final Collection createCompoundFile(Directory directory, String fileName, CheckAbort checkAbort, final SegmentInfo info, IOContext context) throws IOException { - + assert info.getDocStoreOffset() == -1; // Now merge all added files Collection files = info.files(); CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); try { - assert assertNoSeparateFiles(files, directory, info); + // nocommit: we could make a crappy regex like before... + // assert assertNoSeparateFiles(files, directory, info); for (String file : files) { directory.copy(cfsDir, file, file, context); checkAbort.work(directory.fileLength(file)); @@ -4086,20 +4087,4 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return files; } - - - /** - * used only by assert: checks that filenames about to be put in cfs belong. - */ - private static boolean assertNoSeparateFiles(Collection files, - Directory dir, SegmentInfo info) throws IOException { - // maybe this is overkill, but codec naming clashes would be bad. - Set separateFiles = new HashSet(); - info.getCodec().separateFiles(info, separateFiles); - - for (String file : files) { - assert !separateFiles.contains(file) : file + " should not go in CFS!"; - } - return true; - } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 8db91f2c443..9acfba2667d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -421,9 +421,6 @@ public final class SegmentInfo implements Cloneable { final Set fileSet = new HashSet(); codec.files(this, fileSet); - - // regardless of compound file setting: these files are always in the directory - codec.separateFiles(this, fileSet); files = new ArrayList(fileSet); diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index 2013d91167c..5b7029c5fc0 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -117,14 +117,13 @@ public class PreFlexRWCodec extends Lucene3xCodec { if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { // because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files. // so we must check segment version here to see if its a "real" 3.x segment or a "fake" - // one that we wrote with a 4.x-format CFS+CFE - files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + // one that we wrote with a 4.x-format CFS+CFE, in this case we must add the .CFE String version = info.getVersion(); if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); } - } else { - super.files(info, files); } + + super.files(info, files); } } diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java index e5b6eb84114..d9445f935fa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -135,34 +135,4 @@ public class TestSegmentMerger extends LuceneTestCase { TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); } - - // LUCENE-3143 - public void testInvalidFilesToCreateCompound() throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); - IndexWriter w = new IndexWriter(dir, iwc); - - // Create an index w/ .del file - w.addDocument(new Document()); - Document doc = new Document(); - doc.add(new TextField("c", "test")); - w.addDocument(doc); - w.commit(); - w.deleteDocuments(new Term("c", "test")); - w.close(); - - // Assert that SM fails if .del exists - SegmentMerger sm = new SegmentMerger(InfoStream.getDefault(), dir, 1, "a", MergeState.CheckAbort.NONE, null, null, Codec.getDefault(), newIOContext(random)); - boolean doFail = false; - try { - IndexWriter.createCompoundFile(dir, "b1", MergeState.CheckAbort.NONE, w.segmentInfos.info(0), newIOContext(random)); - doFail = true; // should never get here - } catch (AssertionError e) { - // expected - } - assertFalse("should not have been able to create a .cfs with .del and .s* files", doFail); - - dir.close(); - } - }