From 80bf691b3b8b0ee9b705c47f096dc6b4a483fae0 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 12:24:47 +0000 Subject: [PATCH 01/20] LUCENE-3728: handle separate norms more privately inside 3.x codec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237261 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/codecs/NormsFormat.java | 9 --------- .../lucene/codecs/lucene3x/Lucene3xNormsFormat.java | 8 +------- .../lucene/codecs/lucene3x/Lucene3xNormsProducer.java | 3 ++- .../java/org/apache/lucene/index/SegmentCoreReaders.java | 2 +- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java index 94e70e76c40..5dbd90d2d7f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java @@ -39,13 +39,4 @@ public abstract class NormsFormat { */ @Deprecated public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException {}; - - /** - * Note: this should not be overridden! 
- * @deprecated - */ - @Deprecated - public PerDocProducer docsProducer(SegmentReadState state, Directory separateNormsDir) throws IOException { - return docsProducer(state); - } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java index 54b13a3a8e2..f8f573e7b56 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java @@ -56,12 +56,6 @@ public class Lucene3xNormsFormat extends NormsFormat { @Override public PerDocProducer docsProducer(SegmentReadState state) throws IOException { - return docsProducer(state, null); - } - - @Override - public PerDocProducer docsProducer(SegmentReadState state, - Directory separateNormsDir) throws IOException { - return new Lucene3xNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context, separateNormsDir); + return new Lucene3xNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java index 0be28791f93..bcc92ec4da9 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java @@ -67,7 +67,8 @@ class Lucene3xNormsProducer extends PerDocProducer { // note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front. // but we just don't do any seeks or reading yet. 
- public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException { + public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context) throws IOException { + Directory separateNormsDir = info.dir; // separate norms are never inside CFS maxdoc = info.docCount; String segmentName = info.name; Map normGen = info.getNormGen(); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index ac67c3d9747..754acaa6ac8 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -119,7 +119,7 @@ final class SegmentCoreReaders { // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! - norms = codec.normsFormat().docsProducer(segmentReadState, dir); + norms = codec.normsFormat().docsProducer(segmentReadState); perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); final Directory storeDir; From 146033c75737b0e15da91f16f634e0052345f72b Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 12:32:32 +0000 Subject: [PATCH 02/20] LUCENE-3728: SI.files() always calls Codec.files() git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237263 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/codecs/Codec.java | 28 +++++++++++++------ .../org/apache/lucene/index/SegmentInfo.java | 13 +-------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index 20df870558c..9f0be1deb49 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -20,8 +20,10 @@ package org.apache.lucene.codecs; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.util.NamedSPILoader; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; @@ -48,15 +50,23 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { * the info segment. */ public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - assert (dir instanceof CompoundFileDirectory) == false; - postingsFormat().files(dir, info, "", files); - storedFieldsFormat().files(dir, info, files); - termVectorsFormat().files(dir, info, files); - fieldInfosFormat().files(dir, info, files); - // TODO: segmentInfosFormat should be allowed to declare additional files - // if it wants, in addition to segments_N - docValuesFormat().files(dir, info, files); - normsFormat().files(dir, info, files); + if (info.getUseCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + // nocommit: get this out of here: 3.x codec should override this + String version = info.getVersion(); + if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { + files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + } + } else { + postingsFormat().files(dir, info, "", files); + storedFieldsFormat().files(dir, info, files); + termVectorsFormat().files(dir, info, files); + fieldInfosFormat().files(dir, info, files); + // TODO: segmentInfosFormat should be allowed to declare additional files + // if it wants, in addition to segments_N + docValuesFormat().files(dir, info, files); + normsFormat().files(dir, info, files); + } } /** Populates files with any filenames that are diff --git 
a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index d3b3caeddff..72fbafb9435 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -33,7 +33,6 @@ import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Constants; -import org.apache.lucene.util.StringHelper; /** * Information about a segment such as it's name, directory, and files related @@ -473,17 +472,7 @@ public final class SegmentInfo implements Cloneable { } final Set fileSet = new HashSet(); - boolean useCompoundFile = getUseCompoundFile(); - - if (useCompoundFile) { - fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); - if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { - fileSet.add(IndexFileNames.segmentFileName(name, "", - IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); - } - } else { - codec.files(dir, this, fileSet); - } + codec.files(dir, this, fileSet); // regardless of compound file setting: these files are always in the directory codec.separateFiles(dir, this, fileSet); From a5c5bbbffee31770feb6a6ba811a8523c8e84843 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 12:43:37 +0000 Subject: [PATCH 03/20] LUCENE-3728: PreFlex codec privately handles old CFS-without-CFE in files() git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237266 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/codecs/Codec.java | 8 +------ .../lucene/codecs/lucene3x/Lucene3xCodec.java | 14 +++++++++++ .../codecs/preflexrw/PreFlexRWCodec.java | 23 +++++++++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java 
b/lucene/src/java/org/apache/lucene/codecs/Codec.java index 9f0be1deb49..92fba4d1f92 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -23,8 +23,6 @@ import java.util.Set; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.util.NamedSPILoader; -import org.apache.lucene.util.StringHelper; -import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; /** @@ -52,11 +50,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { public void files(Directory dir, SegmentInfo info, Set files) throws IOException { if (info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); - // nocommit: get this out of here: 3.x codec should override this - String version = info.getVersion(); - if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { - files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); - } + files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); } else { postingsFormat().files(dir, info, "", files); storedFieldsFormat().files(dir, info, files); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index 78aac059053..9a980ff711f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -34,12 +34,14 @@ import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; +import org.apache.lucene.index.IndexFileNames; 
import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.MutableBits; +import org.apache.lucene.util.StringHelper; /** * Supports the Lucene 3.x index format (readonly) @@ -130,4 +132,16 @@ public class Lucene3xCodec extends Codec { public LiveDocsFormat liveDocsFormat() { return liveDocsFormat; } + + // overrides the default implementation in codec.java to handle CFS without CFE, and shared docstores + @Override + public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + // TODO: shared doc stores + if (info.getUseCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + // NOTE: we don't add the CFE extension: because 3.x format doesn't use it. + } else { + super.files(dir, info, files); + } + } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index aface166018..50cb0f39602 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -17,6 +17,9 @@ package org.apache.lucene.codecs.preflexrw; * limitations under the License. 
*/ +import java.io.IOException; +import java.util.Set; + import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.NormsFormat; @@ -27,7 +30,11 @@ import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.StringHelper; /** * Writes 3.x-like indexes (not perfect emulation yet) for testing only! @@ -106,4 +113,20 @@ public class PreFlexRWCodec extends Lucene3xCodec { return super.storedFieldsFormat(); } } + + @Override + public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { + // because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files. 
+ // so we must check segment version here to see if its a "real" 3.x segment or a "fake" + // one that we wrote with a 4.x-format CFS+CFE + files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + String version = info.getVersion(); + if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { + files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + } + } else { + super.files(dir, info, files); + } + } } From 5d96aa30c108f36d318d68e043222ff7ca84be5a Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 13:02:32 +0000 Subject: [PATCH 04/20] LUCENE-3728: really make separate norms totally private to 3.x codec git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237272 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/codecs/Codec.java | 2 +- .../java/org/apache/lucene/codecs/NormsFormat.java | 7 ------- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 14 ++++++++++---- .../codecs/lucene3x/Lucene3xNormsFormat.java | 1 - .../lucene/codecs/preflexrw/PreFlexRWCodec.java | 6 +++--- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index 92fba4d1f92..b79973d929f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -66,9 +66,9 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { /** Populates files with any filenames that are * stored outside of CFS for the info segment. */ + // TODO: can we somehow totally remove this? 
public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { liveDocsFormat().separateFiles(dir, info, files); - normsFormat().separateFiles(dir, info, files); } /** Encodes/decodes postings */ diff --git a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java index 5dbd90d2d7f..eeb9473cb2f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java @@ -32,11 +32,4 @@ public abstract class NormsFormat { public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException; public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException; public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; - - /** - * Note: this should not be overridden! - * @deprecated - */ - @Deprecated - public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException {}; } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index 9a980ff711f..cd43942e2d0 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; -import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PerDocConsumer; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.PostingsFormat; @@ -41,7 +40,6 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.MutableBits; -import org.apache.lucene.util.StringHelper; 
/** * Supports the Lucene 3.x index format (readonly) @@ -67,7 +65,7 @@ public class Lucene3xCodec extends Codec { private final SegmentInfosFormat infosFormat = new Lucene3xSegmentInfosFormat(); - private final NormsFormat normsFormat = new Lucene3xNormsFormat(); + private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat(); // TODO: this should really be a different impl private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() { @@ -124,7 +122,7 @@ public class Lucene3xCodec extends Codec { } @Override - public NormsFormat normsFormat() { + public Lucene3xNormsFormat normsFormat() { return normsFormat; } @@ -144,4 +142,12 @@ public class Lucene3xCodec extends Codec { super.files(dir, info, files); } } + + // override the default implementation in codec.java to handle separate norms files + @Override + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + super.separateFiles(dir, info, files); + normsFormat().separateFiles(dir, info, files); + } + } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java index f8f573e7b56..701e1f4baac 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java @@ -43,7 +43,6 @@ public class Lucene3xNormsFormat extends NormsFormat { Lucene3xNormsProducer.files(dir, info, files); } - @Override public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { Lucene3xNormsProducer.separateFiles(dir, info, files); } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index 50cb0f39602..5696083bd67 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java 
+++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -22,12 +22,12 @@ import java.util.Set; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; -import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; +import org.apache.lucene.codecs.lucene3x.Lucene3xNormsFormat; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.IndexFileNames; @@ -42,7 +42,7 @@ import org.apache.lucene.util.StringHelper; */ public class PreFlexRWCodec extends Lucene3xCodec { private final PostingsFormat postings = new PreFlexRWPostingsFormat(); - private final NormsFormat norms = new PreFlexRWNormsFormat(); + private final Lucene3xNormsFormat norms = new PreFlexRWNormsFormat(); private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat(); private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat(); private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat(); @@ -61,7 +61,7 @@ public class PreFlexRWCodec extends Lucene3xCodec { } @Override - public NormsFormat normsFormat() { + public Lucene3xNormsFormat normsFormat() { if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { return norms; } else { From 8d98a6d270a1f87d2e946974ca604bf1aef875b4 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 13:18:16 +0000 Subject: [PATCH 05/20] LUCENE-3728: 3.x codec files() privately adds compound docstores git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237280 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 14 +++++++++++--- 
.../java/org/apache/lucene/index/SegmentInfo.java | 10 ---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index cd43942e2d0..0bb9296b456 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -131,10 +131,9 @@ public class Lucene3xCodec extends Codec { return liveDocsFormat; } - // overrides the default implementation in codec.java to handle CFS without CFE, and shared docstores + // overrides the default implementation in codec.java to handle CFS without CFE @Override public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - // TODO: shared doc stores if (info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); // NOTE: we don't add the CFE extension: because 3.x format doesn't use it. 
@@ -143,11 +142,20 @@ public class Lucene3xCodec extends Codec { } } - // override the default implementation in codec.java to handle separate norms files + // override the default implementation in codec.java to handle separate norms files, and shared compound docstores @Override public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { super.separateFiles(dir, info, files); normsFormat().separateFiles(dir, info, files); + if (info.getDocStoreOffset() != -1) { + // We are sharing doc stores (stored fields, term + // vectors) with other segments + assert info.getDocStoreSegment() != null; + if (info.getDocStoreIsCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)); + } + // otherwise, if its not a compound docstore, storedfieldsformat/termvectorsformat are each adding their relevant files + } } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 72fbafb9435..0ab77246757 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -477,16 +477,6 @@ public final class SegmentInfo implements Cloneable { // regardless of compound file setting: these files are always in the directory codec.separateFiles(dir, this, fileSet); - if (docStoreOffset != -1) { - // We are sharing doc stores (stored fields, term - // vectors) with other segments - assert docStoreSegment != null; - // TODO: push this out into preflex fieldsFormat? 
- if (docStoreIsCompoundFile) { - fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)); - } - } - files = new ArrayList(fileSet); return files; From 9c22385c556bc593588aacd62bc71e80714ab7b2 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 14:07:32 +0000 Subject: [PATCH 06/20] LUCENE-3728: don't pass Directory to files, its confusingly never CFSDir like other Directory parameters, its rarely needed in files(), and redundant with SI.dir git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237294 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/BlockTermsReader.java | 2 +- .../lucene/codecs/BlockTreeTermsReader.java | 2 +- .../java/org/apache/lucene/codecs/Codec.java | 19 +++++++++---------- .../apache/lucene/codecs/DocValuesFormat.java | 3 +-- .../lucene/codecs/FieldInfosFormat.java | 3 +-- .../codecs/FixedGapTermsIndexReader.java | 2 +- .../apache/lucene/codecs/LiveDocsFormat.java | 2 +- .../org/apache/lucene/codecs/NormsFormat.java | 3 +-- .../lucene/codecs/PostingsBaseFormat.java | 3 +-- .../apache/lucene/codecs/PostingsFormat.java | 5 +---- .../lucene/codecs/StoredFieldsFormat.java | 2 +- .../lucene/codecs/TermVectorsFormat.java | 2 +- .../codecs/VariableGapTermsIndexReader.java | 2 +- .../appending/AppendingPostingsFormat.java | 7 +++---- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 12 ++++++------ .../lucene3x/Lucene3xFieldInfosFormat.java | 5 ++--- .../lucene3x/Lucene3xFieldInfosReader.java | 2 +- .../codecs/lucene3x/Lucene3xFields.java | 4 ++-- .../codecs/lucene3x/Lucene3xNormsFormat.java | 8 ++++---- .../lucene3x/Lucene3xNormsProducer.java | 6 +++--- .../lucene3x/Lucene3xPostingsFormat.java | 5 ++--- .../lucene3x/Lucene3xTermVectorsFormat.java | 4 ++-- .../lucene3x/Lucene3xTermVectorsReader.java | 2 +- .../lucene40/Lucene40DocValuesConsumer.java | 6 +++--- .../lucene40/Lucene40DocValuesFormat.java | 5 ++--- 
.../lucene40/Lucene40FieldInfosFormat.java | 5 ++--- .../lucene40/Lucene40FieldInfosReader.java | 2 +- .../lucene40/Lucene40LiveDocsFormat.java | 2 +- .../codecs/lucene40/Lucene40NormsFormat.java | 9 +++------ .../lucene40/Lucene40PostingsBaseFormat.java | 5 ++--- .../lucene40/Lucene40PostingsFormat.java | 7 +++---- .../lucene40/Lucene40PostingsReader.java | 2 +- .../lucene40/Lucene40StoredFieldsFormat.java | 4 ++-- .../lucene40/Lucene40StoredFieldsReader.java | 2 +- .../lucene40/Lucene40TermVectorsFormat.java | 4 ++-- .../lucene40/Lucene40TermVectorsReader.java | 2 +- .../codecs/memory/MemoryPostingsFormat.java | 3 +-- .../perfield/PerFieldPostingsFormat.java | 6 +++--- .../codecs/pulsing/PulsingPostingsFormat.java | 7 +++---- .../codecs/sep/SepDocValuesConsumer.java | 5 +++-- .../SimpleTextFieldInfosFormat.java | 5 ++--- .../SimpleTextFieldInfosReader.java | 2 +- .../simpletext/SimpleTextLiveDocsFormat.java | 2 +- .../simpletext/SimpleTextNormsConsumer.java | 2 +- .../simpletext/SimpleTextNormsFormat.java | 7 ++----- .../simpletext/SimpleTextPostingsFormat.java | 3 +-- .../SimpleTextStoredFieldsFormat.java | 4 ++-- .../SimpleTextStoredFieldsReader.java | 2 +- .../SimpleTextTermVectorsFormat.java | 4 ++-- .../SimpleTextTermVectorsReader.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 2 +- .../org/apache/lucene/index/SegmentInfo.java | 10 ++++++---- .../codecs/lucene40ords/Lucene40WithOrds.java | 9 ++++----- .../MockFixedIntBlockPostingsFormat.java | 6 +++--- .../MockVariableIntBlockPostingsFormat.java | 6 +++--- .../mockrandom/MockRandomPostingsFormat.java | 14 +++++++------- .../mocksep/MockSepDocValuesFormat.java | 5 ++--- .../codecs/mocksep/MockSepPostingsFormat.java | 7 +++---- .../NestedPulsingPostingsFormat.java | 7 +++---- .../codecs/preflexrw/PreFlexRWCodec.java | 5 ++--- .../codecs/ramonly/RAMOnlyPostingsFormat.java | 3 +-- .../apache/lucene/index/TestIndexWriter.java | 2 +- .../lucene/index/TestTermVectorsReader.java | 2 +- 63 files changed, 
132 insertions(+), 160 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java index 9e4237ce4e5..ee1a56572a9 100644 --- a/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/BlockTermsReader.java @@ -186,7 +186,7 @@ public class BlockTermsReader extends FieldsProducer { } } - public static void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Collection files) { + public static void files(SegmentInfo segmentInfo, String segmentSuffix, Collection files) { files.add(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java index 6201371bc52..141eaf57d8f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java @@ -200,7 +200,7 @@ public class BlockTreeTermsReader extends FieldsProducer { } } - public static void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Collection files) { + public static void files(SegmentInfo segmentInfo, String segmentSuffix, Collection files) { files.add(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index b79973d929f..bc8e626f049 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -23,7 +23,6 @@ import java.util.Set; import org.apache.lucene.index.IndexFileNames; import 
org.apache.lucene.index.SegmentInfo; import org.apache.lucene.util.NamedSPILoader; -import org.apache.lucene.store.Directory; /** * Encodes/decodes an inverted index segment @@ -47,19 +46,19 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { /** Populates files with all filenames needed for * the info segment. */ - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); } else { - postingsFormat().files(dir, info, "", files); - storedFieldsFormat().files(dir, info, files); - termVectorsFormat().files(dir, info, files); - fieldInfosFormat().files(dir, info, files); + postingsFormat().files(info, "", files); + storedFieldsFormat().files(info, files); + termVectorsFormat().files(info, files); + fieldInfosFormat().files(info, files); // TODO: segmentInfosFormat should be allowed to declare additional files // if it wants, in addition to segments_N - docValuesFormat().files(dir, info, files); - normsFormat().files(dir, info, files); + docValuesFormat().files(info, files); + normsFormat().files(info, files); } } @@ -67,8 +66,8 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { * stored outside of CFS for the info segment. */ // TODO: can we somehow totally remove this? 
- public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { - liveDocsFormat().separateFiles(dir, info, files); + public void separateFiles(SegmentInfo info, Set files) throws IOException { + liveDocsFormat().separateFiles(info, files); } /** Encodes/decodes postings */ diff --git a/lucene/src/java/org/apache/lucene/codecs/DocValuesFormat.java b/lucene/src/java/org/apache/lucene/codecs/DocValuesFormat.java index 8017573a9c3..202712a73d8 100644 --- a/lucene/src/java/org/apache/lucene/codecs/DocValuesFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/DocValuesFormat.java @@ -23,10 +23,9 @@ import java.util.Set; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; public abstract class DocValuesFormat { public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException; public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException; - public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/FieldInfosFormat.java b/lucene/src/java/org/apache/lucene/codecs/FieldInfosFormat.java index 8491e160c69..bedd1457e79 100644 --- a/lucene/src/java/org/apache/lucene/codecs/FieldInfosFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/FieldInfosFormat.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.Directory; /** * @lucene.experimental @@ -29,5 +28,5 @@ import org.apache.lucene.store.Directory; public abstract class FieldInfosFormat { public abstract FieldInfosReader getFieldInfosReader() throws IOException; public abstract FieldInfosWriter getFieldInfosWriter() throws IOException; - public abstract 
void files(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java index d58d72199be..6bcd967a3b6 100644 --- a/lucene/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java @@ -387,7 +387,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase { } } - public static void files(Directory dir, SegmentInfo info, String segmentSuffix, Collection files) { + public static void files(SegmentInfo info, String segmentSuffix, Collection files) { files.add(IndexFileNames.segmentFileName(info.name, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java index f1b654153c1..7b0e7e9fd6f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -37,5 +37,5 @@ public abstract class LiveDocsFormat { public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; /** writes bits to a file */ public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; - public abstract void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void separateFiles(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java index eeb9473cb2f..e5924473d1f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java @@ -23,7 +23,6 @@ import java.util.Set; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; /** * format for normalization factors @@ -31,5 +30,5 @@ import org.apache.lucene.store.Directory; public abstract class NormsFormat { public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException; public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException; - public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/PostingsBaseFormat.java b/lucene/src/java/org/apache/lucene/codecs/PostingsBaseFormat.java index 921a0fc6149..590fa3a5529 100644 --- a/lucene/src/java/org/apache/lucene/codecs/PostingsBaseFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/PostingsBaseFormat.java @@ -23,7 +23,6 @@ import java.util.Set; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; /** * Provides a {@link PostingsReaderBase} and {@link @@ -51,5 +50,5 @@ public abstract class PostingsBaseFormat { public abstract PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException; - public abstract void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException; + public abstract void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/PostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/PostingsFormat.java index db328d935b6..67ea8ba4e18 100644 --- 
a/lucene/src/java/org/apache/lucene/codecs/PostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/PostingsFormat.java @@ -25,8 +25,6 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.util.NamedSPILoader; -import org.apache.lucene.store.Directory; - /** @lucene.experimental */ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI { @@ -59,12 +57,11 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI { /** * Gathers files associated with this segment * - * @param dir the {@link Directory} this segment was written to * @param segmentInfo the {@link SegmentInfo} for this segment * @param segmentSuffix the format's suffix within this segment * @param files the of files to add the codec files to. */ - public abstract void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException; + public abstract void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException; @Override public String toString() { diff --git a/lucene/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java b/lucene/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java index e6ec46de667..1554b814101 100644 --- a/lucene/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java @@ -31,5 +31,5 @@ import org.apache.lucene.store.IOContext; public abstract class StoredFieldsFormat { public abstract StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException; public abstract StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException; - public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git 
a/lucene/src/java/org/apache/lucene/codecs/TermVectorsFormat.java b/lucene/src/java/org/apache/lucene/codecs/TermVectorsFormat.java index 4d4801ca7d2..a5449f903b8 100644 --- a/lucene/src/java/org/apache/lucene/codecs/TermVectorsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/TermVectorsFormat.java @@ -31,5 +31,5 @@ import org.apache.lucene.store.IOContext; public abstract class TermVectorsFormat { public abstract TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException; public abstract TermVectorsWriter vectorsWriter(Directory directory, String segment, IOContext context) throws IOException; - public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java b/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java index abfc399bba9..7466f66af20 100644 --- a/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java @@ -217,7 +217,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase { } } - public static void files(Directory dir, SegmentInfo info, String segmentSuffix, Collection files) { + public static void files(SegmentInfo info, String segmentSuffix, Collection files) { files.add(IndexFileNames.segmentFileName(info.name, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/appending/AppendingPostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/appending/AppendingPostingsFormat.java index 4664a9bd6b6..6a160f94bd2 100644 --- a/lucene/src/java/org/apache/lucene/codecs/appending/AppendingPostingsFormat.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/appending/AppendingPostingsFormat.java @@ -83,9 +83,8 @@ class AppendingPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) - throws IOException { - Lucene40PostingsReader.files(dir, segmentInfo, segmentSuffix, files); - BlockTreeTermsReader.files(dir, segmentInfo, segmentSuffix, files); + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files); + BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index 0bb9296b456..e90a4756297 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -88,7 +88,7 @@ public class Lucene3xCodec extends Codec { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException {} + public void files(SegmentInfo info, Set files) throws IOException {} }; @Override @@ -133,20 +133,20 @@ public class Lucene3xCodec extends Codec { // overrides the default implementation in codec.java to handle CFS without CFE @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); // NOTE: we don't add the CFE extension: because 3.x format doesn't use it. 
} else { - super.files(dir, info, files); + super.files(info, files); } } // override the default implementation in codec.java to handle separate norms files, and shared compound docstores @Override - public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { - super.separateFiles(dir, info, files); - normsFormat().separateFiles(dir, info, files); + public void separateFiles(SegmentInfo info, Set files) throws IOException { + super.separateFiles(info, files); + normsFormat().separateFiles(info, files); if (info.getDocStoreOffset() != -1) { // We are sharing doc stores (stored fields, term // vectors) with other segments diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java index c9417e8727b..c633e35cb97 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosFormat.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldInfosReader; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.Directory; /** * Lucene3x ReadOnly FieldInfosFromat implementation @@ -47,7 +46,7 @@ public class Lucene3xFieldInfosFormat extends FieldInfosFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene3xFieldInfosReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene3xFieldInfosReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java index e219217a2e0..43d552897b4 100644 --- 
a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java @@ -117,7 +117,7 @@ public class Lucene3xFieldInfosReader extends FieldInfosReader { } } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { files.add(IndexFileNames.segmentFileName(info.name, "", FIELD_INFOS_EXTENSION)); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java index 7908df042ff..7f2b3baaed6 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFields.java @@ -133,7 +133,7 @@ public class Lucene3xFields extends FieldsProducer { return true; } - static void files(Directory dir, SegmentInfo info, Collection files) throws IOException { + static void files(SegmentInfo info, Collection files) throws IOException { files.add(IndexFileNames.segmentFileName(info.name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); @@ -144,7 +144,7 @@ public class Lucene3xFields extends FieldsProducer { // file, when it should have been false. 
So we do the // extra check, here: final String prx = IndexFileNames.segmentFileName(info.name, "", Lucene3xPostingsFormat.PROX_EXTENSION); - if (dir.fileExists(prx)) { + if (info.dir.fileExists(prx)) { files.add(prx); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java index 701e1f4baac..9ac63a991c0 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java @@ -39,12 +39,12 @@ public class Lucene3xNormsFormat extends NormsFormat { @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene3xNormsProducer.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene3xNormsProducer.files(info, files); } - public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene3xNormsProducer.separateFiles(dir, info, files); + public void separateFiles(SegmentInfo info, Set files) throws IOException { + Lucene3xNormsProducer.separateFiles(info, files); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java index bcc92ec4da9..5e95e00c88c 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java @@ -193,11 +193,11 @@ class Lucene3xNormsProducer extends PerDocProducer { } - static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + static void files(SegmentInfo info, Set files) throws IOException { // TODO: This is what SI always did... but we can do this cleaner? // like first FI that has norms but doesn't have separate norms? 
final String normsFileName = IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION); - if (dir.fileExists(normsFileName)) { + if (info.dir.fileExists(normsFileName)) { // only needed to do this in 3x - 4x can decide if the norms are present files.add(normsFileName); } @@ -205,7 +205,7 @@ class Lucene3xNormsProducer extends PerDocProducer { /** @deprecated */ @Deprecated - static void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + static void separateFiles(SegmentInfo info, Set files) throws IOException { Map normGen = info.getNormGen(); if (normGen != null) { for (Entry entry : normGen.entrySet()) { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java index 520e487c539..2ce5cd47fc6 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xPostingsFormat.java @@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene3x; import java.util.Set; import java.io.IOException; -import org.apache.lucene.store.Directory; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; @@ -66,8 +65,8 @@ public class Lucene3xPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo info, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo info, String segmentSuffix, Set files) throws IOException { // preflex fields have no segmentSuffix - we ignore it here - Lucene3xFields.files(dir, info, files); + Lucene3xFields.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java index 44caac81389..c0fe31f9c04 100644 --- 
a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java @@ -48,8 +48,8 @@ public class Lucene3xTermVectorsFormat extends TermVectorsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene3xTermVectorsReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene3xTermVectorsReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java index 25dd1f27a0d..91544ba3c41 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java @@ -674,7 +674,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format); } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { if (info.getHasVectors()) { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java index 5dc92fb1225..39f7b5804a4 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java @@ -66,14 +66,14 @@ public class Lucene40DocValuesConsumer extends DocValuesWriterBase { } } - public static void files(Directory dir, SegmentInfo segmentInfo, Set files) throws IOException { + public 
static void files(SegmentInfo segmentInfo, Set files) throws IOException { FieldInfos fieldInfos = segmentInfo.getFieldInfos(); for (FieldInfo fieldInfo : fieldInfos) { if (fieldInfo.hasDocValues()) { files.add(IndexFileNames.segmentFileName(segmentInfo.name, DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION)); files.add(IndexFileNames.segmentFileName(segmentInfo.name, DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); - assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); - assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION)); + assert segmentInfo.dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + assert segmentInfo.dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION)); break; } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java index 90ff33494af..ce563a800b2 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; public class Lucene40DocValuesFormat extends DocValuesFormat { @@ -41,7 +40,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene40DocValuesConsumer.files(dir, 
info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene40DocValuesConsumer.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java index 072a0c43f55..1ec82d68bd7 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldInfosReader; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.Directory; /** * @lucene.experimental @@ -44,7 +43,7 @@ public class Lucene40FieldInfosFormat extends FieldInfosFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene40FieldInfosReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene40FieldInfosReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java index 598efe2b697..878f8002e88 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java @@ -145,7 +145,7 @@ public class Lucene40FieldInfosReader extends FieldInfosReader { } } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40FieldInfosWriter.FIELD_INFOS_EXTENSION)); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java 
b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 16a6dc3d220..1023cd3e357 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -48,7 +48,7 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { } @Override - public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + public void separateFiles(SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen())); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java index a4b0c5aacf4..5a54650b49b 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java @@ -30,7 +30,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; /** * Norms Format for the default codec. 
@@ -50,10 +49,8 @@ public class Lucene40NormsFormat extends NormsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) - throws IOException { - Lucene40NormsDocValuesConsumer.files(dir, info, files); - + public void files(SegmentInfo info, Set files) throws IOException { + Lucene40NormsDocValuesConsumer.files(info, files); } public static class Lucene40NormsDocValuesProducer extends Lucene40DocValuesProducer { @@ -103,7 +100,7 @@ public class Lucene40NormsFormat extends NormsFormat { return info.getNormType(); } - public static void files(Directory dir, SegmentInfo segmentInfo, Set files) throws IOException { + public static void files(SegmentInfo segmentInfo, Set files) throws IOException { final String normsFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION); FieldInfos fieldInfos = segmentInfo.getFieldInfos(); for (FieldInfo fieldInfo : fieldInfos) { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java index 41baacefa7c..f9faa267f68 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; /** * Provides a {@link PostingsReaderBase} and {@link @@ -52,7 +51,7 @@ public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { - Lucene40PostingsReader.files(dir, segmentInfo, segmentSuffix, files); + public void files(SegmentInfo 
segmentInfo, String segmentSuffix, Set files) throws IOException { + Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java index a8ae8bfcf22..dd610adb4b4 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java @@ -30,7 +30,6 @@ import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; /** Default codec. * @lucene.experimental */ @@ -106,9 +105,9 @@ public class Lucene40PostingsFormat extends PostingsFormat { static final String PROX_EXTENSION = "prx"; @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { - Lucene40PostingsReader.files(dir, segmentInfo, segmentSuffix, files); - BlockTreeTermsReader.files(dir, segmentInfo, segmentSuffix, files); + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files); + BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index 34aa6d4223d..ad054f0578f 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -74,7 +74,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { } } - public static void files(Directory dir, SegmentInfo segmentInfo, String 
segmentSuffix, Collection files) throws IOException { + public static void files(SegmentInfo segmentInfo, String segmentSuffix, Collection files) throws IOException { files.add(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION)); if (segmentInfo.getHasProx()) { files.add(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION)); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java index 62e5887e67c..a57ddd9ef60 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java @@ -44,7 +44,7 @@ public class Lucene40StoredFieldsFormat extends StoredFieldsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene40StoredFieldsReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene40StoredFieldsReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java index 4f15c05b55c..ca9991b3afa 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java @@ -286,7 +286,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap? // only preflex segments refer to these? 
- public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; if (!info.getDocStoreIsCompoundFile()) { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java index 9eca7feaf96..8e3f68177e3 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java @@ -41,7 +41,7 @@ public class Lucene40TermVectorsFormat extends TermVectorsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - Lucene40TermVectorsReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + Lucene40TermVectorsReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java index 6d8970c8764..0b1bf53134e 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java @@ -699,7 +699,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader { return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, format); } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { if (info.getHasVectors()) { files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION)); diff --git 
a/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index c8c7d30f524..86238bcc47c 100644 --- a/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -43,7 +43,6 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -826,7 +825,7 @@ public class MemoryPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { files.add(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION)); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java index 07bb07d9599..f8e37ef9852 100644 --- a/lucene/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java @@ -306,8 +306,8 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { } @Override - public void files(final Directory dir, final SegmentInfo info, String segmentSuffix, final Set files) - throws IOException { + public void files(final SegmentInfo info, String segmentSuffix, final Set files) throws IOException { + final Directory dir = info.dir; final String mapFileName = IndexFileNames.segmentFileName(info.name, segmentSuffix, PER_FIELD_EXTENSION); files.add(mapFileName); @@ -316,7 
+316,7 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat { new VisitPerFieldFile(dir, info.name, segmentSuffix) { @Override protected void visitOneFormat(String segmentSuffix, PostingsFormat format) throws IOException { - format.files(dir, info, segmentSuffix, files); + format.files(info, segmentSuffix, files); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java index e0eff4a849a..a690d1701cb 100644 --- a/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java @@ -31,7 +31,6 @@ import org.apache.lucene.codecs.PostingsWriterBase; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; /** This postings format "inlines" the postings for terms that have * low docFreq. 
It wraps another postings format, which is used for @@ -115,8 +114,8 @@ public abstract class PulsingPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { - wrappedPostingsBaseFormat.files(dir, segmentInfo, segmentSuffix, files); - BlockTreeTermsReader.files(dir, segmentInfo, segmentSuffix, files); + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + wrappedPostingsBaseFormat.files(segmentInfo, segmentSuffix, files); + BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java b/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java index 8c9a854b495..682c2c65137 100644 --- a/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java @@ -38,6 +38,7 @@ import org.apache.lucene.util.IOUtils; public class SepDocValuesConsumer extends DocValuesWriterBase { private final Directory directory; private final FieldInfos fieldInfos; + public SepDocValuesConsumer(PerDocWriteState state) throws IOException { super(state); this.directory = state.directory; @@ -49,9 +50,9 @@ public class SepDocValuesConsumer extends DocValuesWriterBase { return directory; } - public static void files(Directory dir, SegmentInfo segmentInfo, + public static void files(SegmentInfo segmentInfo, Set files) throws IOException { - files(dir, segmentInfo.getFieldInfos(), segmentInfo.name, files); + files(segmentInfo.dir, segmentInfo.getFieldInfos(), segmentInfo.name, files); } @SuppressWarnings("fallthrough") diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java index 8c6b5407339..7ff17f29424 100644 --- 
a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldInfosReader; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.Directory; /** * plaintext field infos format @@ -47,7 +46,7 @@ public class SimpleTextFieldInfosFormat extends FieldInfosFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - SimpleTextFieldInfosReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + SimpleTextFieldInfosReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java index 8a7e8dfbd15..21bb7b36458 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java @@ -133,7 +133,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader { return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8); } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { files.add(IndexFileNames.segmentFileName(info.name, "", FIELD_INFOS_EXTENSION)); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index a7d19d3613e..c779c2a132d 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -138,7 +138,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { } @Override - public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + public void separateFiles(SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen())); } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java index e53e4b4d06d..4232893d9ec 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java @@ -280,7 +280,7 @@ public class SimpleTextNormsConsumer extends PerDocConsumer { } } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { FieldInfos fieldInfos = info.getFieldInfos(); for (FieldInfo fieldInfo : fieldInfos) { diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java index 7c3052d3c94..596dd715ca5 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; /** * plain-text norms format @@ -47,9 +46,7 @@ public class SimpleTextNormsFormat extends NormsFormat { } @Override - public void files(Directory dir, 
SegmentInfo info, Set files) - throws IOException { - SimpleTextNormsConsumer.files(dir, info, files); - + public void files(SegmentInfo info, Set files) throws IOException { + SimpleTextNormsConsumer.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java index 1eaf256f3e5..e28c930fa3a 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java @@ -27,7 +27,6 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.store.Directory; /** For debugging, curiosity, transparency only!! Do not * use this codec in production. @@ -61,7 +60,7 @@ public class SimpleTextPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { files.add(getPostingsFileName(segmentInfo.name, segmentSuffix)); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java index 8ca53b2099d..05cc7e7b6fe 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java @@ -47,7 +47,7 @@ public class SimpleTextStoredFieldsFormat extends StoredFieldsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - SimpleTextStoredFieldsReader.files(dir, info, files); + public void 
files(SegmentInfo info, Set files) throws IOException { + SimpleTextStoredFieldsReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java index 67642926f3b..bda81381279 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java @@ -178,7 +178,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader { } } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java index 4ba5e26631d..8ec0a861708 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java @@ -47,7 +47,7 @@ public class SimpleTextTermVectorsFormat extends TermVectorsFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - SimpleTextTermVectorsReader.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + SimpleTextTermVectorsReader.files(info, files); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 6be1aead923..18406f5c6a7 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -201,7 +201,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { } } - public static void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public static void files(SegmentInfo info, Set files) throws IOException { if (info.getHasVectors()) { files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 68c7b3160af..15ec66f4609 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -4095,7 +4095,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { Directory dir, SegmentInfo info) throws IOException { // maybe this is overkill, but codec naming clashes would be bad. Set separateFiles = new HashSet(); - info.getCodec().separateFiles(dir, info, separateFiles); + info.getCodec().separateFiles(info, separateFiles); for (String file : files) { assert !separateFiles.contains(file) : file + " should not go in CFS!"; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 0ab77246757..cb5508d0c71 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -258,10 +258,12 @@ public final class SegmentInfo implements Cloneable { } } + // nocommit: wrong to call this if (compoundFile) + // wrong to call this at all... 
nuke it Set codecDocStoreFiles() throws IOException { Set docStoreFiles = new HashSet(); - codec.storedFieldsFormat().files(dir, this, docStoreFiles); - codec.termVectorsFormat().files(dir, this, docStoreFiles); + codec.storedFieldsFormat().files(this, docStoreFiles); + codec.termVectorsFormat().files(this, docStoreFiles); return docStoreFiles; } @@ -472,10 +474,10 @@ public final class SegmentInfo implements Cloneable { } final Set fileSet = new HashSet(); - codec.files(dir, this, fileSet); + codec.files(this, fileSet); // regardless of compound file setting: these files are always in the directory - codec.separateFiles(dir, this, fileSet); + codec.separateFiles(this, fileSet); files = new ArrayList(fileSet); diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java index 9bd0e3dc9ea..0accf47ce60 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java @@ -36,7 +36,6 @@ import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; // TODO: we could make separate base class that can wrapp @@ -137,9 +136,9 @@ public class Lucene40WithOrds extends PostingsFormat { static final String PROX_EXTENSION = "prx"; @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { - Lucene40PostingsReader.files(dir, segmentInfo, segmentSuffix, files); - BlockTermsReader.files(dir, segmentInfo, segmentSuffix, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, segmentSuffix, files); + public void 
files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files); + BlockTermsReader.files(segmentInfo, segmentSuffix, files); + FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java index 653432756da..ac776b474b4 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java @@ -202,9 +202,9 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { SepPostingsReader.files(segmentInfo, segmentSuffix, files); - BlockTermsReader.files(dir, segmentInfo, segmentSuffix, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, segmentSuffix, files); + BlockTermsReader.files(segmentInfo, segmentSuffix, files); + FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java index 470c33df37e..c06741c44c5 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java @@ -225,9 +225,9 @@ public class MockVariableIntBlockPostingsFormat 
extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { SepPostingsReader.files(segmentInfo, segmentSuffix, files); - BlockTermsReader.files(dir, segmentInfo, segmentSuffix, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, segmentSuffix, files); + BlockTermsReader.files(segmentInfo, segmentSuffix, files); + FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java index 6780b9d8107..17ebae6de24 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java @@ -413,20 +413,20 @@ public class MockRandomPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, SEED_EXT); files.add(seedFileName); SepPostingsReader.files(segmentInfo, segmentSuffix, files); - Lucene40PostingsReader.files(dir, segmentInfo, segmentSuffix, files); - BlockTermsReader.files(dir, segmentInfo, segmentSuffix, files); - BlockTreeTermsReader.files(dir, segmentInfo, segmentSuffix, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, segmentSuffix, files); - VariableGapTermsIndexReader.files(dir, segmentInfo, segmentSuffix, files); + Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files); + BlockTermsReader.files(segmentInfo, 
segmentSuffix, files); + BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files); + FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files); + VariableGapTermsIndexReader.files(segmentInfo, segmentSuffix, files); // hackish! Iterator it = files.iterator(); while(it.hasNext()) { final String file = it.next(); - if (!dir.fileExists(file)) { + if (!segmentInfo.dir.fileExists(file)) { it.remove(); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepDocValuesFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepDocValuesFormat.java index e3d449f25cf..c6ac9463a70 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepDocValuesFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepDocValuesFormat.java @@ -28,7 +28,6 @@ import org.apache.lucene.codecs.sep.SepDocValuesProducer; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; /** * Separate-file docvalues implementation @@ -48,7 +47,7 @@ public class MockSepDocValuesFormat extends DocValuesFormat { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { - SepDocValuesConsumer.files(dir, info, files); + public void files(SegmentInfo info, Set files) throws IOException { + SepDocValuesConsumer.files(info, files); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java index ffd45415eea..583efc5ea94 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java @@ -37,7 +37,6 @@ import org.apache.lucene.codecs.sep.SepPostingsWriter; 
import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; /** @@ -130,9 +129,9 @@ public class MockSepPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { SepPostingsReader.files(segmentInfo, segmentSuffix, files); - BlockTermsReader.files(dir, segmentInfo, segmentSuffix, files); - FixedGapTermsIndexReader.files(dir, segmentInfo, segmentSuffix, files); + BlockTermsReader.files(segmentInfo, segmentSuffix, files); + FixedGapTermsIndexReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java index 4ed6afc5db6..37925b808e6 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java @@ -34,7 +34,6 @@ import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.store.Directory; /** * Pulsing(1, Pulsing(2, Lucene40)) @@ -92,8 +91,8 @@ public class NestedPulsingPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { - Lucene40PostingsReader.files(dir, segmentInfo, segmentSuffix, files); - BlockTreeTermsReader.files(dir, segmentInfo, segmentSuffix, 
files); + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) throws IOException { + Lucene40PostingsReader.files(segmentInfo, segmentSuffix, files); + BlockTreeTermsReader.files(segmentInfo, segmentSuffix, files); } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index 5696083bd67..d946ad72c77 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -32,7 +32,6 @@ import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; @@ -115,7 +114,7 @@ public class PreFlexRWCodec extends Lucene3xCodec { } @Override - public void files(Directory dir, SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { // because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files. 
// so we must check segment version here to see if its a "real" 3.x segment or a "fake" @@ -126,7 +125,7 @@ public class PreFlexRWCodec extends Lucene3xCodec { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); } } else { - super.files(dir, info, files); + super.files(info, files); } } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index 0d8f9a644ac..824b6b5a7b2 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -46,7 +46,6 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Bits; @@ -592,7 +591,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat { } @Override - public void files(Directory dir, SegmentInfo segmentInfo, String segmentSuffix, Set files) { + public void files(SegmentInfo segmentInfo, String segmentSuffix, Set files) { final String idFileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, ID_EXTENSION); files.add(idFileName); } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index a1974304b1f..be50e4e8899 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -1548,7 +1548,7 @@ public class TestIndexWriter extends LuceneTestCase { SegmentInfo s = ((SegmentReader) r).getSegmentInfo(); 
assertFalse(s.getHasVectors()); Set files = new HashSet(); - s.getCodec().termVectorsFormat().files(dir, s, files); + s.getCodec().termVectorsFormat().files(s, files); assertTrue(files.isEmpty()); } diff --git a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java index a708ff96613..5eb0730a41e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java @@ -189,7 +189,7 @@ public class TestTermVectorsReader extends LuceneTestCase { SegmentInfo s = ((SegmentReader) r).getSegmentInfo(); assertTrue(s.getHasVectors()); Set files = new HashSet(); - s.getCodec().termVectorsFormat().files(dir, s, files); + s.getCodec().termVectorsFormat().files(s, files); assertFalse(files.isEmpty()); for (String file : files) { assertTrue(dir.fileExists(file)); From 438ec3ce0ba401a9239f7e4a37d68de673c36ba9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 29 Jan 2012 14:13:11 +0000 Subject: [PATCH 07/20] LUCENE-3728: consistently deprecate all 3.x codec classes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237295 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java | 2 ++ .../lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java | 2 ++ .../lucene/codecs/lucene3x/Lucene3xNormsFormat.java | 1 - .../lucene/codecs/lucene3x/Lucene3xNormsProducer.java | 8 +++----- .../codecs/lucene3x/Lucene3xSegmentInfosReader.java | 2 ++ .../lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java | 2 ++ .../lucene/codecs/lucene3x/TermInfosReaderIndex.java | 2 ++ 7 files changed, 13 insertions(+), 6 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index e90a4756297..e144b29ac5e 100644 --- 
a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -43,7 +43,9 @@ import org.apache.lucene.util.MutableBits; /** * Supports the Lucene 3.x index format (readonly) + * @deprecated */ +@Deprecated public class Lucene3xCodec extends Codec { public Lucene3xCodec() { super("Lucene3x"); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java index 43d552897b4..d59e5f40a87 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xFieldInfosReader.java @@ -34,7 +34,9 @@ import org.apache.lucene.store.IndexInput; /** * @lucene.experimental + * @deprecated */ +@Deprecated public class Lucene3xFieldInfosReader extends FieldInfosReader { /** Extension of field infos */ static final String FIELD_INFOS_EXTENSION = "fnm"; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java index 9ac63a991c0..1a7fef6ed32 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.store.Directory; /** * Lucene3x ReadOnly NormsFormat implementation diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java index 5e95e00c88c..1583c7f4df9 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java @@ -43,7 +43,9 @@ import org.apache.lucene.util.StringHelper; /** * Reads Lucene 3.x norms format and exposes it via DocValues API * @lucene.experimental + * @deprecated */ +@Deprecated class Lucene3xNormsProducer extends PerDocProducer { /** norms header placeholder */ @@ -52,9 +54,7 @@ class Lucene3xNormsProducer extends PerDocProducer { /** Extension of norms file */ static final String NORMS_EXTENSION = "nrm"; - /** Extension of separate norms file - * @deprecated */ - @Deprecated + /** Extension of separate norms file */ static final String SEPARATE_NORMS_EXTENSION = "s"; final Map norms = new HashMap(); @@ -203,8 +203,6 @@ class Lucene3xNormsProducer extends PerDocProducer { } } - /** @deprecated */ - @Deprecated static void separateFiles(SegmentInfo info, Set files) throws IOException { Map normGen = info.getNormGen(); if (normGen != null) { diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java index 9f3124c5bb7..7737456d831 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java @@ -37,7 +37,9 @@ import org.apache.lucene.store.IOContext; /** * Lucene 3x implementation of {@link SegmentInfosReader}. 
* @lucene.experimental + * @deprecated */ +@Deprecated public class Lucene3xSegmentInfosReader extends SegmentInfosReader { @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java index 91544ba3c41..bd5695b81fd 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java @@ -45,6 +45,8 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +/** @deprecated */ +@Deprecated public class Lucene3xTermVectorsReader extends TermVectorsReader { // NOTE: if you make a new format, it must be larger than diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java index e9d713011cd..62ff23e2f19 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/TermInfosReaderIndex.java @@ -36,7 +36,9 @@ import org.apache.lucene.util.packed.PackedInts; * index segment. Pairs are accessed either by Term or by ordinal position the * set. The Terms and TermInfo are actually serialized and stored into a byte * array and pointers to the position of each are stored in a int array. 
+ * @deprecated */ +@Deprecated class TermInfosReaderIndex { private static final int MAX_PAGE_BITS = 18; // 256 KB block From d9a73590a8bfafb1ee8a6e32643fe6db12016519 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 12:52:29 +0000 Subject: [PATCH 08/20] LUCENE-3728: stop invading the codecs files() for this assert git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237627 13f79535-47bb-0310-9956-ffa450edef68 --- .../index/DocumentsWriterPerThread.java | 7 ++- .../org/apache/lucene/index/SegmentInfo.java | 54 ++----------------- .../apache/lucene/index/TestSegmentInfo.java | 47 ---------------- 3 files changed, 7 insertions(+), 101 deletions(-) delete mode 100644 lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 30b0c6d937a..c4a02455e11 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -494,14 +494,13 @@ public class DocumentsWriterPerThread { } if (infoStream.isEnabled("DWPT")) { - final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; - final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; + final double newSegmentSize = newSegment.sizeInBytes()/1024./1024.; + // nocommit: some of this is confusing since it includes docstores infoStream.message("DWPT", "flushed: segment=" + newSegment + " ramUsed=" + nf.format(startMBUsed) + " MB" + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + - " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + " docs/MB=" + nf.format(flushedDocCount / newSegmentSize) + - " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); + " new/old=" + nf.format(100.0 * newSegmentSize / startMBUsed) + "%"); } doAfterFlush(); success = true; diff 
--git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index cb5508d0c71..b68d965109f 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -73,8 +73,7 @@ public final class SegmentInfo implements Cloneable { private volatile List files; // cached list of files that this segment uses // in the Directory - private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand) - private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand) + private volatile long sizeInBytes = -1; // total byte size of all files (computed on demand) //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private int docStoreOffset; // if this segment shares stored fields & vectors, this @@ -212,50 +211,12 @@ public final class SegmentInfo implements Cloneable { * Returns total size in bytes of all of files used by this segment */ public long sizeInBytes() throws IOException { - return sizeInBytes(true); - } - - /** - * Returns total size in bytes of all of files used by this segment (if - * {@code includeDocStores} is true), or the size of all files except the - * store files otherwise. - *

- * NOTE: includeDocStores=false should only be used for debugging. - * Theoretically a codec could combine its files however it wants (after- - * the-fact or something), and this calculation is not particularly - * efficient. - */ - long sizeInBytes(boolean includeDocStores) throws IOException { - // TODO: based on how this is used, can't we just forget about all this docstore crap? - // its really an abstraction violation into the codec - if (includeDocStores) { - if (sizeInBytesWithStore != -1) { - return sizeInBytesWithStore; - } long sum = 0; for (final String fileName : files()) { - // We don't count bytes used by a shared doc store - // against this segment - if (docStoreOffset == -1 || !isDocStoreFile(fileName)) { - sum += dir.fileLength(fileName); - } - } - sizeInBytesWithStore = sum; - return sizeInBytesWithStore; - } else { - if (sizeInBytesNoStore != -1) { - return sizeInBytesNoStore; - } - long sum = 0; - for (final String fileName : files()) { - if (isDocStoreFile(fileName)) { - continue; - } sum += dir.fileLength(fileName); } - sizeInBytesNoStore = sum; - return sizeInBytesNoStore; - } + sizeInBytes = sum; + return sizeInBytes; } // nocommit: wrong to call this if (compoundFile) @@ -267,12 +228,6 @@ public final class SegmentInfo implements Cloneable { return docStoreFiles; } - // TODO: a little messy, but sizeInBytes above that uses this is the real problem. - private boolean isDocStoreFile(String fileName) throws IOException { - Set docStoreFiles = codecDocStoreFiles(); - return fileName.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION) || docStoreFiles.contains(fileName); - } - public boolean getHasVectors() throws IOException { return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES; } @@ -488,8 +443,7 @@ public final class SegmentInfo implements Cloneable { * files this segment has. 
*/ private void clearFilesCache() { files = null; - sizeInBytesNoStore = -1; - sizeInBytesWithStore = -1; + sizeInBytes = -1; } /** {@inheritDoc} */ diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java deleted file mode 100644 index 4112dfcb232..00000000000 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.apache.lucene.index; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -public class TestSegmentInfo extends LuceneTestCase { - - public void testSizeInBytesCache() throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()); - IndexWriter writer = new IndexWriter(dir, conf); - Document doc = new Document(); - doc.add(new Field("a", "value", TextField.TYPE_STORED)); - writer.addDocument(doc); - writer.close(); - - SegmentInfos sis = new SegmentInfos(); - sis.read(dir); - SegmentInfo si = sis.info(0); - long sizeInBytesNoStore = si.sizeInBytes(false); - long sizeInBytesWithStore = si.sizeInBytes(true); - assertTrue("sizeInBytesNoStore=" + sizeInBytesNoStore + " sizeInBytesWithStore=" + sizeInBytesWithStore, sizeInBytesWithStore > sizeInBytesNoStore); - dir.close(); - } - -} From 7487fb30f3e0b3c824f693a2eb51ca6702ebe0eb Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 13:10:47 +0000 Subject: [PATCH 09/20] LUCENE-3728: tidy up copySegmentAsIs/sharedDocStore logic git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237637 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/index/IndexWriter.java | 20 +++++++++---------- .../org/apache/lucene/index/SegmentInfo.java | 9 --------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 15ec66f4609..25efb9eb557 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -2545,23 +2545,21 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // only relevant for segments that share doc store with others, // because the DS might have been copied already, in which case we // just want to update the DS name of this SegmentInfo. 
- // NOTE: pre-3x segments include a null DSName if they don't share doc - // store. The following code ensures we don't accidentally insert - // 'null' to the map. String dsName = info.getDocStoreSegment(); + assert dsName != null; final String newDsName; - if (dsName != null) { - if (dsNames.containsKey(dsName)) { - newDsName = dsNames.get(dsName); - } else { - dsNames.put(dsName, segName); - newDsName = segName; - } + if (dsNames.containsKey(dsName)) { + newDsName = dsNames.get(dsName); } else { + dsNames.put(dsName, segName); newDsName = segName; } - Set codecDocStoreFiles = info.codecDocStoreFiles(); + // nocommit: remove this + Set codecDocStoreFiles = new HashSet(); + codec.storedFieldsFormat().files(info, codecDocStoreFiles); + codec.termVectorsFormat().files(info, codecDocStoreFiles); + // Copy the segment files for (String file: info.files()) { final String newFileName; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index b68d965109f..8db91f2c443 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -218,15 +218,6 @@ public final class SegmentInfo implements Cloneable { sizeInBytes = sum; return sizeInBytes; } - - // nocommit: wrong to call this if (compoundFile) - // wrong to call this at all... nuke it - Set codecDocStoreFiles() throws IOException { - Set docStoreFiles = new HashSet(); - codec.storedFieldsFormat().files(this, docStoreFiles); - codec.termVectorsFormat().files(this, docStoreFiles); - return docStoreFiles; - } public boolean getHasVectors() throws IOException { return hasVectors == CHECK_FIELDINFO ? 
getFieldInfos().hasVectors() : hasVectors == YES; From dae9070d1f0c6d4dc640126d46b0e5defbcdebbe Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 13:20:09 +0000 Subject: [PATCH 10/20] LUCENE-3728: remove nocommit git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237641 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 25efb9eb557..52c483b849d 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -2555,10 +2555,12 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { newDsName = segName; } - // nocommit: remove this Set codecDocStoreFiles = new HashSet(); - codec.storedFieldsFormat().files(info, codecDocStoreFiles); - codec.termVectorsFormat().files(info, codecDocStoreFiles); + if (info.getDocStoreOffset() != -1) { + // only violate the codec this way if its preflex + codec.storedFieldsFormat().files(info, codecDocStoreFiles); + codec.termVectorsFormat().files(info, codecDocStoreFiles); + } // Copy the segment files for (String file: info.files()) { From 1e60692bd722ff9be586e5360c7da2fdf28de86f Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 14:36:38 +0000 Subject: [PATCH 11/20] consistent error messages for size mismatch git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237682 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java | 2 +- .../lucene/codecs/lucene40/Lucene40TermVectorsWriter.java | 2 +- .../lucene/codecs/preflexrw/PreFlexRWTermVectorsWriter.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java index 5586adeac53..952a92c0a24 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java @@ -211,7 +211,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. - throw new RuntimeException("mergeFields produced an invalid result: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption"); + throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption"); } @Override diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java index fc002ceebd6..a22e8562b8e 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java @@ -355,7 +355,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter { // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. 
- throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + numDocs + " but tvx size is " + tvx.getFilePointer() + " file=" + tvx.toString() + "; now aborting this merge to prevent index corruption"); + throw new RuntimeException("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + tvx.getFilePointer() + " file=" + tvx.toString() + "; now aborting this merge to prevent index corruption"); } /** Close all streams. */ diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWTermVectorsWriter.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWTermVectorsWriter.java index 1bb97d32b87..b53fee545ec 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWTermVectorsWriter.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWTermVectorsWriter.java @@ -202,7 +202,7 @@ public final class PreFlexRWTermVectorsWriter extends TermVectorsWriter { // throw an exception to prevent the corruption from // entering the index. See LUCENE-1282 for // details. - throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + numDocs + " but tvx size is " + tvx.getFilePointer() + " file=" + tvx.toString() + "; now aborting this merge to prevent index corruption"); + throw new RuntimeException("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + tvx.getFilePointer() + " file=" + tvx.toString() + "; now aborting this merge to prevent index corruption"); } /** Close all streams. 
*/ From c99756201d332853232a8872c4c0bc5c196ce8d0 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 15:19:32 +0000 Subject: [PATCH 12/20] LUCENE-3728: split stored fields into 3x/4x so more docstore stuff can go in 3x git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237713 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 10 +- .../lucene3x/Lucene3xSegmentInfosReader.java | 2 +- .../lucene3x/Lucene3xStoredFieldsFormat.java | 51 +++ .../lucene3x/Lucene3xStoredFieldsReader.java | 333 ++++++++++++++++++ .../lucene40/Lucene40StoredFieldsReader.java | 90 +---- .../lucene40/Lucene40StoredFieldsWriter.java | 7 +- .../codecs/preflexrw/PreFlexRWCodec.java | 3 +- .../PreFlexRWStoredFieldsFormat.java | 17 + .../PreFlexRWStoredFieldsWriter.java | 156 ++++++++ 9 files changed, 580 insertions(+), 89 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java create mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java create mode 100644 lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java create mode 100644 lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index e144b29ac5e..f636b2956ff 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -29,10 +29,8 @@ import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfosFormat; import org.apache.lucene.codecs.StoredFieldsFormat; -import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsFormat; 
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; @@ -53,13 +51,7 @@ public class Lucene3xCodec extends Codec { private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat(); - // TODO: this should really be a different impl - private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat() { - @Override - public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException { - throw new UnsupportedOperationException("this codec can only be used for reading"); - } - }; + private final StoredFieldsFormat fieldsFormat = new Lucene3xStoredFieldsFormat(); private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat(); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java index 7737456d831..53e2f3efb44 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java @@ -66,7 +66,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader { } try { - Lucene40StoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment()); + Lucene3xStoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment()); } finally { // If we opened the directory, close it if (dir != directory) dir.close(); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java new file mode 100644 index 00000000000..dae458b95f3 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java 
@@ -0,0 +1,51 @@ +package org.apache.lucene.codecs.lucene3x; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +/** @deprecated */ +@Deprecated +public class Lucene3xStoredFieldsFormat extends StoredFieldsFormat { + + @Override + public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, + FieldInfos fn, IOContext context) throws IOException { + return new Lucene3xStoredFieldsReader(directory, si, fn, context); + } + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, String segment, + IOContext context) throws IOException { + throw new UnsupportedOperationException("this codec can only be used for reading"); + } + + @Override + public void files(SegmentInfo info, Set files) throws IOException { + Lucene3xStoredFieldsReader.files(info, files); + } +} diff --git 
a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java new file mode 100644 index 00000000000..b3bf47e3f67 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java @@ -0,0 +1,333 @@ +package org.apache.lucene.codecs.lucene3x; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; + +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexFormatTooNewException; +import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +import java.io.Closeable; +import java.nio.charset.Charset; +import java.util.Set; + +/** + * Class responsible for access to stored document fields. + *

+ * It uses <segment>.fdt and <segment>.fdx files. + * + * @deprecated + */ +@Deprecated +public final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable { + private final static int FORMAT_SIZE = 4; + + /** Extension of stored fields file */ + public static final String FIELDS_EXTENSION = "fdt"; + + /** Extension of stored fields index file */ + public static final String FIELDS_INDEX_EXTENSION = "fdx"; + + // Lucene 3.0: Removal of compressed fields + static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; + + // Lucene 3.2: NumericFields are stored in binary format + static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3; + + // NOTE: if you introduce a new format, make it 1 higher + // than the current one, and always change this if you + // switch to a new format! + public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS; + + // when removing support for old versions, leave the last supported version here + static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; + + // NOTE: bit 0 is free here! You can steal it! + public static final int FIELD_IS_BINARY = 1 << 1; + + // the old bit 1 << 2 was compressed, is now left out + + private static final int _NUMERIC_BIT_SHIFT = 3; + static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT; + + public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT; + public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT; + public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT; + public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT; + + private final FieldInfos fieldInfos; + private final IndexInput fieldsStream; + private final IndexInput indexStream; + private int numTotalDocs; + private int size; + private boolean closed; + private final int format; + + // The docID offset where our docs begin in the index + // file. This will be 0 if we have our own private file.
+ private int docStoreOffset; + + /** Returns a cloned FieldsReader that shares open + * IndexInputs with the original one. It is the caller's + * job not to close the original FieldsReader until all + * clones are called (eg, currently SegmentReader manages + * this logic). */ + @Override + public Lucene3xStoredFieldsReader clone() { + ensureOpen(); + return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone()); + } + + /** Verifies that the code version which wrote the segment is supported. */ + public static void checkCodeVersion(Directory dir, String segment) throws IOException { + final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION); + IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT); + + try { + int format = idxStream.readInt(); + if (format < FORMAT_MINIMUM) + throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); + if (format > FORMAT_CURRENT) + throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); + } finally { + idxStream.close(); + } + } + + // Used only by clone + private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, + IndexInput fieldsStream, IndexInput indexStream) { + this.fieldInfos = fieldInfos; + this.numTotalDocs = numTotalDocs; + this.size = size; + this.format = format; + this.docStoreOffset = docStoreOffset; + this.fieldsStream = fieldsStream; + this.indexStream = indexStream; + } + + public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { + final String segment = si.getDocStoreSegment(); + final int docStoreOffset = si.getDocStoreOffset(); + final int size = si.docCount; + boolean success = false; + fieldInfos = fn; + try { + fieldsStream = 
d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context); + final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION); + indexStream = d.openInput(indexStreamFN, context); + + format = indexStream.readInt(); + + if (format < FORMAT_MINIMUM) + throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); + if (format > FORMAT_CURRENT) + throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT); + + final long indexSize = indexStream.length() - FORMAT_SIZE; + + if (docStoreOffset != -1) { + // We read only a slice out of this shared fields file + this.docStoreOffset = docStoreOffset; + this.size = size; + + // Verify the file is long enough to hold all of our + // docs + assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset; + } else { + this.docStoreOffset = 0; + this.size = (int) (indexSize >> 3); + // Verify two sources of "maxDoc" agree: + if (this.size != si.docCount) { + throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount); + } + } + numTotalDocs = (int) (indexSize >> 3); + success = true; + } finally { + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. + if (!success) { + close(); + } + } + } + + /** + * @throws AlreadyClosedException if this FieldsReader is closed + */ + private void ensureOpen() throws AlreadyClosedException { + if (closed) { + throw new AlreadyClosedException("this FieldsReader is closed"); + } + } + + /** + * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams. 
+ * This means that the Fields values will not be accessible. + * + * @throws IOException + */ + public final void close() throws IOException { + if (!closed) { + IOUtils.close(fieldsStream, indexStream); + closed = true; + } + } + + public final int size() { + return size; + } + + private void seekIndex(int docID) throws IOException { + indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); + } + + public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { + seekIndex(n); + fieldsStream.seek(indexStream.readLong()); + + final int numFields = fieldsStream.readVInt(); + for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) { + int fieldNumber = fieldsStream.readVInt(); + FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); + + int bits = fieldsStream.readByte() & 0xFF; + assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); + + switch(visitor.needsField(fieldInfo)) { + case YES: + readField(visitor, fieldInfo, bits); + break; + case NO: + skipField(bits); + break; + case STOP: + return; + } + } + } + + private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException { + final int numeric = bits & FIELD_IS_NUMERIC_MASK; + if (numeric != 0) { + switch(numeric) { + case FIELD_IS_NUMERIC_INT: + visitor.intField(info, fieldsStream.readInt()); + return; + case FIELD_IS_NUMERIC_LONG: + visitor.longField(info, fieldsStream.readLong()); + return; + case FIELD_IS_NUMERIC_FLOAT: + visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt())); + return; + case FIELD_IS_NUMERIC_DOUBLE: + visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong())); + return; + default: + throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric)); + } + } else { + final int length = fieldsStream.readVInt(); + byte bytes[] = new byte[length]; + fieldsStream.readBytes(bytes, 0, length); + if ((bits & 
FIELD_IS_BINARY) != 0) { + visitor.binaryField(info, bytes, 0, bytes.length); + } else { + visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8)); + } + } + } + + private void skipField(int bits) throws IOException { + final int numeric = bits & FIELD_IS_NUMERIC_MASK; + if (numeric != 0) { + switch(numeric) { + case FIELD_IS_NUMERIC_INT: + case FIELD_IS_NUMERIC_FLOAT: + fieldsStream.readInt(); + return; + case FIELD_IS_NUMERIC_LONG: + case FIELD_IS_NUMERIC_DOUBLE: + fieldsStream.readLong(); + return; + default: + throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric)); + } + } else { + final int length = fieldsStream.readVInt(); + fieldsStream.seek(fieldsStream.getFilePointer() + length); + } + } + + /** Returns the length in bytes of each raw document in a + * contiguous range of length numDocs starting with + * startDocID. Returns the IndexInput (the fieldStream), + * already seeked to the starting point for startDocID.*/ + public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { + seekIndex(startDocID); + long startOffset = indexStream.readLong(); + long lastOffset = startOffset; + int count = 0; + while (count < numDocs) { + final long offset; + final int docID = docStoreOffset + startDocID + count + 1; + assert docID <= numTotalDocs; + if (docID < numTotalDocs) + offset = indexStream.readLong(); + else + offset = fieldsStream.length(); + lengths[count++] = (int) (offset-lastOffset); + lastOffset = offset; + } + + fieldsStream.seek(startOffset); + + return fieldsStream; + } + + // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap? + // only preflex segments refer to these? 
+ public static void files(SegmentInfo info, Set files) throws IOException { + if (info.getDocStoreOffset() != -1) { + assert info.getDocStoreSegment() != null; + if (!info.getDocStoreIsCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION)); + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION)); + } + } else { + files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION)); + files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION)); + } + } +} diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java index ca9991b3afa..a7457404333 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java @@ -24,8 +24,6 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexFormatTooNewException; -import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.store.AlreadyClosedException; @@ -35,7 +33,6 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; import java.io.Closeable; -import java.nio.charset.Charset; import java.util.Set; /** @@ -54,11 +51,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme private int numTotalDocs; private int size; private boolean closed; - private final int format; - - // The docID offset where our docs begin in the index - // file. This will be 0 if we have our own private file. 
- private int docStoreOffset; /** Returns a cloned FieldsReader that shares open * IndexInputs with the original one. It is the caller's @@ -68,41 +60,20 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme @Override public Lucene40StoredFieldsReader clone() { ensureOpen(); - return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone()); - } - - /** Verifies that the code version which wrote the segment is supported. */ - public static void checkCodeVersion(Directory dir, String segment) throws IOException { - final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); - IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT); - - try { - int format = idxStream.readInt(); - if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM) - throw new IndexFormatTooOldException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT); - if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT) - throw new IndexFormatTooNewException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT); - } finally { - idxStream.close(); - } + return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone()); } // Used only by clone - private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, - IndexInput fieldsStream, IndexInput indexStream) { + private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) { this.fieldInfos = fieldInfos; this.numTotalDocs = numTotalDocs; this.size = size; - this.format = format; - this.docStoreOffset = docStoreOffset; this.fieldsStream = fieldsStream; 
this.indexStream = indexStream; } public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { - final String segment = si.getDocStoreSegment(); - final int docStoreOffset = si.getDocStoreOffset(); - final int size = si.docCount; + final String segment = si.name; boolean success = false; fieldInfos = fn; try { @@ -110,30 +81,17 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); indexStream = d.openInput(indexStreamFN, context); - format = indexStream.readInt(); - - if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM) - throw new IndexFormatTooOldException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT); - if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT) - throw new IndexFormatTooNewException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT); + // its a 4.0 codec: so its not too-old, its corrupt. 
+ // TODO: change this to CodecUtil.checkHeader + if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) { + throw new CorruptIndexException("unexpected fdx header: " + indexStream); + } final long indexSize = indexStream.length() - FORMAT_SIZE; - - if (docStoreOffset != -1) { - // We read only a slice out of this shared fields file - this.docStoreOffset = docStoreOffset; - this.size = size; - - // Verify the file is long enough to hold all of our - // docs - assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset; - } else { - this.docStoreOffset = 0; - this.size = (int) (indexSize >> 3); - // Verify two sources of "maxDoc" agree: - if (this.size != si.docCount) { - throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount); - } + this.size = (int) (indexSize >> 3); + // Verify two sources of "maxDoc" agree: + if (this.size != si.docCount) { + throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount); } numTotalDocs = (int) (indexSize >> 3); success = true; @@ -176,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } private void seekIndex(int docID) throws IOException { - indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); + indexStream.seek(FORMAT_SIZE + docID * 8L); } public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { @@ -203,8 +161,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } } } - - static final Charset UTF8 = Charset.forName("UTF-8"); private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException { final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK; @@ 
-232,7 +188,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) { visitor.binaryField(info, bytes, 0, bytes.length); } else { - visitor.stringField(info, new String(bytes, 0, bytes.length, UTF8)); + visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8)); } } } @@ -269,7 +225,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme int count = 0; while (count < numDocs) { final long offset; - final int docID = docStoreOffset + startDocID + count + 1; + final int docID = startDocID + count + 1; assert docID <= numTotalDocs; if (docID < numTotalDocs) offset = indexStream.readLong(); @@ -283,19 +239,9 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme return fieldsStream; } - - // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap? - // only preflex segments refer to these? + public static void files(SegmentInfo info, Set files) throws IOException { - if (info.getDocStoreOffset() != -1) { - assert info.getDocStoreSegment() != null; - if (!info.getDocStoreIsCompoundFile()) { - files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION)); - files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION)); - } - } else { - files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION)); - files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION)); - } + files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION)); + files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION)); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java 
b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java index 952a92c0a24..868864833cf 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java @@ -55,10 +55,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT; // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT; - // Lucene 3.0: Removal of compressed fields - static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; - - // Lucene 3.2: NumericFields are stored in binary format + // (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3; // NOTE: if you introduce a new format, make it 1 higher @@ -67,7 +64,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS; // when removing support for old versions, leave the last supported version here - static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; + static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS; /** Extension of stored fields file */ public static final String FIELDS_EXTENSION = "fdt"; diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index d946ad72c77..2013d91167c 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -45,10 +45,9 @@ public class PreFlexRWCodec extends Lucene3xCodec { private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat(); private final 
TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat(); private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat(); + private final StoredFieldsFormat storedFields = new PreFlexRWStoredFieldsFormat(); // TODO: this should really be a different impl private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat(); - // TODO: this should really be a different impl - private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat(); @Override public PostingsFormat postingsFormat() { diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java new file mode 100644 index 00000000000..76b1a224a46 --- /dev/null +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java @@ -0,0 +1,17 @@ +package org.apache.lucene.codecs.preflexrw; + +import java.io.IOException; + +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsFormat; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +public class PreFlexRWStoredFieldsFormat extends Lucene3xStoredFieldsFormat { + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException { + return new PreFlexRWStoredFieldsWriter(directory, segment, context); + } + +} diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java new file mode 100644 index 00000000000..7888b792c9a --- /dev/null +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java @@ -0,0 +1,156 @@ +package org.apache.lucene.codecs.preflexrw; + +/** + * Copyright 2004 The 
Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsReader; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** @lucene.experimental */ +public final class PreFlexRWStoredFieldsWriter extends StoredFieldsWriter { + private final Directory directory; + private final String segment; + private IndexOutput fieldsStream; + private IndexOutput indexStream; + + public PreFlexRWStoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException { + assert directory != null; + this.directory = directory; + this.segment = segment; + + boolean success = false; + try { + fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context); + indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context); + + fieldsStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); + 
indexStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT); + + success = true; + } finally { + if (!success) { + abort(); + } + } + } + + // Writes the contents of buffer into the fields stream + // and adds a new entry for this document into the index + // stream. This assumes the buffer was already written + // in the correct fields format. + public void startDocument(int numStoredFields) throws IOException { + indexStream.writeLong(fieldsStream.getFilePointer()); + fieldsStream.writeVInt(numStoredFields); + } + + public void close() throws IOException { + try { + IOUtils.close(fieldsStream, indexStream); + } finally { + fieldsStream = indexStream = null; + } + } + + public void abort() { + try { + close(); + } catch (IOException ignored) {} + IOUtils.deleteFilesIgnoringExceptions(directory, + IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), + IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); + } + + public void writeField(FieldInfo info, IndexableField field) throws IOException { + fieldsStream.writeVInt(info.number); + int bits = 0; + final BytesRef bytes; + final String string; + // TODO: maybe a field should serialize itself? + // this way we don't bake into indexer all these + // specific encodings for different fields? and apps + // can customize... 
+ + Number number = field.numericValue(); + if (number != null) { + if (number instanceof Byte || number instanceof Short || number instanceof Integer) { + bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT; + } else if (number instanceof Long) { + bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG; + } else if (number instanceof Float) { + bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT; + } else if (number instanceof Double) { + bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE; + } else { + throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); + } + string = null; + bytes = null; + } else { + bytes = field.binaryValue(); + if (bytes != null) { + bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY; + string = null; + } else { + string = field.stringValue(); + if (string == null) { + throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); + } + } + } + + fieldsStream.writeByte((byte) bits); + + if (bytes != null) { + fieldsStream.writeVInt(bytes.length); + fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length); + } else if (string != null) { + fieldsStream.writeString(field.stringValue()); + } else { + if (number instanceof Byte || number instanceof Short || number instanceof Integer) { + fieldsStream.writeInt(number.intValue()); + } else if (number instanceof Long) { + fieldsStream.writeLong(number.longValue()); + } else if (number instanceof Float) { + fieldsStream.writeInt(Float.floatToIntBits(number.floatValue())); + } else if (number instanceof Double) { + fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue())); + } else { + assert false; + } + } + } + + @Override + public void finish(int numDocs) throws IOException { + if (4+((long) numDocs)*8 != indexStream.getFilePointer()) + // This is most likely a bug in Sun JRE 1.6.0_04/_05; + // we detect that the bug has struck, here, and + // throw an 
exception to prevent the corruption from + // entering the index. See LUCENE-1282 for + // details. + throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption"); + } +} From 668dea8016ec2f69df457aaad36a21453cf4b308 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 15:22:11 +0000 Subject: [PATCH 13/20] LUCENE-3728: remove obselete TODO git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237714 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java index b3bf47e3f67..810a6269c15 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java @@ -316,8 +316,6 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme return fieldsStream; } - // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap? - // only preflex segments refer to these? 
public static void files(SegmentInfo info, Set files) throws IOException { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; From 846338c0dc50952b554ea9f3abfbad886e3849e0 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 16:02:37 +0000 Subject: [PATCH 14/20] LUCENE-3728: push compound shared doc stores reading into 3.x codec (only 3.0 indexes, only a hit if you also have vectors) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237746 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene3x/Lucene3xStoredFieldsReader.java | 15 +++++++-- .../lucene3x/Lucene3xTermVectorsReader.java | 16 +++++++++- .../lucene40/Lucene40SegmentInfosReader.java | 1 - .../lucene/index/SegmentCoreReaders.java | 32 +++---------------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java index 810a6269c15..9b96f1a457a 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java @@ -29,13 +29,13 @@ import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; import java.io.Closeable; -import java.nio.charset.Charset; import java.util.Set; /** @@ -93,6 +93,10 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme // The docID offset where our docs begin in the index // file. This will be 0 if we have our own private file. 
private int docStoreOffset; + + // when we are inside a compound share doc store (CFX), + // (lucene 3.0 indexes only), we privately open our own fd. + private final CompoundFileDirectory storeCFSReader; /** Returns a cloned FieldsReader that shares open * IndexInputs with the original one. It is the caller's @@ -131,6 +135,7 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme this.docStoreOffset = docStoreOffset; this.fieldsStream = fieldsStream; this.indexStream = indexStream; + this.storeCFSReader = null; } public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { @@ -140,6 +145,12 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme boolean success = false; fieldInfos = fn; try { + if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { + d = storeCFSReader = new CompoundFileDirectory(si.dir, + IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + } else { + storeCFSReader = null; + } fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context); final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION); indexStream = d.openInput(indexStreamFN, context); @@ -200,7 +211,7 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme */ public final void close() throws IOException { if (!closed) { - IOUtils.close(fieldsStream, indexStream); + IOUtils.close(fieldsStream, indexStream, storeCFSReader); closed = true; } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java index bd5695b81fd..0df4bb94b30 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java +++ 
b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -90,6 +91,12 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { // file. This will be 0 if we have our own private file. private int docStoreOffset; + // when we are inside a compound share doc store (CFX), + // (lucene 3.0 indexes only), we privately open our own fd. + // TODO: if we are worried, maybe we could eliminate the + // extra fd somehow when you also have vectors... + private final CompoundFileDirectory storeCFSReader; + private final int format; // used by clone @@ -102,6 +109,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { this.numTotalDocs = numTotalDocs; this.docStoreOffset = docStoreOffset; this.format = format; + this.storeCFSReader = null; } public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) @@ -113,6 +121,12 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { boolean success = false; try { + if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { + d = storeCFSReader = new CompoundFileDirectory(si.dir, + IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + } else { + storeCFSReader = null; + } String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION); tvx = d.openInput(idxName, context); format = checkValidFormat(tvx); @@ -170,7 +184,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { } public void close() throws IOException { - IOUtils.close(tvx, tvd, tvf); + 
IOUtils.close(tvx, tvd, tvf, storeCFSReader); } /** diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java index 0d335e9bf84..ce581ace00e 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java @@ -23,7 +23,6 @@ import java.util.Map; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.SegmentInfosReader; -import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.ChecksumIndexInput; diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 754acaa6ac8..657981e2b42 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -65,7 +65,6 @@ final class SegmentCoreReaders { final StoredFieldsReader fieldsReaderOrig; final TermVectorsReader termVectorsReaderOrig; final CompoundFileDirectory cfsReader; - final CompoundFileDirectory storeCFSReader; final CloseableThreadLocal fieldsReaderLocal = new CloseableThreadLocal() { @Override @@ -121,34 +120,11 @@ final class SegmentCoreReaders { // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! 
norms = codec.normsFormat().docsProducer(segmentReadState); perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); - - final Directory storeDir; - if (si.getDocStoreOffset() != -1) { - if (si.getDocStoreIsCompoundFile()) { - storeCFSReader = new CompoundFileDirectory(dir, - IndexFileNames.segmentFileName(si.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), - context, false); - storeDir = storeCFSReader; - assert storeDir != null; - } else { - storeCFSReader = null; - storeDir = dir; - assert storeDir != null; - } - } else if (si.getUseCompoundFile()) { - storeDir = cfsReader; - storeCFSReader = null; - assert storeDir != null; - } else { - storeDir = dir; - storeCFSReader = null; - assert storeDir != null; - } - - fieldsReaderOrig = si.getCodec().storedFieldsFormat().fieldsReader(storeDir, si, fieldInfos, context); + + fieldsReaderOrig = si.getCodec().storedFieldsFormat().fieldsReader(cfsDir, si, fieldInfos, context); if (si.getHasVectors()) { // open term vector files only as needed - termVectorsReaderOrig = si.getCodec().termVectorsFormat().vectorsReader(storeDir, si, fieldInfos, context); + termVectorsReaderOrig = si.getCodec().termVectorsFormat().vectorsReader(cfsDir, si, fieldInfos, context); } else { termVectorsReaderOrig = null; } @@ -175,7 +151,7 @@ final class SegmentCoreReaders { //System.out.println("core.decRef seg=" + owner.getSegmentInfo() + " rc=" + ref); if (ref.decrementAndGet() == 0) { IOUtils.close(termVectorsLocal, fieldsReaderLocal, fields, perDocProducer, - termVectorsReaderOrig, fieldsReaderOrig, cfsReader, storeCFSReader, norms); + termVectorsReaderOrig, fieldsReaderOrig, cfsReader, norms); notifyCoreClosedListeners(); } } From cdc68d0921d859dbcf5c5125c36d978a933ac742 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 16:38:50 +0000 Subject: [PATCH 15/20] LUCENE-3728: remove unnecessary code from SCR git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237768 
13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/SegmentCoreReaders.java | 14 +++----------- .../org/apache/lucene/index/SegmentReader.java | 6 +++--- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 657981e2b42..8e6815777fa 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -48,16 +48,13 @@ final class SegmentCoreReaders { // SegmentReaders: private final AtomicInteger ref = new AtomicInteger(1); - final String segment; final FieldInfos fieldInfos; final FieldsProducer fields; final PerDocProducer perDocProducer; final PerDocProducer norms; - final Directory dir; - final Directory cfsDir; - final IOContext context; + private final Directory cfsDir; final int termsIndexDivisor; private final SegmentReader owner; @@ -90,22 +87,17 @@ final class SegmentCoreReaders { throw new IllegalArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)"); } - segment = si.name; final Codec codec = si.getCodec(); - this.context = context; - this.dir = dir; boolean success = false; try { - Directory dir0 = dir; if (si.getUseCompoundFile()) { - cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); - dir0 = cfsReader; + cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; + cfsDir = dir; } - cfsDir = dir0; si.loadFieldInfos(cfsDir, false); // prevent opening the CFS to load fieldInfos fieldInfos = si.getFieldInfos(); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 
02ad8451076..e97447c524d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -183,7 +183,7 @@ public final class SegmentReader extends IndexReader { public String toString() { // SegmentInfo.toString takes dir and number of // *pending* deletions; so we reverse compute that here: - return si.toString(core.dir, si.docCount - numDocs - si.getDelCount()); + return si.toString(si.dir, si.docCount - numDocs - si.getDelCount()); } @Override @@ -196,7 +196,7 @@ public final class SegmentReader extends IndexReader { * Return the name of the segment this reader is reading. */ public String getSegmentName() { - return core.segment; + return si.name; } /** @@ -212,7 +212,7 @@ public final class SegmentReader extends IndexReader { // Don't ensureOpen here -- in certain cases, when a // cloned/reopened reader needs to commit, it may call // this method on the closed original reader - return core.dir; + return si.dir; } // This is necessary so that cloned SegmentReaders (which From 3cab173f7c70170d987eb5805e76a4344cffd936 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 16:40:55 +0000 Subject: [PATCH 16/20] LUCENE-3728: doesn't need to be instance git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237771 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/index/SegmentCoreReaders.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 8e6815777fa..9c159872ead 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -54,7 +54,6 @@ final class SegmentCoreReaders { final PerDocProducer perDocProducer; final PerDocProducer norms; - private final Directory cfsDir; final int termsIndexDivisor; 
private final SegmentReader owner; @@ -88,7 +87,8 @@ final class SegmentCoreReaders { } final Codec codec = si.getCodec(); - + final Directory cfsDir; // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory. + boolean success = false; try { From 4e620c58da0230c333fe33f4df2e5981dfe3a298 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 17:33:52 +0000 Subject: [PATCH 17/20] LUCENE-3728: remove separateFiles mess, remove CFX from IndexFileNames, preflex codec handles this hair itself git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237820 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/codecs/Codec.java | 10 +---- .../apache/lucene/codecs/LiveDocsFormat.java | 2 +- .../lucene/codecs/lucene3x/Lucene3xCodec.java | 38 ++++++++++--------- .../lucene3x/Lucene3xSegmentInfosReader.java | 4 +- .../lucene3x/Lucene3xStoredFieldsReader.java | 10 +++-- .../lucene3x/Lucene3xTermVectorsReader.java | 10 +++-- .../lucene40/Lucene40LiveDocsFormat.java | 2 +- .../simpletext/SimpleTextLiveDocsFormat.java | 2 +- .../apache/lucene/index/IndexFileNames.java | 4 -- .../org/apache/lucene/index/IndexWriter.java | 23 ++--------- .../org/apache/lucene/index/SegmentInfo.java | 3 -- .../codecs/preflexrw/PreFlexRWCodec.java | 7 ++-- .../lucene/index/TestSegmentMerger.java | 30 --------------- 13 files changed, 48 insertions(+), 97 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/Codec.java b/lucene/src/java/org/apache/lucene/codecs/Codec.java index bc8e626f049..288d6489557 100644 --- a/lucene/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/src/java/org/apache/lucene/codecs/Codec.java @@ -60,14 +60,8 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { docValuesFormat().files(info, files); normsFormat().files(info, files); } - } - - /** Populates files with any filenames that are - * stored outside of CFS for the info segment. 
- */ - // TODO: can we somehow totally remove this? - public void separateFiles(SegmentInfo info, Set files) throws IOException { - liveDocsFormat().separateFiles(info, files); + // never inside CFS + liveDocsFormat().files(info, files); } /** Encodes/decodes postings */ diff --git a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java index 7b0e7e9fd6f..6c8ee7bb9e7 100644 --- a/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/LiveDocsFormat.java @@ -37,5 +37,5 @@ public abstract class LiveDocsFormat { public abstract Bits readLiveDocs(Directory dir, SegmentInfo info, IOContext context) throws IOException; /** writes bits to a file */ public abstract void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfo info, IOContext context) throws IOException; - public abstract void separateFiles(SegmentInfo info, Set files) throws IOException; + public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java index f636b2956ff..888c1b8b666 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java @@ -61,6 +61,9 @@ public class Lucene3xCodec extends Codec { private final Lucene3xNormsFormat normsFormat = new Lucene3xNormsFormat(); + /** Extension of compound file for doc store files*/ + static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; + // TODO: this should really be a different impl private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat() { @Override @@ -125,31 +128,30 @@ public class Lucene3xCodec extends Codec { return liveDocsFormat; } - // overrides the default implementation in codec.java to handle CFS without CFE + // overrides the default 
implementation in codec.java to handle CFS without CFE, + // shared doc stores, compound doc stores, separate norms, etc @Override public void files(SegmentInfo info, Set files) throws IOException { if (info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); - // NOTE: we don't add the CFE extension: because 3.x format doesn't use it. } else { - super.files(info, files); + postingsFormat().files(info, "", files); + storedFieldsFormat().files(info, files); + termVectorsFormat().files(info, files); + fieldInfosFormat().files(info, files); + // TODO: segmentInfosFormat should be allowed to declare additional files + // if it wants, in addition to segments_N + docValuesFormat().files(info, files); + normsFormat().files(info, files); } - } - - // override the default implementation in codec.java to handle separate norms files, and shared compound docstores - @Override - public void separateFiles(SegmentInfo info, Set files) throws IOException { - super.separateFiles(info, files); + // never inside CFS + liveDocsFormat().files(info, files); normsFormat().separateFiles(info, files); + + // shared docstores: these guys check the hair if (info.getDocStoreOffset() != -1) { - // We are sharing doc stores (stored fields, term - // vectors) with other segments - assert info.getDocStoreSegment() != null; - if (info.getDocStoreIsCompoundFile()) { - files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)); - } - // otherwise, if its not a compound docstore, storedfieldsformat/termvectorsformat are each adding their relevant files + storedFieldsFormat().files(info, files); + termVectorsFormat().files(info, files); } - } - + } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java index 53e2f3efb44..624dcddc043 100644 --- 
a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java @@ -58,7 +58,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader { if (si.getDocStoreIsCompoundFile()) { dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( si.getDocStoreSegment(), "", - IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } } else if (si.getUseCompoundFile()) { dir = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName( @@ -144,7 +144,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader { if (docStoreOffset != -1) { storesSegment = docStoreSegment; storeIsCompoundFile = docStoreIsCompoundFile; - ext = IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; + ext = Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION; } else { storesSegment = name; storeIsCompoundFile = isCompoundFile; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java index 9b96f1a457a..c3e5026310b 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java @@ -147,7 +147,7 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme try { if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { d = storeCFSReader = new CompoundFileDirectory(si.dir, - IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } @@ -327,14 +327,18 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme return 
fieldsStream; } + // note: if there are shared docstores, we are also called by Lucene3xCodec even in + // the CFS case. so logic here must handle this. public static void files(SegmentInfo info, Set files) throws IOException { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; - if (!info.getDocStoreIsCompoundFile()) { + if (info.getDocStoreIsCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); + } else { files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION)); } - } else { + } else if (!info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION)); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java index 0df4bb94b30..b60d36cf4d3 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsReader.java @@ -123,7 +123,7 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { try { if (docStoreOffset != -1 && si.getDocStoreIsCompoundFile()) { d = storeCFSReader = new CompoundFileDirectory(si.dir, - IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), context, false); + IndexFileNames.segmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } @@ -690,16 +690,20 @@ public class Lucene3xTermVectorsReader extends TermVectorsReader { return new Lucene3xTermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, docStoreOffset, format); } + 
// note: if there are shared docstores, we are also called by Lucene3xCodec even in + // the CFS case. so logic here must handle this. public static void files(SegmentInfo info, Set files) throws IOException { if (info.getHasVectors()) { if (info.getDocStoreOffset() != -1) { assert info.getDocStoreSegment() != null; - if (!info.getDocStoreIsCompoundFile()) { + if (info.getDocStoreIsCompoundFile()) { + files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); + } else { files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", VECTORS_DOCUMENTS_EXTENSION)); } - } else { + } else if (!info.getUseCompoundFile()) { files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_INDEX_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_FIELDS_EXTENSION)); files.add(IndexFileNames.segmentFileName(info.name, "", VECTORS_DOCUMENTS_EXTENSION)); diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java index 1023cd3e357..89710ea9a29 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java @@ -48,7 +48,7 @@ public class Lucene40LiveDocsFormat extends LiveDocsFormat { } @Override - public void separateFiles(SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { files.add(IndexFileNames.fileNameFromGeneration(info.name, DELETES_EXTENSION, info.getDelGen())); } diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java 
b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java index c779c2a132d..fab1fa381f7 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java @@ -138,7 +138,7 @@ public class SimpleTextLiveDocsFormat extends LiveDocsFormat { } @Override - public void separateFiles(SegmentInfo info, Set files) throws IOException { + public void files(SegmentInfo info, Set files) throws IOException { if (info.hasDeletions()) { files.add(IndexFileNames.fileNameFromGeneration(info.name, LIVEDOCS_EXTENSION, info.getDelGen())); } diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index 1bcb493b1d8..b7b5044e3cd 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -54,9 +54,6 @@ public final class IndexFileNames { /** Extension of compound file entries */ public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe"; - /** Extension of compound file for doc store files*/ - public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; - /** * This array contains all filename extensions used by * Lucene's index files, with one exception, namely the @@ -68,7 +65,6 @@ public final class IndexFileNames { COMPOUND_FILE_EXTENSION, COMPOUND_FILE_ENTRIES_EXTENSION, GEN_EXTENSION, - COMPOUND_FILE_STORE_EXTENSION, }; /** diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 52c483b849d..8be30d9fcfe 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -2565,7 +2565,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // Copy the segment files for (String file: info.files()) { final String 
newFileName; - if (codecDocStoreFiles.contains(file) || file.endsWith(IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)) { + if (codecDocStoreFiles.contains(file)) { newFileName = newDsName + IndexFileNames.stripSegmentName(file); if (dsFilesCopied.contains(newFileName)) { continue; @@ -4070,12 +4070,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { */ static final Collection createCompoundFile(Directory directory, String fileName, CheckAbort checkAbort, final SegmentInfo info, IOContext context) throws IOException { - + assert info.getDocStoreOffset() == -1; // Now merge all added files Collection files = info.files(); CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); try { - assert assertNoSeparateFiles(files, directory, info); + // nocommit: we could make a crappy regex like before... + // assert assertNoSeparateFiles(files, directory, info); for (String file : files) { directory.copy(cfsDir, file, file, context); checkAbort.work(directory.fileLength(file)); @@ -4086,20 +4087,4 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return files; } - - - /** - * used only by assert: checks that filenames about to be put in cfs belong. - */ - private static boolean assertNoSeparateFiles(Collection files, - Directory dir, SegmentInfo info) throws IOException { - // maybe this is overkill, but codec naming clashes would be bad. 
- Set separateFiles = new HashSet(); - info.getCodec().separateFiles(info, separateFiles); - - for (String file : files) { - assert !separateFiles.contains(file) : file + " should not go in CFS!"; - } - return true; - } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index 8db91f2c443..9acfba2667d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -421,9 +421,6 @@ public final class SegmentInfo implements Cloneable { final Set fileSet = new HashSet(); codec.files(this, fileSet); - - // regardless of compound file setting: these files are always in the directory - codec.separateFiles(this, fileSet); files = new ArrayList(fileSet); diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java index 2013d91167c..5b7029c5fc0 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java @@ -117,14 +117,13 @@ public class PreFlexRWCodec extends Lucene3xCodec { if (info.getUseCompoundFile() && LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) { // because we don't fully emulate 3.x codec, PreFlexRW actually writes 4.x format CFS files. 
// so we must check segment version here to see if its a "real" 3.x segment or a "fake" - // one that we wrote with a 4.x-format CFS+CFE - files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + // one that we wrote with a 4.x-format CFS+CFE, in this case we must add the .CFE String version = info.getVersion(); if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) { files.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); } - } else { - super.files(info, files); } + + super.files(info, files); } } diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java index e5b6eb84114..d9445f935fa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -135,34 +135,4 @@ public class TestSegmentMerger extends LuceneTestCase { TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); } - - // LUCENE-3143 - public void testInvalidFilesToCreateCompound() throws Exception { - Directory dir = newDirectory(); - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); - IndexWriter w = new IndexWriter(dir, iwc); - - // Create an index w/ .del file - w.addDocument(new Document()); - Document doc = new Document(); - doc.add(new TextField("c", "test")); - w.addDocument(doc); - w.commit(); - w.deleteDocuments(new Term("c", "test")); - w.close(); - - // Assert that SM fails if .del exists - SegmentMerger sm = new SegmentMerger(InfoStream.getDefault(), dir, 1, "a", MergeState.CheckAbort.NONE, null, null, Codec.getDefault(), newIOContext(random)); - boolean doFail = false; - try { - IndexWriter.createCompoundFile(dir, "b1", MergeState.CheckAbort.NONE, w.segmentInfos.info(0), newIOContext(random)); - doFail = true; // should never get here - } catch 
(AssertionError e) { - // expected - } - assertFalse("should not have been able to create a .cfs with .del and .s* files", doFail); - - dir.close(); - } - } From 8421cdfbb9c2f2a403ea37ff6b8fdb9aff4da0f0 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 17:37:17 +0000 Subject: [PATCH 18/20] LUCENE-3728: remove outdated nocommit, not relevant since we dont repack CFS on addindexes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237822 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/index/IndexWriter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 8be30d9fcfe..9eecc06035a 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -4075,8 +4075,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { Collection files = info.files(); CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); try { - // nocommit: we could make a crappy regex like before... 
- // assert assertNoSeparateFiles(files, directory, info); for (String file : files) { directory.copy(cfsDir, file, file, context); checkAbort.work(directory.fileLength(file)); From c024c2e34176ed962199959b79c45203de4836e8 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 17:50:08 +0000 Subject: [PATCH 19/20] LUCENE-3728: add note to this infostream that it includes docstores git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237842 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/lucene/index/DocumentsWriterPerThread.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index c4a02455e11..9ed5d23349d 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -495,10 +495,9 @@ public class DocumentsWriterPerThread { if (infoStream.isEnabled("DWPT")) { final double newSegmentSize = newSegment.sizeInBytes()/1024./1024.; - // nocommit: some of this is confusing since it includes docstores infoStream.message("DWPT", "flushed: segment=" + newSegment + " ramUsed=" + nf.format(startMBUsed) + " MB" + - " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + + " newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" + " docs/MB=" + nf.format(flushedDocCount / newSegmentSize) + " new/old=" + nf.format(100.0 * newSegmentSize / startMBUsed) + "%"); } From ea1299a074f8b1bb7b1fc155b921463376613bbb Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 30 Jan 2012 18:16:54 +0000 Subject: [PATCH 20/20] LUCENE-3728: remove dead code git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237885 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene3x/Lucene3xStoredFieldsReader.java | 30 ------------------- 1 file changed, 30 
deletions(-) diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java index c3e5026310b..9d3d33e19af 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java @@ -216,10 +216,6 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme } } - public final int size() { - return size; - } - private void seekIndex(int docID) throws IOException { indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); } @@ -301,32 +297,6 @@ public final class Lucene3xStoredFieldsReader extends StoredFieldsReader impleme } } - /** Returns the length in bytes of each raw document in a - * contiguous range of length numDocs starting with - * startDocID. Returns the IndexInput (the fieldStream), - * already seeked to the starting point for startDocID.*/ - public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { - seekIndex(startDocID); - long startOffset = indexStream.readLong(); - long lastOffset = startOffset; - int count = 0; - while (count < numDocs) { - final long offset; - final int docID = docStoreOffset + startDocID + count + 1; - assert docID <= numTotalDocs; - if (docID < numTotalDocs) - offset = indexStream.readLong(); - else - offset = fieldsStream.length(); - lengths[count++] = (int) (offset-lastOffset); - lastOffset = offset; - } - - fieldsStream.seek(startOffset); - - return fieldsStream; - } - // note: if there are shared docstores, we are also called by Lucene3xCodec even in // the CFS case. so logic here must handle this. public static void files(SegmentInfo info, Set files) throws IOException {