From b6f7024afbd46cb3cbf82eddadcb1298929a2dde Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 12 Oct 2012 00:52:57 +0000 Subject: [PATCH 01/20] Make branch to switch default index format git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4446@1397400 13f79535-47bb-0310-9956-ffa450edef68 From 54ff47eff00694cb4b8df366ee0188c2bba678af Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 12 Oct 2012 02:00:19 +0000 Subject: [PATCH 02/20] LUCENE-4446: quick stab at a start... I think core tests pass but all else is TODO/untested git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4446@1397416 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/codecs/block/package.html | 25 -- ...rmat.java => Pulsing41PostingsFormat.java} | 16 +- .../org.apache.lucene.codecs.PostingsFormat | 3 +- .../TestFixedGapPostingsFormat.java | 4 +- .../codecs/bloom/TestBloomPostingsFormat.java | 2 +- .../codecs/pulsing/Test10KPulsings.java | 5 +- .../pulsing/TestPulsingPostingsFormat.java | 6 +- .../codecs/pulsing/TestPulsingReuse.java | 2 +- .../lucene/codecs/lucene41}/ForUtil.java | 4 +- .../lucene/codecs/lucene41/Lucene41Codec.java | 122 ++++++ .../lucene41/Lucene41PostingsBaseFormat.java | 51 +++ .../lucene41/Lucene41PostingsFormat.java} | 24 +- .../lucene41/Lucene41PostingsReader.java} | 78 ++-- .../lucene41/Lucene41PostingsWriter.java} | 38 +- .../codecs/lucene41/Lucene41SkipReader.java} | 16 +- .../codecs/lucene41/Lucene41SkipWriter.java} | 8 +- .../lucene/codecs/lucene41/package.html | 396 ++++++++++++++++++ .../services/org.apache.lucene.codecs.Codec | 1 + .../org.apache.lucene.codecs.PostingsFormat | 1 + .../org/apache/lucene/TestExternalCodecs.java | 8 +- .../lucene41}/TestBlockPostingsFormat.java | 13 +- .../lucene41}/TestBlockPostingsFormat2.java | 15 +- .../lucene41}/TestBlockPostingsFormat3.java | 10 +- .../lucene/codecs/lucene41}/TestForUtil.java | 8 +- .../perfield/TestPerFieldPostingsFormat2.java | 30 +- 
.../apache/lucene/index/TestAddIndexes.java | 12 +- .../TestAllFilesHaveCodecHeader.java | 6 +- .../lucene/index/TestDirectoryReader.java | 6 +- .../apache/lucene/index/TestDocTermOrds.java | 4 +- .../lucene/index/TestDuelingCodecs.java | 2 +- .../org/apache/lucene/index/TestFlex.java | 4 +- .../lucene/index/TestMultiLevelSkipList.java | 4 +- .../lucene/index/TestSegmentTermEnum.java | 4 +- .../lucene/util/TestNamedSPILoader.java | 6 +- ...=> TestBloomFilteredLucene41Postings.java} | 12 +- .../Lucene41WithOrds.java} | 20 +- .../package.html | 0 .../org/apache/lucene/index/RandomCodec.java | 24 +- .../util/TestRuleSetupAndRestoreClassEnv.java | 17 +- .../org.apache.lucene.codecs.PostingsFormat | 4 +- 40 files changed, 771 insertions(+), 240 deletions(-) delete mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html rename lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/{Pulsing40PostingsFormat.java => Pulsing41PostingsFormat.java} (75%) rename lucene/{codecs/src/java/org/apache/lucene/codecs/block => core/src/java/org/apache/lucene/codecs/lucene41}/ForUtil.java (98%) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java rename lucene/{codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java => core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java} (96%) rename lucene/{codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java => core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java} (95%) rename lucene/{codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java => core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java} (93%) rename lucene/{codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java => core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java} (91%) 
rename lucene/{codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java => core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java} (94%) create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html rename lucene/{codecs/src/test/org/apache/lucene/codecs/block => core/src/test/org/apache/lucene/codecs/lucene41}/TestBlockPostingsFormat.java (71%) rename lucene/{codecs/src/test/org/apache/lucene/codecs/block => core/src/test/org/apache/lucene/codecs/lucene41}/TestBlockPostingsFormat2.java (91%) rename lucene/{codecs/src/test/org/apache/lucene/codecs/block => core/src/test/org/apache/lucene/codecs/lucene41}/TestBlockPostingsFormat3.java (98%) rename lucene/{codecs/src/test/org/apache/lucene/codecs/block => core/src/test/org/apache/lucene/codecs/lucene41}/TestForUtil.java (92%) rename lucene/core/src/test/org/apache/lucene/{codecs/lucene40 => index}/TestAllFilesHaveCodecHeader.java (95%) rename lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/{TestBloomFilteredLucene40Postings.java => TestBloomFilteredLucene41Postings.java} (87%) rename lucene/test-framework/src/java/org/apache/lucene/codecs/{lucene40ords/Lucene40WithOrds.java => lucene41ords/Lucene41WithOrds.java} (89%) rename lucene/test-framework/src/java/org/apache/lucene/codecs/{lucene40ords => lucene41ords}/package.html (100%) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html b/lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html deleted file mode 100644 index c4fe9c6a8ea..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - -BlockPostingsFormat file format. 
- - diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing40PostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java similarity index 75% rename from lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing40PostingsFormat.java rename to lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java index faf8df2bbf8..7fd7fb0504a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing40PostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java @@ -18,28 +18,28 @@ package org.apache.lucene.codecs.pulsing; */ import org.apache.lucene.codecs.BlockTreeTermsWriter; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsBaseFormat; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs /** - * Concrete pulsing implementation over {@link Lucene40PostingsFormat}. + * Concrete pulsing implementation over {@link Lucene41PostingsFormat}. * * @lucene.experimental */ -public class Pulsing40PostingsFormat extends PulsingPostingsFormat { +public class Pulsing41PostingsFormat extends PulsingPostingsFormat { /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene40" format. */ - public Pulsing40PostingsFormat() { + public Pulsing41PostingsFormat() { this(1); } /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. */ - public Pulsing40PostingsFormat(int freqCutoff) { + public Pulsing41PostingsFormat(int freqCutoff) { this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); } /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. 
*/ - public Pulsing40PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) { - super("Pulsing40", new Lucene40PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize); + public Pulsing41PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) { + super("Pulsing41", new Lucene41PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize); } } diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 72b05c5e74e..22062983d91 100644 --- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -13,9 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat +org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat org.apache.lucene.codecs.memory.MemoryPostingsFormat org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat org.apache.lucene.codecs.memory.DirectPostingsFormat -org.apache.lucene.codecs.block.BlockPostingsFormat diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java index d45b6828d1a..055bc21e2fb 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java @@ -20,7 +20,7 @@ package org.apache.lucene.codecs.blockterms; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene40.Lucene40Codec; -import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds; 
+import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; // TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package // TODO: ensure both of these are also in rotation in RandomCodec public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase { - private final PostingsFormat postings = new Lucene40WithOrds(); + private final PostingsFormat postings = new Lucene41WithOrds(); private final Codec codec = new Lucene40Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java index 3bd9a90935c..143163feead 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java @@ -26,7 +26,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; * Basic tests for BloomPostingsFormat */ public class TestBloomPostingsFormat extends BasePostingsFormatTestCase { - private final PostingsFormat postings = new TestBloomFilteredLucene40Postings(); + private final PostingsFormat postings = new TestBloomFilteredLucene41Postings(); private final Codec codec = new Lucene40Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java index 3e47dc549cc..6e53a631ea3 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java @@ -25,7 +25,6 @@ import java.util.Locale; 
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -52,7 +51,7 @@ import org.apache.lucene.util._TestUtil; public class Test10KPulsings extends LuceneTestCase { public void test10kPulsed() throws Exception { // we always run this test with pulsing codec. - Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(1)); + Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1)); File f = _TestUtil.getTempDir("10kpulsed"); BaseDirectoryWrapper dir = newFSDirectory(f); @@ -103,7 +102,7 @@ public class Test10KPulsings extends LuceneTestCase { public void test10kNotPulsed() throws Exception { // we always run this test with pulsing codec. int freqCutoff = _TestUtil.nextInt(random(), 1, 10); - Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(freqCutoff)); + Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(freqCutoff)); File f = _TestUtil.getTempDir("10knotpulsed"); BaseDirectoryWrapper dir = newFSDirectory(f); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java index 3156323df61..75271d33d17 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.pulsing; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -27,8 +27,8 @@ import 
org.apache.lucene.index.BasePostingsFormatTestCase; */ public class TestPulsingPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize cutoff - private final PostingsFormat postings = new Pulsing40PostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final PostingsFormat postings = new Pulsing41PostingsFormat(); + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java index 488fca34baf..cfa520aef02 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java @@ -45,7 +45,7 @@ public class TestPulsingReuse extends LuceneTestCase { // TODO: this is a basic test. this thing is complicated, add more public void testSophisticatedReuse() throws Exception { // we always run this test with pulsing codec. 
- Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(1)); + Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1)); Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp)); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java similarity index 98% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/ForUtil.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java index fc52520f551..88f70a249c1 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -28,7 +28,7 @@ import org.apache.lucene.util.packed.PackedInts.Decoder; import org.apache.lucene.util.packed.PackedInts.FormatAndBits; import org.apache.lucene.util.packed.PackedInts; -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; /** * Encode all values in normal area with fixed bit width, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java new file mode 100644 index 00000000000..48219582b75 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java @@ -0,0 +1,122 @@ +package org.apache.lucene.codecs.lucene41; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; +import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; + +/** + * Implements the Lucene 4.1 index format, with configurable per-field postings formats. + *

+ * If you want to reuse functionality of this codec in another codec, extend + * {@link FilterCodec}. + * + * @see org.apache.lucene.codecs.lucene41 package documentation for file format details. + * @lucene.experimental + */ +// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever +// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader +// (it writes a minor version, etc). +public class Lucene41Codec extends Codec { + private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat(); + private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat(); + private final DocValuesFormat docValuesFormat = new Lucene40DocValuesFormat(); + private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat(); + private final NormsFormat normsFormat = new Lucene40NormsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Lucene41Codec.this.getPostingsFormatForField(field); + } + }; + + /** Sole constructor. 
*/ + public Lucene41Codec() { + super("Lucene41"); + } + + @Override + public final StoredFieldsFormat storedFieldsFormat() { + return fieldsFormat; + } + + @Override + public final TermVectorsFormat termVectorsFormat() { + return vectorsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return infosFormat; + } + + @Override + public final NormsFormat normsFormat() { + return normsFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + /** Returns the postings format that should be used for writing + * new segments of field. + * + * The default implementation always returns "Lucene41" + */ + public PostingsFormat getPostingsFormatForField(String field) { + return defaultFormat; + } + + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java new file mode 100644 index 00000000000..0360c0d2709 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java @@ -0,0 +1,51 @@ +package org.apache.lucene.codecs.lucene41; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.PostingsBaseFormat; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +/** + * Provides a {@link PostingsReaderBase} and {@link + * PostingsWriterBase}. + * + * @lucene.experimental */ + +// TODO: should these also be named / looked up via SPI? +public final class Lucene41PostingsBaseFormat extends PostingsBaseFormat { + + /** Sole constructor. */ + public Lucene41PostingsBaseFormat() { + super("Lucene41"); + } + + @Override + public PostingsReaderBase postingsReaderBase(SegmentReadState state) throws IOException { + return new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + } + + @Override + public PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException { + return new Lucene41PostingsWriter(state); + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java similarity index 96% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java index 73ce8df62c4..d1c21ed2846 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* @@ -38,7 +38,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.PackedInts; /** - * Block postings format, which encodes postings in packed integer blocks + * Lucene 4.1 postings format, which encodes postings in packed integer blocks * for fast decode. * *

NOTE: this format is still experimental and @@ -58,7 +58,7 @@ import org.apache.lucene.util.packed.PackedInts; * *

  • * Block structure: - *

    When the postings are long enough, BlockPostingsFormat will try to encode most integer data + *

    When the postings are long enough, Lucene41PostingsFormat will try to encode most integer data * as a packed block.

    *

    Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed * blocks, while the remaining 3 are encoded as one VInt block.

    @@ -161,7 +161,7 @@ import org.apache.lucene.util.packed.PackedInts; *
  • SkipFPDelta determines the position of this term's SkipData within the .doc * file. In particular, it is the length of the TermFreq data. * SkipDelta is only stored if DocFreq is not smaller than SkipMinimum - * (i.e. 8 in BlockPostingsFormat).
  • + * (i.e. 8 in Lucene41PostingsFormat). * * * @@ -238,10 +238,10 @@ import org.apache.lucene.util.packed.PackedInts; * We use this trick since the definition of skip entry is a little different from base interface. * In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for * skipIntervalth, 2*skipIntervalth ... posting in the list. However, - * in BlockPostingsFormat, the skip data is saved for skipInterval+1th, + * in Lucene41PostingsFormat, the skip data is saved for skipInterval+1th, * 2*skipInterval+1th ... posting (skipInterval==PackedBlockSize in this case). * When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one - * more skip data than BlockSkipWriter. + * more skip data than Lucene41SkipWriter. *
  • SkipDatum is the metadata of one skip entry. * For the first block (no matter packed or VInt), it is omitted.
  • *
  • DocSkip records the document number of every PackedBlockSizeth document number in @@ -351,7 +351,7 @@ import org.apache.lucene.util.packed.PackedInts; * @lucene.experimental */ -public final class BlockPostingsFormat extends PostingsFormat { +public final class Lucene41PostingsFormat extends PostingsFormat { /** * Filename extension for document number, frequencies, and skip data. * See chapter: Frequencies and Skip Data @@ -380,12 +380,12 @@ public final class BlockPostingsFormat extends PostingsFormat { // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding public final static int BLOCK_SIZE = 128; - public BlockPostingsFormat() { + public Lucene41PostingsFormat() { this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); } - public BlockPostingsFormat(int minTermBlockSize, int maxTermBlockSize) { - super("Block"); + public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) { + super("Lucene41"); this.minTermBlockSize = minTermBlockSize; assert minTermBlockSize > 1; this.maxTermBlockSize = maxTermBlockSize; @@ -399,7 +399,7 @@ public final class BlockPostingsFormat extends PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new BlockPostingsWriter(state); + PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state); boolean success = false; try { @@ -418,7 +418,7 @@ public final class BlockPostingsFormat extends PostingsFormat { @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new BlockPostingsReader(state.dir, + PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java similarity index 95% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java index 77b766743f0..7e14977bc25 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,9 +17,9 @@ package org.apache.lucene.codecs.block; * limitations under the License. */ -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_DATA_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_ENCODED_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; import java.io.IOException; import java.util.Arrays; @@ -49,10 +49,10 @@ import org.apache.lucene.util.IOUtils; * Concrete class that reads docId(maybe frq,pos,offset,payloads) list * with postings format. 
* - * @see BlockSkipReader for details - * + * @see Lucene41SkipReader for details + * @lucene.experimental */ -final class BlockPostingsReader extends PostingsReaderBase { +public final class Lucene41PostingsReader extends PostingsReaderBase { private final IndexInput docIn; private final IndexInput posIn; @@ -62,35 +62,35 @@ final class BlockPostingsReader extends PostingsReaderBase { // public static boolean DEBUG = false; - public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { + public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { boolean success = false; IndexInput docIn = null; IndexInput posIn = null; IndexInput payIn = null; try { - docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.DOC_EXTENSION), + docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), ioContext); CodecUtil.checkHeader(docIn, - BlockPostingsWriter.DOC_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.DOC_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); forUtil = new ForUtil(docIn); if (fieldInfos.hasProx()) { - posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.POS_EXTENSION), + posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), ioContext); CodecUtil.checkHeader(posIn, - BlockPostingsWriter.POS_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.POS_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); if (fieldInfos.hasPayloads() || 
fieldInfos.hasOffsets()) { - payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION), + payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), ioContext); CodecUtil.checkHeader(payIn, - BlockPostingsWriter.PAY_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.PAY_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); } } @@ -109,9 +109,9 @@ final class BlockPostingsReader extends PostingsReaderBase { public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching postings writer CodecUtil.checkHeader(termsIn, - BlockPostingsWriter.TERMS_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.TERMS_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); final int indexBlockSize = termsIn.readVInt(); if (indexBlockSize != BLOCK_SIZE) { throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")"); @@ -321,7 +321,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private int docBufferUpto; - private BlockSkipReader skipper; + private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; @@ -353,7 +353,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private Bits liveDocs; public BlockDocsEnum(FieldInfo fieldInfo) throws IOException { - this.startDocIn = BlockPostingsReader.this.docIn; + this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = startDocIn.clone(); indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; @@ -486,8 +486,8 @@ final class 
BlockPostingsReader extends PostingsReaderBase { if (skipper == null) { // Lazy init: first time this enum has ever been used for skipping - skipper = new BlockSkipReader(docIn.clone(), - BlockPostingsWriter.maxSkipLevels, + skipper = new Lucene41SkipReader(docIn.clone(), + Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, indexHasPos, indexHasOffsets, @@ -502,7 +502,7 @@ final class BlockPostingsReader extends PostingsReaderBase { skipped = true; } - // always plus one to fix the result, since skip position in BlockSkipReader + // always plus one to fix the result, since skip position in Lucene41SkipReader // is a little different from MultiLevelSkipListReader final int newDocUpto = skipper.skipTo(target) + 1; @@ -577,7 +577,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private int docBufferUpto; private int posBufferUpto; - private BlockSkipReader skipper; + private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; @@ -628,9 +628,9 @@ final class BlockPostingsReader extends PostingsReaderBase { private Bits liveDocs; public BlockDocsAndPositionsEnum(FieldInfo fieldInfo) throws IOException { - this.startDocIn = BlockPostingsReader.this.docIn; + this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = startDocIn.clone(); - this.posIn = BlockPostingsReader.this.posIn.clone(); + this.posIn = Lucene41PostingsReader.this.posIn.clone(); encoded = new byte[MAX_ENCODED_SIZE]; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasPayloads = fieldInfo.hasPayloads(); @@ -797,8 +797,8 @@ final class BlockPostingsReader extends PostingsReaderBase { // if (DEBUG) { // System.out.println(" create skipper"); // } - skipper = new BlockSkipReader(docIn.clone(), - BlockPostingsWriter.maxSkipLevels, + skipper = new Lucene41SkipReader(docIn.clone(), + Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, true, indexHasOffsets, @@ -987,7 +987,7 @@ final class 
BlockPostingsReader extends PostingsReaderBase { private int docBufferUpto; private int posBufferUpto; - private BlockSkipReader skipper; + private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; @@ -1044,10 +1044,10 @@ final class BlockPostingsReader extends PostingsReaderBase { private Bits liveDocs; public EverythingEnum(FieldInfo fieldInfo) throws IOException { - this.startDocIn = BlockPostingsReader.this.docIn; + this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = startDocIn.clone(); - this.posIn = BlockPostingsReader.this.posIn.clone(); - this.payIn = BlockPostingsReader.this.payIn.clone(); + this.posIn = Lucene41PostingsReader.this.posIn.clone(); + this.payIn = Lucene41PostingsReader.this.payIn.clone(); encoded = new byte[MAX_ENCODED_SIZE]; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; if (indexHasOffsets) { @@ -1282,8 +1282,8 @@ final class BlockPostingsReader extends PostingsReaderBase { // if (DEBUG) { // System.out.println(" create skipper"); // } - skipper = new BlockSkipReader(docIn.clone(), - BlockPostingsWriter.maxSkipLevels, + skipper = new Lucene41SkipReader(docIn.clone(), + Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, true, indexHasOffsets, diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java similarity index 93% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java index 305e1f38d51..51e2b02422f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* 
* Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,9 +17,9 @@ package org.apache.lucene.codecs.block; * limitations under the License. */ -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_DATA_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_ENCODED_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; import java.io.IOException; import java.util.ArrayList; @@ -47,10 +47,10 @@ import org.apache.lucene.util.packed.PackedInts; * * Postings list for each term will be stored separately. * - * @see BlockSkipWriter for details about skipping setting and postings layout. - * + * @see Lucene41SkipWriter for details about skipping setting and postings layout. + * @lucene.experimental */ -final class BlockPostingsWriter extends PostingsWriterBase { +public final class Lucene41PostingsWriter extends PostingsWriterBase { /** * Expert: The maximum number of skip levels. 
Smaller values result in @@ -58,12 +58,12 @@ final class BlockPostingsWriter extends PostingsWriterBase { */ static final int maxSkipLevels = 10; - final static String TERMS_CODEC = "BlockPostingsWriterTerms"; - final static String DOC_CODEC = "BlockPostingsWriterDoc"; - final static String POS_CODEC = "BlockPostingsWriterPos"; - final static String PAY_CODEC = "BlockPostingsWriterPay"; + final static String TERMS_CODEC = "Lucene41PostingsWriterTerms"; + final static String DOC_CODEC = "Lucene41PostingsWriterDoc"; + final static String POS_CODEC = "Lucene41PostingsWriterPos"; + final static String PAY_CODEC = "Lucene41PostingsWriterPay"; - // Increment version to change it: + // Increment version to change it: nocommit: we can start at 0 final static int VERSION_START = 0; final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443 final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA; @@ -112,12 +112,12 @@ final class BlockPostingsWriter extends PostingsWriterBase { final byte[] encoded; private final ForUtil forUtil; - private final BlockSkipWriter skipWriter; + private final Lucene41SkipWriter skipWriter; - public BlockPostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException { + public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException { super(); - docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.DOC_EXTENSION), + docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.context); IndexOutput posOut = null; IndexOutput payOut = null; @@ -127,7 +127,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { forUtil = new ForUtil(acceptableOverheadRatio, docOut); if (state.fieldInfos.hasProx()) { posDeltaBuffer = new int[MAX_DATA_SIZE]; - posOut = 
state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.POS_EXTENSION), + posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.context); CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT); @@ -148,7 +148,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { } if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { - payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.PAY_EXTENSION), + payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.context); CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT); } @@ -172,7 +172,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { freqBuffer = new int[MAX_DATA_SIZE]; // TODO: should we try skipping every 2/4 blocks...? 
- skipWriter = new BlockSkipWriter(maxSkipLevels, + skipWriter = new Lucene41SkipWriter(maxSkipLevels, BLOCK_SIZE, state.segmentInfo.getDocCount(), docOut, @@ -182,7 +182,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { encoded = new byte[MAX_ENCODED_SIZE]; } - public BlockPostingsWriter(SegmentWriteState state) throws IOException { + public Lucene41PostingsWriter(SegmentWriteState state) throws IOException { this(state, PackedInts.COMPACT); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java similarity index 91% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java index e5803fd9696..483b0ec21df 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -35,12 +35,12 @@ import org.apache.lucene.store.IndexInput; * 0 1 2 3 4 5 * d d d d d d (posting list) * ^ ^ (skip point in MultiLeveSkipWriter) - * ^ (skip point in BlockSkipWriter) + * ^ (skip point in Lucene41SkipWriter) * * In this case, MultiLevelSkipListReader will use the last document as a skip point, - * while BlockSkipReader should assume no skip point will comes. + * while Lucene41SkipReader should assume no skip point will comes. * - * If we use the interface directly in BlockSkipReader, it may silly try to read + * If we use the interface directly in Lucene41SkipReader, it may silly try to read * another skip data after the only skip point is loaded. 
* * To illustrate this, we can call skipTo(d[5]), since skip point d[3] has smaller docId, @@ -50,8 +50,8 @@ import org.apache.lucene.store.IndexInput; * Therefore, we'll trim df before passing it to the interface. see trim(int) * */ -final class BlockSkipReader extends MultiLevelSkipListReader { - // private boolean DEBUG = BlockPostingsReader.DEBUG; +final class Lucene41SkipReader extends MultiLevelSkipListReader { + // private boolean DEBUG = Lucene41PostingsReader.DEBUG; private final int blockSize; private long docPointer[]; @@ -66,7 +66,7 @@ final class BlockSkipReader extends MultiLevelSkipListReader { private long lastDocPointer; private int lastPosBufferUpto; - public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { + public Lucene41SkipReader(IndexInput skipStream, int maxSkipLevels, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { super(skipStream, maxSkipLevels, blockSize, 8); this.blockSize = blockSize; docPointer = new long[maxSkipLevels]; @@ -91,7 +91,7 @@ final class BlockSkipReader extends MultiLevelSkipListReader { /** * Trim original docFreq to tell skipReader read proper number of skip points. * - * Since our definition in BlockSkip* is a little different from MultiLevelSkip* + * Since our definition in Lucene41Skip* is a little different from MultiLevelSkip* * This trimmed docFreq will prevent skipReader from: * 1. silly reading a non-existed skip point after the last block boundary * 2. 
moving into the vInt block diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java similarity index 94% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java index 409930c6fed..1bd082859d9 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -43,8 +43,8 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter; * 4. start offset. * */ -final class BlockSkipWriter extends MultiLevelSkipListWriter { - // private boolean DEBUG = BlockPostingsReader.DEBUG; +final class Lucene41SkipWriter extends MultiLevelSkipListWriter { + // private boolean DEBUG = Lucene41PostingsReader.DEBUG; private int[] lastSkipDoc; private long[] lastSkipDocPointer; @@ -66,7 +66,7 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { private boolean fieldHasOffsets; private boolean fieldHasPayloads; - public BlockSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { + public Lucene41SkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { super(blockSize, 8, maxSkipLevels, docCount); this.docOut = docOut; this.posOut = posOut; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html new file mode 100644 index 00000000000..aff3d7a572f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html @@ -0,0 +1,396 @@ + 
+ + + + + + +Lucene 4.1 file format. + +

    Apache Lucene - Index File Formats

    +
    +
    + +

    Introduction

    +
    +

    This document defines the index file formats used in this version of Lucene. +If you are using a different version of Lucene, please consult the copy of +docs/ that was distributed with +the version you are using.

    +

    Apache Lucene is written in Java, but several efforts are underway to write +versions of +Lucene in other programming languages. If these versions are to remain +compatible with Apache Lucene, then a language-independent definition of the +Lucene index format is required. This document thus attempts to provide a +complete and independent definition of the Apache Lucene file formats.

    +

    As Lucene evolves, this document should evolve. Versions of Lucene in +different programming languages should endeavor to agree on file formats, and +generate new versions of this document.

    +
    + +

    Definitions

    +
    +

    The fundamental concepts in Lucene are index, document, field and term.

    +

    An index contains a sequence of documents.

    +
      +
    • A document is a sequence of fields.
    • +
    • A field is a named sequence of terms.
    • +
    • A term is a sequence of bytes.
    • +
    +

    The same sequence of bytes in two different fields is considered a different +term. Thus terms are represented as a pair: the string naming the field, and the +bytes within the field.

    + +

    Inverted Indexing

    +

    The index stores statistics about terms in order to make term-based search +more efficient. Lucene's index falls into the family of indexes known as an +inverted index. This is because it can list, for a term, the documents +that contain it. This is the inverse of the natural relationship, in which +documents list terms.

    + +

    Types of Fields

    +

    In Lucene, fields may be stored, in which case their text is stored +in the index literally, in a non-inverted manner. Fields that are inverted are +called indexed. A field may be both stored and indexed.

    +

    The text of a field may be tokenized into terms to be indexed, or the +text of a field may be used literally as a term to be indexed. Most fields are +tokenized, but sometimes it is useful for certain identifier fields to be +indexed literally.

    +

    See the {@link org.apache.lucene.document.Field Field} +java docs for more information on Fields.

    + +

    Segments

    +

    Lucene indexes may be composed of multiple sub-indexes, or segments. +Each segment is a fully independent index, which could be searched separately. +Indexes evolve by:

    +
      +
    1. Creating new segments for newly added documents.
    2. +
    3. Merging existing segments.
    4. +
    +

    Searches may involve multiple segments and/or multiple indexes, each index +potentially composed of a set of segments.

    + +

    Document Numbers

    +

    Internally, Lucene refers to documents by an integer document number. +The first document added to an index is numbered zero, and each subsequent +document added gets a number one greater than the previous.

    +

    Note that a document's number may change, so caution should be taken when +storing these numbers outside of Lucene. In particular, numbers may change in +the following situations:

    +
      +
    • +

      The numbers stored in each segment are unique only within the segment, and +must be converted before they can be used in a larger context. The standard +technique is to allocate each segment a range of values, based on the range of +numbers used in that segment. To convert a document number from a segment to an +external value, the segment's base document number is added. To convert +an external value back to a segment-specific value, the segment is identified +by the range that the external value is in, and the segment's base value is +subtracted. For example two five document segments might be combined, so that +the first segment has a base value of zero, and the second of five. Document +three from the second segment would have an external value of eight.

      +
    • +
    • +

      When documents are deleted, gaps are created in the numbering. These are +eventually removed as the index evolves through merging. Deleted documents are +dropped when segments are merged. A freshly-merged segment thus has no gaps in +its numbering.

      +
    • +
    +
    + +

    Index Structure Overview

    +
    +

    Each segment index maintains the following:

    +
      +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. + This contains metadata about a segment, such as the number of documents, + what files it uses, +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}. + This contains the set of field names used in the index. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Stored Field values}. +This contains, for each document, a list of attribute-value pairs, where the attributes +are field names. These are used to store auxiliary information about the document, such as +its title, url, or an identifier to access a database. The set of stored fields are what is +returned for each hit when searching. This is keyed by document number. +
    • +
    • +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. +A dictionary containing all of the terms used in all of the +indexed fields of all of the documents. The dictionary also contains the number +of documents which contain the term, and pointers to the term's frequency and +proximity data. +
    • +
    • +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. +For each term in the dictionary, the numbers of all the +documents that contain that term, and the frequency of the term in that +document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) +
    • +
    • +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. +For each term in the dictionary, the positions that the +term occurs in each document. Note that this will not exist if all fields in +all documents omit position data. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Normalization factors}. +For each field in each document, a value is stored +that is multiplied into the score for hits on that field. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}. +For each field in each document, the term vector (sometimes +called document vector) may be stored. A term vector consists of term text and +term frequency. To add Term Vectors to your index see the +{@link org.apache.lucene.document.Field Field} constructors +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-document values}. +Like stored values, these are also keyed by document +number, but are generally intended to be loaded into main memory for fast +access. Whereas stored values are generally intended for summary results from +searches, per-document values are useful for things like scoring factors. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. +An optional file indicating which documents are deleted. +
    • +
    +

    Details on each of these are provided in their linked pages.

    +
    + +

    File Naming

    +
    +

    All files belonging to a segment have the same name with varying extensions. +The extensions correspond to the different file formats described below. When +using the Compound File format (default in 1.4 and greater) these files (except +for the Segment info file, the Lock file, and Deleted documents file) are collapsed +into a single .cfs file (see below for details)

    +

    Typically, all segments in an index are stored in a single directory, +although this is not required.

    +

    As of version 2.1 (lock-less commits), file names are never re-used (there +is one exception, "segments.gen", see below). That is, when any file is saved +to the Directory it is given a never before used filename. This is achieved +using a simple generations approach. For example, the first segments file is +segments_1, then segments_2, etc. The generation is a sequential long integer +represented in alpha-numeric (base 36) form.

    +
    + +

    Summary of File Extensions

    +
    +

    The following table summarizes the names and extensions of the files in +Lucene:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExtensionBrief Description
    {@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
    Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same +file.
    {@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
    {@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for +systems that frequently run out of file handles.
    {@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}.fnmStores information about the fields
    {@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Field Index}.fdxContains pointers to field data
    {@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Field Data}.fdtThe stored fields for documents
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
    {@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Norms}.nrm.cfs, .nrm.cfeEncodes length and boost factors for docs and fields
    {@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-Document Values}.dv.cfs, .dv.cfeEncodes additional scoring factors or other per-document information.
    {@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
    {@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
    {@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
    {@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about what documents are deleted
    +
    + +

    Lock File

    +The write lock, which is stored in the index directory by default, is named +"write.lock". If the lock directory is different from the index directory then +the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix +derived from the full path to the index directory. When this file is present, a +writer is currently modifying the index (adding or removing documents). This +lock file ensures that only one writer is modifying the index at a time.

    + +

    History

    +

    Compatibility notes are provided in this document, describing how file +formats have changed from prior versions:

    +
      +
    • In version 2.1, the file format was changed to allow lock-less commits (ie, +no more commit lock). The change is fully backwards compatible: you can open a +pre-2.1 index for searching or adding/deleting of docs. When the new segments +file is saved (committed), it will be written in the new file format (meaning +no specific "upgrade" process is needed). But note that once a commit has +occurred, pre-2.1 Lucene will not be able to read the index.
    • +
    • In version 2.3, the file format was changed to allow segments to share a +single set of doc store (vectors & stored fields) files. This allows for +faster indexing in certain cases. The change is fully backwards compatible (in +the same way as the lock-less commits change in 2.1).
    • +
    • In version 2.4, Strings are now written as true UTF-8 byte sequence, not +Java's modified UTF-8. See +LUCENE-510 for details.
    • +
    • In version 2.9, an optional opaque Map<String,String> CommitUserData +may be passed to IndexWriter's commit methods (and later retrieved), which is +recorded in the segments_N file. See +LUCENE-1382 for details. Also, +diagnostics were added to each segment written recording details about why it +was written (due to flush, merge; which OS/JRE was used; etc.). See issue +LUCENE-1654 for details.
    • +
    • In version 3.0, compressed fields are no longer written to the index (they +can still be read, but on merge the new segment will write them, uncompressed). +See issue LUCENE-1960 +for details.
    • +
    • In version 3.1, segments records the code version that created them. See +LUCENE-2720 for details. +Additionally segments track explicitly whether or not they have term vectors. +See LUCENE-2811 +for details.
    • +
    • In version 3.2, numeric fields are written as natively to stored fields +file, previously they were stored in text format only.
    • +
    • In version 3.4, fields can omit position data while still indexing term +frequencies.
    • +
    • In version 4.0, the format of the inverted index became extensible via +the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage +({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization +factors need no longer be a single byte, they can be any DocValues +{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode +strings, they can be any byte sequence. Term offsets can optionally be indexed +into the postings lists. Payloads can be stored in the term vectors.
    • +
    • In version 4.1, the format of the postings list changed to use either +of FOR compression or variable-byte encoding, depending upon the frequency +of the term.
    • +
    + +

    Limitations

    +
    +

    When referring to term numbers, Lucene's current implementation uses a Java +int to hold the term index, which means the +maximum number of unique terms in any single index segment is ~2.1 billion +times the term index interval (default 128) = ~274 billion. This is technically +not a limitation of the index file format, just of Lucene's current +implementation.

    +

    Similarly, Lucene uses a Java int to refer to +document numbers, and the index file format uses an Int32 +on-disk to store document numbers. This is a limitation +of both the index file format and the current implementation. Eventually these +should be replaced with either UInt64 values, or +better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

    +
    + + diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index 82c3e5c58f8..de1cc736ea9 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -14,3 +14,4 @@ # limitations under the License. org.apache.lucene.codecs.lucene40.Lucene40Codec +org.apache.lucene.codecs.lucene41.Lucene41Codec diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 112a1698302..023d9c9e1a6 100644 --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -14,3 +14,4 @@ # limitations under the License. org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat +org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat diff --git a/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java index aa5bf5bd2d8..4dcb5353fc1 100644 --- a/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java @@ -19,7 +19,7 @@ package org.apache.lucene; import org.apache.lucene.analysis.*; import org.apache.lucene.codecs.*; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.search.*; @@ -31,11 +31,11 @@ import org.apache.lucene.util.*; public class TestExternalCodecs extends LuceneTestCase { - private static final class CustomPerFieldCodec extends Lucene40Codec { + private static final class CustomPerFieldCodec extends Lucene41Codec { 
private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly"); - private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene40"); - private final PostingsFormat pulsingFormat = PostingsFormat.forName("Pulsing40"); + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); + private final PostingsFormat pulsingFormat = PostingsFormat.forName("Pulsing41"); @Override public PostingsFormat getPostingsFormatForField(String field) { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java similarity index 71% rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat.java rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java index 235c85fe2e7..dd3231e36c3 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -18,22 +18,13 @@ package org.apache.lucene.codecs.block; */ import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.block.BlockPostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** * Tests BlockPostingsFormat */ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase { - private final PostingsFormat postings = new BlockPostingsFormat(); - private final Codec codec = new Lucene40Codec() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - return postings; - } - }; + private final Codec codec = new Lucene41Codec(); @Override protected 
Codec getCodec() { diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java similarity index 91% rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat2.java rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java index 8b462d2e587..0a49540f73d 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat2.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -19,7 +19,6 @@ package org.apache.lucene.codecs.block; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -47,10 +46,10 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase { super.setUp(); dir = newFSDirectory(_TestUtil.getTempDir("testDFBlockSize")); iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwc.setCodec(new Lucene40Codec() { + iwc.setCodec(new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { - return PostingsFormat.forName("Block"); + return PostingsFormat.forName("Lucene41"); } }); iw = new RandomIndexWriter(random(), dir, iwc); @@ -88,7 +87,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase { /** tests terms with df = blocksize */ public void testDFBlockSize() throws Exception { Document doc = newDocument(); - for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE; i++) { + for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE; i++) { 
for (Field f : doc.getFields()) { f.setStringValue(f.name() + " " + f.name() + "_2"); } @@ -99,7 +98,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase { /** tests terms with df % blocksize = 0 */ public void testDFBlockSizeMultiple() throws Exception { Document doc = newDocument(); - for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE * 16; i++) { + for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE * 16; i++) { for (Field f : doc.getFields()) { f.setStringValue(f.name() + " " + f.name() + "_2"); } @@ -110,7 +109,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase { /** tests terms with ttf = blocksize */ public void testTTFBlockSize() throws Exception { Document doc = newDocument(); - for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE/2; i++) { + for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE/2; i++) { for (Field f : doc.getFields()) { f.setStringValue(f.name() + " " + f.name() + " " + f.name() + "_2 " + f.name() + "_2"); } @@ -121,7 +120,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase { /** tests terms with ttf % blocksize = 0 */ public void testTTFBlockSizeMultiple() throws Exception { Document doc = newDocument(); - for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE/2; i++) { + for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE/2; i++) { for (Field f : doc.getFields()) { String proto = (f.name() + " " + f.name() + " " + f.name() + " " + f.name() + " " + f.name() + "_2 " + f.name() + "_2 " + f.name() + "_2 " + f.name() + "_2"); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat3.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java similarity index 98% rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat3.java rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java index 9ef0aae6726..34bd00789d3 100644 --- 
a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat3.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.MockVariableLengthPayloadFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -64,7 +64,7 @@ import org.apache.lucene.util.automaton.RegExp; * Tests partial enumeration (only pulling a subset of the prox data) */ public class TestBlockPostingsFormat3 extends LuceneTestCase { - static final int MAXDOC = BlockPostingsFormat.BLOCK_SIZE * 20; + static final int MAXDOC = Lucene41PostingsFormat.BLOCK_SIZE * 20; // creates 6 fields with different options and does "duels" of fields against each other public void test() throws Exception { @@ -85,10 +85,10 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { } }; IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); - iwc.setCodec(new Lucene40Codec() { + iwc.setCodec(new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { - return PostingsFormat.forName("Block"); + return PostingsFormat.forName("Lucene41"); // TODO: we could actually add more fields implemented with different PFs } }); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestForUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java similarity index 92% rename from 
lucene/codecs/src/test/org/apache/lucene/codecs/block/TestForUtil.java rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java index 025a6348164..3831033a6ea 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestForUtil.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,9 +17,9 @@ package org.apache.lucene.codecs.block; * limitations under the License. */ -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_DATA_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_ENCODED_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; import java.io.IOException; import java.util.Arrays; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java index dac3b5af94a..582e774d126 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java @@ -21,10 +21,10 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import 
org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; +import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -142,7 +142,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { assertQuery(new Term("content", "ccc"), dir, 10); assertQuery(new Term("content", "aaa"), dir, 10); - Lucene40Codec codec = (Lucene40Codec)iwconf.getCodec(); + Lucene41Codec codec = (Lucene41Codec)iwconf.getCodec(); iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) .setOpenMode(OpenMode.APPEND).setCodec(codec); @@ -158,7 +158,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } addDocs2(writer, 10); writer.commit(); - codec = (Lucene40Codec)iwconf.getCodec(); + codec = (Lucene41Codec)iwconf.getCodec(); assertEquals(30, writer.maxDoc()); assertQuery(new Term("content", "bbb"), dir, 10); assertQuery(new Term("content", "ccc"), dir, 10); //// @@ -200,8 +200,8 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } - public static class MockCodec extends Lucene40Codec { - final PostingsFormat lucene40 = new Lucene40PostingsFormat(); + public static class MockCodec extends Lucene41Codec { + final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); final PostingsFormat mockSep = new MockSepPostingsFormat(); @@ -217,8 +217,8 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } } - public static class MockCodec2 extends Lucene40Codec { - final PostingsFormat lucene40 = new Lucene40PostingsFormat(); + public static class MockCodec2 extends Lucene41Codec { + final PostingsFormat lucene40 = new Lucene41PostingsFormat(); final PostingsFormat simpleText = new SimpleTextPostingsFormat(); @Override @@ -268,13 
+268,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } public void testSameCodecDifferentInstance() throws Exception { - Codec codec = new Lucene40Codec() { + Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if ("id".equals(field)) { - return new Pulsing40PostingsFormat(1); + return new Pulsing41PostingsFormat(1); } else if ("date".equals(field)) { - return new Pulsing40PostingsFormat(1); + return new Pulsing41PostingsFormat(1); } else { return super.getPostingsFormatForField(field); } @@ -284,13 +284,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase { } public void testSameCodecDifferentParams() throws Exception { - Codec codec = new Lucene40Codec() { + Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { if ("id".equals(field)) { - return new Pulsing40PostingsFormat(1); + return new Pulsing41PostingsFormat(1); } else if ("date".equals(field)) { - return new Pulsing40PostingsFormat(2); + return new Pulsing41PostingsFormat(2); } else { return super.getPostingsFormatForField(field); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java index 31b4d190f80..42db793e0db 100755 --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -27,8 +27,8 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; +import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.document.Document; import 
org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -1058,9 +1058,9 @@ public class TestAddIndexes extends LuceneTestCase { aux2.close(); } - private static final class CustomPerFieldCodec extends Lucene40Codec { + private static final class CustomPerFieldCodec extends Lucene41Codec { private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText"); - private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene40"); + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep"); @Override @@ -1109,7 +1109,7 @@ public class TestAddIndexes extends LuceneTestCase { private static final class UnRegisteredCodec extends FilterCodec { public UnRegisteredCodec() { - super("NotRegistered", new Lucene40Codec()); + super("NotRegistered", new Lucene41Codec()); } } @@ -1138,7 +1138,7 @@ public class TestAddIndexes extends LuceneTestCase { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - conf.setCodec(_TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(1 + random().nextInt(20)))); + conf.setCodec(_TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1 + random().nextInt(20)))); IndexWriter w = new IndexWriter(dir, conf); try { w.addIndexes(toAdd); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java similarity index 95% rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java rename to lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java index d6535df143d..8466b90a521 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java +++ 
b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.lucene40; +package org.apache.lucene.index; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -35,13 +35,13 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; /** - * Test that a plain Lucene40Codec puts codec headers in all files. + * Test that a plain default puts codec headers in all files. */ public class TestAllFilesHaveCodecHeader extends LuceneTestCase { public void test() throws Exception { Directory dir = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - conf.setCodec(Codec.forName("Lucene40")); + conf.setCodec(Codec.forName("Lucene41")); // riw should sometimes create docvalues fields, etc RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf); Document doc = new Document(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java index 3bc247dcc1f..08819143a1c 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java @@ -28,7 +28,7 @@ import java.util.Random; import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -828,7 +828,7 @@ public void testFilesOpenClose() throws IOException { // LUCENE-1609: don't load terms index public void testNoTermsIndex() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new 
MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()))); Document doc = new Document(); doc.add(newTextField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO)); doc.add(newTextField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO)); @@ -848,7 +848,7 @@ public void testFilesOpenClose() throws IOException { writer = new IndexWriter( dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())). - setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())). + setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())). setMergePolicy(newLogMergePolicy(10)) ); writer.addDocument(doc); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java b/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java index 22b2360fcd8..5c5adce7650 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java @@ -110,7 +110,7 @@ public class TestDocTermOrds extends LuceneTestCase { // Sometimes swap in codec that impls ord(): if (random().nextInt(10) == 7) { // Make sure terms index has ords: - Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene40WithOrds")); + Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds")); conf.setCodec(codec); } @@ -207,7 +207,7 @@ public class TestDocTermOrds extends LuceneTestCase { // Sometimes swap in codec that impls ord(): if (random().nextInt(10) == 7) { - Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene40WithOrds")); + Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds")); conf.setCodec(codec); } diff --git 
a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java index 0c9bd4b5a79..bb304c4a666 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -61,7 +61,7 @@ public class TestDuelingCodecs extends LuceneTestCase { public void setUp() throws Exception { super.setUp(); - // for now its SimpleText vs Lucene40(random postings format) + // for now its SimpleText vs Lucene41(random postings format) // as this gives the best overall coverage. when we have more // codecs we should probably pick 2 from Codec.availableCodecs() diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFlex.java b/lucene/core/src/test/org/apache/lucene/index/TestFlex.java index 76a1ee53481..2bd65a9ab85 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFlex.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFlex.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.store.*; import org.apache.lucene.analysis.*; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.document.*; import org.apache.lucene.util.*; @@ -65,7 +65,7 @@ public class TestFlex extends LuceneTestCase { public void testTermOrd() throws Exception { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))); + new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()))); Document doc = new Document(); doc.add(newTextField("f", "a b c", Field.Store.NO)); w.addDocument(doc); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java 
b/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java index 42de0b6e414..2ee0449b5d0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.store.Directory; @@ -69,7 +69,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase { public void testSimpleSkip() throws IOException { Directory dir = new CountingRAMDirectory(new RAMDirectory()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())).setMergePolicy(newLogMergePolicy())); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())).setMergePolicy(newLogMergePolicy())); Term term = new Term("test", "a"); for (int i = 0; i < 5000; i++) { Document d1 = new Document(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java index 4554243ca1a..1379a3c223e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java @@ -24,7 +24,7 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util._TestUtil; import org.apache.lucene.analysis.MockAnalyzer; -import 
org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; @@ -75,7 +75,7 @@ public class TestSegmentTermEnum extends LuceneTestCase { public void testPrevTermAtEnd() throws IOException { - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()))); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat()))); addDoc(writer, "aaa bbb"); writer.close(); SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(dir)); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java b/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java index ef7d047b06d..bff508ff8ea 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java @@ -25,8 +25,8 @@ import org.apache.lucene.codecs.Codec; // enough to test the basics via Codec public class TestNamedSPILoader extends LuceneTestCase { public void testLookup() { - Codec codec = Codec.forName("Lucene40"); - assertEquals("Lucene40", codec.getName()); + Codec codec = Codec.forName("Lucene41"); + assertEquals("Lucene41", codec.getName()); } // we want an exception if its not found. 
@@ -39,6 +39,6 @@ public class TestNamedSPILoader extends LuceneTestCase { public void testAvailableServices() { Set codecs = Codec.availableCodecs(); - assertTrue(codecs.contains("Lucene40")); + assertTrue(codecs.contains("Lucene41")); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java similarity index 87% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java index 50c5a98bdfa..d5229f6f496 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java @@ -22,19 +22,19 @@ import java.io.IOException; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; /** * A class used for testing {@link BloomFilteringPostingsFormat} with a concrete - * delegate (Lucene40). Creates a Bloom filter on ALL fields and with tiny + * delegate (Lucene41). Creates a Bloom filter on ALL fields and with tiny * amounts of memory reserved for the filter. DO NOT USE IN A PRODUCTION * APPLICATION This is not a realistic application of Bloom Filters as they * ordinarily are larger and operate on only primary key type fields. 
*/ -public final class TestBloomFilteredLucene40Postings extends PostingsFormat { +public final class TestBloomFilteredLucene41Postings extends PostingsFormat { private BloomFilteringPostingsFormat delegate; @@ -54,9 +54,9 @@ public final class TestBloomFilteredLucene40Postings extends PostingsFormat { } } - public TestBloomFilteredLucene40Postings() { - super("TestBloomFilteredLucene40Postings"); - delegate = new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(), + public TestBloomFilteredLucene41Postings() { + super("TestBloomFilteredLucene41Postings"); + delegate = new BloomFilteringPostingsFormat(new Lucene41PostingsFormat(), new LowMemoryBloomFactory()); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java similarity index 89% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java index 42f0d853688..8865136be1b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.lucene40ords; +package org.apache.lucene.codecs.lucene41ords; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -30,9 +30,9 @@ import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader; import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter; import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase; import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs -import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader; -import 
org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.BytesRef; @@ -41,18 +41,18 @@ import org.apache.lucene.util.BytesRef; // any PostingsBaseFormat and make it ord-able... /** - * Customized version of {@link Lucene40Codec} that uses + * Customized version of {@link Lucene41Codec} that uses * {@link FixedGapTermsIndexWriter}. */ -public final class Lucene40WithOrds extends PostingsFormat { +public final class Lucene41WithOrds extends PostingsFormat { - public Lucene40WithOrds() { - super("Lucene40WithOrds"); + public Lucene41WithOrds() { + super("Lucene41WithOrds"); } @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase docs = new Lucene40PostingsWriter(state); + PostingsWriterBase docs = new Lucene41PostingsWriter(state); // TODO: should we make the terms index more easily // pluggable? 
Ie so that this codec would record which @@ -91,7 +91,7 @@ public final class Lucene40WithOrds extends PostingsFormat { @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postings = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + PostingsReaderBase postings = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); TermsIndexReaderBase indexReader; boolean success = false; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/package.html similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/package.html rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/package.html diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index b8676d7dafc..03737a41983 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -29,11 +29,10 @@ import java.util.Set; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.asserting.AssertingPostingsFormat; -import org.apache.lucene.codecs.block.BlockPostingsFormat; -import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; -import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; +import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; +import 
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings; import org.apache.lucene.codecs.memory.DirectPostingsFormat; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; @@ -41,7 +40,7 @@ import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat; -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; +import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -55,7 +54,7 @@ import org.apache.lucene.util._TestUtil; * documents in different orders and the test will still be deterministic * and reproducable. */ -public class RandomCodec extends Lucene40Codec { +public class RandomCodec extends Lucene41Codec { /** Shuffled list of postings formats to use for new mappings */ private List formats = new ArrayList(); @@ -94,23 +93,22 @@ public class RandomCodec extends Lucene40Codec { int lowFreqCutoff = _TestUtil.nextInt(random, 2, 100); add(avoidCodecs, - new Lucene40PostingsFormat(minItemsPerBlock, maxItemsPerBlock), - new BlockPostingsFormat(minItemsPerBlock, maxItemsPerBlock), + new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock), new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock), LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? 
Integer.MAX_VALUE : lowFreqCutoff)), - new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), + new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), // add pulsing again with (usually) different parameters - new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), + new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene40Postings to be constructed //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing //with such "wrapper" classes? - new TestBloomFilteredLucene40Postings(), + new TestBloomFilteredLucene41Postings(), new MockSepPostingsFormat(), new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)), new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)), new MockRandomPostingsFormat(random), new NestedPulsingPostingsFormat(), - new Lucene40WithOrds(), + new Lucene41WithOrds(), new SimpleTextPostingsFormat(), new AssertingPostingsFormat(), new MemoryPostingsFormat(true, random.nextFloat()), diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java index b432416c15b..afa1ccdb30e 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java @@ -32,6 +32,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.asserting.AssertingCodec; import org.apache.lucene.codecs.compressing.CompressingCodec; import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import 
org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; @@ -129,26 +130,24 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { avoidCodecs.addAll(Arrays.asList(a.value())); } - PREFLEX_IMPERSONATION_IS_ACTIVE = false; savedCodec = Codec.getDefault(); int randomVal = random.nextInt(10); - /* note: re-enable this if we make a 4.x impersonator - if ("Lucene3x".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && + + if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && randomVal < 2 && - !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup - codec = Codec.forName("Lucene3x"); - assert (codec instanceof PreFlexRWCodec) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - PREFLEX_IMPERSONATION_IS_ACTIVE = true; - } else */ if (!"random".equals(TEST_POSTINGSFORMAT)) { + !shouldAvoidCodec("Lucene40"))) { + codec = Codec.forName("Lucene40"); + // nocommit: assert (codec instanceof PreFlexRWCodec) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; + } else if (!"random".equals(TEST_POSTINGSFORMAT)) { final PostingsFormat format; if ("MockRandom".equals(TEST_POSTINGSFORMAT)) { format = new MockRandomPostingsFormat(random); } else { format = PostingsFormat.forName(TEST_POSTINGSFORMAT); } - codec = new Lucene40Codec() { + codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 4c82a0146bb..98619f2243b 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ 
b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -19,7 +19,7 @@ org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat org.apache.lucene.codecs.mocksep.MockSepPostingsFormat org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat -org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds -org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings +org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds +org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings org.apache.lucene.codecs.asserting.AssertingPostingsFormat From b1a58d9a9c9435bf6a9552f160b301fb1af87b65 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 12 Oct 2012 02:17:48 +0000 Subject: [PATCH 03/20] LUCENE-4446: cut over remaining tests etc git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4446@1397418 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/blockterms/TestFixedGapPostingsFormat.java | 4 ++-- .../lucene/codecs/bloom/TestBloomPostingsFormat.java | 4 ++-- .../compressing/TestCompressingStoredFieldsFormat.java | 6 +++--- .../intblock/TestFixedIntBlockPostingsFormat.java | 4 ++-- .../intblock/TestVariableIntBlockPostingsFormat.java | 4 ++-- .../lucene/codecs/memory/TestDirectPostingsFormat.java | 4 ++-- .../lucene/codecs/memory/TestMemoryPostingsFormat.java | 4 ++-- .../lucene/codecs/sep/TestSepPostingsFormat.java | 4 ++-- .../src/java/org/apache/lucene/codecs/FilterCodec.java | 2 +- .../apache/lucene/codecs/lucene40/Lucene40Codec.java | 3 ++- .../org/apache/lucene/index/LiveIndexWriterConfig.java | 10 +++++----- .../lucene/codecs/lucene40/TestReuseDocsEnum.java | 1 + .../apache/lucene/index/memory/MemoryIndexTest.java | 4 ++-- .../apache/lucene/codecs/asserting/AssertingCodec.java | 6 +++--- .../codecs/asserting/AssertingPostingsFormat.java | 6 +++--- .../lucene/codecs/compressing/CompressingCodec.java | 6 +++--- 
.../codecs/mockrandom/MockRandomPostingsFormat.java | 3 +++ .../nestedpulsing/NestedPulsingPostingsFormat.java | 10 +++++----- .../src/java/org/apache/lucene/util/_TestUtil.java | 4 ++-- .../java/org/apache/solr/core/SchemaCodecFactory.java | 4 ++-- 20 files changed, 49 insertions(+), 44 deletions(-) diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java index 055bc21e2fb..141ff99f5fe 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.blockterms; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -30,7 +30,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; // TODO: ensure both of these are also in rotation in RandomCodec public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase { private final PostingsFormat postings = new Lucene41WithOrds(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java index 143163feead..6c3034c5fd3 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java @@ 
-19,7 +19,7 @@ package org.apache.lucene.codecs.bloom; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -27,7 +27,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; */ public class TestBloomPostingsFormat extends BasePostingsFormatTestCase { private final PostingsFormat postings = new TestBloomFilteredLucene41Postings(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java index 9b25a4c1090..bb3a4824589 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java @@ -23,7 +23,7 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; @@ -90,10 +90,10 @@ public class TestCompressingStoredFieldsFormat extends LuceneTestCase { if (random().nextBoolean() && (i % (data.length / 10) == 0)) { iw.w.close(); // switch codecs - if (iwConf.getCodec() instanceof Lucene40Codec) { + if (iwConf.getCodec() instanceof Lucene41Codec) { iwConf.setCodec(CompressingCodec.randomInstance(random())); } else { - iwConf.setCodec(new Lucene40Codec()); + iwConf.setCodec(new 
Lucene41Codec()); } iw = new RandomIndexWriter(random(), dir, iwConf); } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java index e6338a0a674..93a1b548a62 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.intblock; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestFixedIntBlockPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize blocksize private final PostingsFormat postings = new MockFixedIntBlockPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java index c7955acc9f5..156f91840b0 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.intblock; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import 
org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestVariableIntBlockPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize blocksize private final PostingsFormat postings = new MockVariableIntBlockPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java index caf55a83084..bab45bcc366 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.memory; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -29,7 +29,7 @@ public class TestDirectPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize parameters private final PostingsFormat postings = new DirectPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java 
b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java index ca07382b3a9..93892c7b640 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.memory; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -28,7 +28,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestMemoryPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize doPack private final PostingsFormat postings = new MemoryPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java index 318822ce19d..8c6df1d93fd 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.sep; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestSepPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize 
cutoff private final PostingsFormat postings = new MockSepPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java index 4dfae68232a..ca8e439b6d8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java @@ -27,7 +27,7 @@ package org.apache.lucene.codecs; * public final class CustomCodec extends FilterCodec { * * public CustomCodec() { - * super("CustomCodec", new Lucene40Codec()); + * super("CustomCodec", new Lucene41Codec()); * } * * public LiveDocsFormat liveDocsFormat() { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java index b98205ec8cd..076eeeaeadd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java @@ -41,7 +41,8 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; // NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader // (it writes a minor version, etc). 
-public class Lucene40Codec extends Codec { +// nocommit: make readonly and add impersonator +public final class Lucene40Codec extends Codec { private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat(); private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat(); private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat(); diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 7652fa24211..4d76f595494 100755 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -186,14 +186,14 @@ public class LiveIndexWriterConfig { * NOTE: This parameter does not apply to all PostingsFormat implementations, * including the default one in this release. It only makes sense for term indexes * that are implemented as a fixed gap between terms. For example, - * {@link Lucene40PostingsFormat} implements the term index instead based upon how + * {@link Lucene41PostingsFormat} implements the term index instead based upon how * terms share prefixes. To configure its parameters (the minimum and maximum size - * for a block), you would instead use {@link Lucene40PostingsFormat#Lucene40PostingsFormat(int, int)}. + * for a block), you would instead use {@link Lucene41PostingsFormat#Lucene41PostingsFormat(int, int)}. 
* which can also be configured on a per-field basis: *
        * //customize Lucene40PostingsFormat, passing minBlockSize=50, maxBlockSize=100
    -   * final PostingsFormat tweakedPostings = new Lucene40PostingsFormat(50, 100);
    -   * iwc.setCodec(new Lucene40Codec() {
    +   * final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
    +   * iwc.setCodec(new Lucene41Codec() {
        *   @Override
        *   public PostingsFormat getPostingsFormatForField(String field) {
        *     if (field.equals("fieldWithTonsOfTerms"))
    diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    index e5a0ae51ff0..175f7a08aa7 100644
    --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    @@ -38,6 +38,7 @@ import org.apache.lucene.util.LineFileDocs;
     import org.apache.lucene.util.LuceneTestCase;
     import org.apache.lucene.util._TestUtil;
     
    +// nocommit: really this should be in BaseTestPF or somewhere else? useful test!
     public class TestReuseDocsEnum extends LuceneTestCase {
     
       public void testReuseDocsEnumNoReuse() throws IOException {
    diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    index 68ac8106fd9..77dcedf5036 100644
    --- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
     import org.apache.lucene.analysis.MockAnalyzer;
     import org.apache.lucene.analysis.MockTokenFilter;
     import org.apache.lucene.analysis.MockTokenizer;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.index.AtomicReader;
    @@ -123,7 +123,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
         Directory ramdir = new RAMDirectory();
         Analyzer analyzer = randomAnalyzer();
         IndexWriter writer = new IndexWriter(ramdir,
    -                                         new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())));
    +                                         new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())));
         Document doc = new Document();
         Field field1 = newTextField("foo", fooField.toString(), Field.Store.NO);
         Field field2 = newTextField("term", termField.toString(), Field.Store.NO);
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java
    index 49a20b09fff..7d775e3613f 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java
    @@ -20,10 +20,10 @@ package org.apache.lucene.codecs.asserting;
     import org.apache.lucene.codecs.FilterCodec;
     import org.apache.lucene.codecs.PostingsFormat;
     import org.apache.lucene.codecs.TermVectorsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     
     /**
    - * Acts like {@link Lucene40Codec} but with additional asserts.
    + * Acts like {@link Lucene41Codec} but with additional asserts.
      */
     public final class AssertingCodec extends FilterCodec {
     
    @@ -31,7 +31,7 @@ public final class AssertingCodec extends FilterCodec {
       private final TermVectorsFormat vectors = new AssertingTermVectorsFormat();
     
       public AssertingCodec() {
    -    super("Asserting", new Lucene40Codec());
    +    super("Asserting", new Lucene41Codec());
       }
     
       @Override
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
    index 775f972a1eb..94b88117e23 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
    @@ -27,7 +27,7 @@ import org.apache.lucene.codecs.PostingsConsumer;
     import org.apache.lucene.codecs.PostingsFormat;
     import org.apache.lucene.codecs.TermStats;
     import org.apache.lucene.codecs.TermsConsumer;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.index.AssertingAtomicReader;
     import org.apache.lucene.index.FieldInfo;
     import org.apache.lucene.index.FieldInfo.IndexOptions;
    @@ -38,10 +38,10 @@ import org.apache.lucene.util.BytesRef;
     import org.apache.lucene.util.OpenBitSet;
     
     /**
    - * Just like {@link Lucene40PostingsFormat} but with additional asserts.
    + * Just like {@link Lucene41PostingsFormat} but with additional asserts.
      */
     public final class AssertingPostingsFormat extends PostingsFormat {
    -  private final PostingsFormat in = new Lucene40PostingsFormat();
    +  private final PostingsFormat in = new Lucene41PostingsFormat();
       
       public AssertingPostingsFormat() {
         super("Asserting");
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
    index 91f6055b79f..904fedf0f9b 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
    @@ -21,14 +21,14 @@ import java.util.Random;
     
     import org.apache.lucene.codecs.FilterCodec;
     import org.apache.lucene.codecs.StoredFieldsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     
     import com.carrotsearch.randomizedtesting.generators.RandomInts;
     import com.carrotsearch.randomizedtesting.generators.RandomPicks;
     
     /**
      * A codec that uses {@link CompressingStoredFieldsFormat} for its stored
    - * fields and delegates to {@link Lucene40Codec} for everything else.
    + * fields and delegates to {@link Lucene41Codec} for everything else.
      */
     public class CompressingCodec extends FilterCodec {
     
    @@ -49,7 +49,7 @@ public class CompressingCodec extends FilterCodec {
        */
       public CompressingCodec(CompressionMode compressionMode, int chunkSize,
           CompressingStoredFieldsIndex storedFieldsIndexFormat) {
    -    super("Compressing", new Lucene40Codec());
    +    super("Compressing", new Lucene41Codec());
         this.storedFieldsFormat = new CompressingStoredFieldsFormat(compressionMode, chunkSize, storedFieldsIndexFormat);
       }
     
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    index c44f05bcc38..9aa48f52091 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    @@ -40,6 +40,7 @@ import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
     import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
     import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
     import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
     import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
     import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
     import org.apache.lucene.codecs.mocksep.MockSingleIntFactory;
    @@ -174,6 +175,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
           if (LuceneTestCase.VERBOSE) {
             System.out.println("MockRandomCodec: writing Standard postings");
           }
    +      // nocommit: way to randomize skipInterval and acceptibleOverHead?!
           postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
         }
     
    @@ -313,6 +315,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
           if (LuceneTestCase.VERBOSE) {
             System.out.println("MockRandomCodec: reading Standard postings");
           }
    +      // nocommit
           postingsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
         }
     
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
    index c44f3ef5db7..31f897e39f0 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
    @@ -26,8 +26,8 @@ import org.apache.lucene.codecs.FieldsProducer;
     import org.apache.lucene.codecs.PostingsFormat;
     import org.apache.lucene.codecs.PostingsReaderBase;
     import org.apache.lucene.codecs.PostingsWriterBase;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
     import org.apache.lucene.codecs.pulsing.PulsingPostingsReader;
     import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter;
     import org.apache.lucene.index.SegmentReadState;
    @@ -35,7 +35,7 @@ import org.apache.lucene.index.SegmentWriteState;
     import org.apache.lucene.util.IOUtils;
     
     /**
    - * Pulsing(1, Pulsing(2, Lucene40))
    + * Pulsing(1, Pulsing(2, Lucene41))
      * @lucene.experimental
      */
     // TODO: if we create PulsingPostingsBaseFormat then we
    @@ -55,7 +55,7 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat {
         // Terms dict
         boolean success = false;
         try {
    -      docsWriter = new Lucene40PostingsWriter(state);
    +      docsWriter = new Lucene41PostingsWriter(state);
     
           pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter);
           pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner);
    @@ -77,7 +77,7 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat {
         PostingsReaderBase pulsingReader = null;
         boolean success = false;
         try {
    -      docsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
    +      docsReader = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
           pulsingReaderInner = new PulsingPostingsReader(docsReader);
           pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
           FieldsProducer ret = new BlockTreeTermsReader(
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
    index 8e46b10f385..d2760ae6151 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
    @@ -44,7 +44,7 @@ import java.util.zip.ZipFile;
     
     import org.apache.lucene.codecs.Codec;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
     import org.apache.lucene.document.ByteDocValuesField;
     import org.apache.lucene.document.DerefBytesDocValuesField;
    @@ -651,7 +651,7 @@ public class _TestUtil {
         if (LuceneTestCase.VERBOSE) {
           System.out.println("forcing postings format to:" + format);
         }
    -    return new Lucene40Codec() {
    +    return new Lucene41Codec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String field) {
             return format;
    diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
    index c5dbe80726a..fa32081b973 100644
    --- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
    +++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
    @@ -2,7 +2,7 @@ package org.apache.solr.core;
     
     import org.apache.lucene.codecs.Codec;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     import org.apache.solr.schema.IndexSchema;
     import org.apache.solr.schema.SchemaAware;
     import org.apache.solr.schema.SchemaField;
    @@ -42,7 +42,7 @@ public class SchemaCodecFactory extends CodecFactory implements SchemaAware {
     
       @Override
       public void inform(final IndexSchema schema) {
    -    codec = new Lucene40Codec() {
    +    codec = new Lucene41Codec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String field) {
             final SchemaField fieldOrNull = schema.getFieldOrNull(field);
    
    From fcb7a55d3b1579f9fcfea9bfa36a593420295cb4 Mon Sep 17 00:00:00 2001
    From: Robert Muir 
    Date: Fri, 12 Oct 2012 15:56:38 +0000
    Subject: [PATCH 04/20] svn:eol-style native
    
    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1397628 13f79535-47bb-0310-9956-ffa450edef68
    
    From 28fe93b39b47f7002344168bfeb997f0c4521691 Mon Sep 17 00:00:00 2001
    From: Mark Robert Miller 
    Date: Fri, 12 Oct 2012 18:25:44 +0000
    Subject: [PATCH 05/20] SOLR-3939: Consider a sync attempt from leader to
     replica that fails due to 404 a success. SOLR-3940: Rejoining the leader
     election incorrectly triggers the code path for a fresh cluster start rather
     than fail over.
    
    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1397665 13f79535-47bb-0310-9956-ffa450edef68
    ---
     solr/CHANGES.txt                              | 10 ++++--
     .../apache/solr/cloud/ElectionContext.java    |  2 +-
     .../org/apache/solr/cloud/LeaderElector.java  |  7 ++--
     .../org/apache/solr/cloud/ZkController.java   |  6 ++--
     .../java/org/apache/solr/update/PeerSync.java |  5 +++
     .../solr/cloud/BasicDistributedZkTest.java    | 36 +++++++++++++++----
     .../apache/solr/cloud/LeaderElectionTest.java |  9 +++--
     .../org/apache/solr/cloud/OverseerTest.java   |  4 +--
     8 files changed, 54 insertions(+), 25 deletions(-)
    
    diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
    index cb4852e907f..62a0fadde7b 100644
    --- a/solr/CHANGES.txt
    +++ b/solr/CHANGES.txt
    @@ -42,9 +42,7 @@ New Features
       values of a multiValued field in their original order when highlighting.
       (Joel Bernstein via yonik)
     
    -* SOLR-3929
    -support configuring IndexWriter max thread count in solrconfig
    -
    +* SOLR-3929: Support configuring IndexWriter max thread count in solrconfig.
       (phunt via Mark Miller)
     
     Optimizations
    @@ -73,6 +71,12 @@ Bug Fixes
     
     * SOLR-3917: Partial State on Schema-Browser UI is not defined for Dynamic
       Fields & Types (steffkes)
    +  
    +* SOLR-3939: Consider a sync attempt from leader to replica that fails due 
    +  to 404 a success. (Mark Miller, Joel Bernstein)
    +  
    +* SOLR-3940: Rejoining the leader election incorrectly triggers the code path
    +  for a fresh cluster start rather than fail over. (Mark Miller)
     
     Other Changes
     ----------------------
    diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    index 7baa465ea68..77417e9ee06 100644
    --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    @@ -324,7 +324,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
           SolrException.log(log, "Error trying to start recovery", t);
         }
         
    -    leaderElector.joinElection(this);
    +    leaderElector.joinElection(this, true);
       }
     
       private boolean shouldIBeLeader(ZkNodeProps leaderProps, SolrCore core) {
    diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
    index 4d3a016091b..07caa55734b 100644
    --- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
    +++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
    @@ -18,7 +18,6 @@ package org.apache.solr.cloud;
      */
     
     import java.io.IOException;
    -import java.io.UnsupportedEncodingException;
     import java.util.ArrayList;
     import java.util.Collections;
     import java.util.Comparator;
    @@ -43,7 +42,7 @@ import org.slf4j.LoggerFactory;
      * Leader Election process. This class contains the logic by which a
      * leader is chosen. First call * {@link #setup(ElectionContext)} to ensure
      * the election process is init'd. Next call
    - * {@link #joinElection(ElectionContext)} to start the leader election.
    + * {@link #joinElection(ElectionContext, boolean)} to start the leader election.
      * 
      * The implementation follows the classic ZooKeeper recipe of creating an
      * ephemeral, sequential node for each candidate and then looking at the set
    @@ -203,7 +202,7 @@ public  class LeaderElector {
        * 
        * @return sequential node number
        */
    -  public int joinElection(ElectionContext context) throws KeeperException, InterruptedException, IOException {
    +  public int joinElection(ElectionContext context, boolean replacement) throws KeeperException, InterruptedException, IOException {
         final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;
         
         long sessionId = zkClient.getSolrZooKeeper().getSessionId();
    @@ -259,7 +258,7 @@ public  class LeaderElector {
           }
         }
         int seq = getSeq(leaderSeqPath);
    -    checkIfIamLeader(seq, context, false);
    +    checkIfIamLeader(seq, context, replacement);
         
         return seq;
       }
    diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    index 0ccab0e6b72..db869ff4651 100644
    --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    @@ -191,7 +191,7 @@ public final class ZkController {
     
                   ZkController.this.overseer = new Overseer(shardHandler, adminPath, zkStateReader);
                   ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
    -              overseerElector.joinElection(context);
    +              overseerElector.joinElection(context, true);
                   zkStateReader.createClusterStateWatchersAndUpdate();
     
                 //  cc.newCmdDistribExecutor();
    @@ -422,7 +422,7 @@ public final class ZkController {
           this.overseer = new Overseer(shardHandler, adminPath, zkStateReader);
           ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
           overseerElector.setup(context);
    -      overseerElector.joinElection(context);
    +      overseerElector.joinElection(context, false);
           zkStateReader.createClusterStateWatchersAndUpdate();
           
         } catch (IOException e) {
    @@ -730,7 +730,7 @@ public final class ZkController {
     
         leaderElector.setup(context);
         electionContexts.put(coreZkNodeName, context);
    -    leaderElector.joinElection(context);
    +    leaderElector.joinElection(context, false);
       }
     
     
    diff --git a/solr/core/src/java/org/apache/solr/update/PeerSync.java b/solr/core/src/java/org/apache/solr/update/PeerSync.java
    index a98917e53ad..0466864595d 100644
    --- a/solr/core/src/java/org/apache/solr/update/PeerSync.java
    +++ b/solr/core/src/java/org/apache/solr/update/PeerSync.java
    @@ -312,6 +312,11 @@ public class PeerSync  {
             log.warn(msg() + " got a 503 from " + srsp.getShardAddress() + ", counting as success");
             return true;
           }
    +      
    +      if (cantReachIsSuccess && sreq.purpose == 1 && srsp.getException() instanceof SolrException && ((SolrException) srsp.getException()).code() == 404) {
    +        log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success");
    +        return true;
    +      }
           // TODO: at least log???
           // srsp.getException().printStackTrace(System.out);
          
    diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
    index dcf3963f442..ab55358fe61 100644
    --- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
    +++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
    @@ -49,6 +49,7 @@ import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
     import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
     import org.apache.solr.client.solrj.request.CoreAdminRequest;
     import org.apache.solr.client.solrj.request.CoreAdminRequest.Create;
    +import org.apache.solr.client.solrj.request.CoreAdminRequest.Unload;
     import org.apache.solr.client.solrj.request.QueryRequest;
     import org.apache.solr.client.solrj.response.CoreAdminResponse;
     import org.apache.solr.client.solrj.response.QueryResponse;
    @@ -742,10 +743,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
             0,
             ((HttpSolrServer) client).getBaseURL().length()
                 - DEFAULT_COLLECTION.length() - 1);
    -    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
    -    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
    -    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
    -    createCollection(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
    +    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
    +    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
    +    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
    +    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
         
        while (pending != null && pending.size() > 0) {
           
    @@ -764,7 +765,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
         
         assertAllActive(oneInstanceCollection2, solrj.getZkStateReader());
         
    -    printLayout();
    +    //printLayout();
         
        // TODO: enable when we don't falsely get slice1...
        // solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice1", 30000);
    @@ -803,6 +804,27 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
         assertNotNull(slices);
         String roles = slices.get("slice1").getReplicasMap().values().iterator().next().getStr(ZkStateReader.ROLES_PROP);
         assertEquals("none", roles);
    +    
    +    
    +    ZkCoreNodeProps props = new ZkCoreNodeProps(solrj.getZkStateReader().getClusterState().getLeader(oneInstanceCollection2, "slice1"));
    +    
    +    // now test that unloading a core gets us a new leader
    +    HttpSolrServer server = new HttpSolrServer(baseUrl);
    +    Unload unloadCmd = new Unload(true);
    +    unloadCmd.setCoreName(props.getCoreName());
    +    
    +    String leader = props.getCoreUrl();
    +    
    +    server.request(unloadCmd);
    +    
    +    int tries = 50;
    +    while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "slice1", 10000))) {
    +      Thread.sleep(100);
    +      if (tries-- == 0) {
    +        fail("Leader never changed");
    +      }
    +    }
    +
       }
     
       private void testSearchByCollectionName() throws SolrServerException {
    @@ -875,10 +897,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
     
       private void createCollection(String collection,
           List collectionClients, String baseUrl, int num) {
    -    createCollection(collection, collectionClients, baseUrl, num, null);
    +    createSolrCore(collection, collectionClients, baseUrl, num, null);
       }
       
    -  private void createCollection(final String collection,
    +  private void createSolrCore(final String collection,
           List collectionClients, final String baseUrl, final int num,
           final String shardId) {
         Callable call = new Callable() {
    diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
    index 1b12c659586..a4f69fcbfce 100644
    --- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
    +++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
    @@ -40,7 +40,6 @@ import org.apache.zookeeper.KeeperException;
     import org.apache.zookeeper.KeeperException.NoNodeException;
     import org.junit.AfterClass;
     import org.junit.BeforeClass;
    -import org.junit.Ignore;
     import org.junit.Test;
     
     @Slow
    @@ -114,7 +113,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
               elector, "shard1", "collection1", Integer.toString(nodeNumber),
               props, zkStateReader);
           elector.setup(context);
    -      seq = elector.joinElection(context);
    +      seq = elector.joinElection(context, false);
           electionDone = true;
           seqToThread.put(seq, this);
         }
    @@ -175,7 +174,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
         ElectionContext context = new ShardLeaderElectionContextBase(elector,
             "shard2", "collection1", "dummynode1", props, zkStateReader);
         elector.setup(context);
    -    elector.joinElection(context);
    +    elector.joinElection(context, false);
         assertEquals("http://127.0.0.1/solr/",
             getLeaderUrl("collection1", "shard2"));
       }
    @@ -188,7 +187,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
         ElectionContext firstContext = new ShardLeaderElectionContextBase(first,
             "slice1", "collection2", "dummynode1", props, zkStateReader);
         first.setup(firstContext);
    -    first.joinElection(firstContext);
    +    first.joinElection(firstContext, false);
     
         Thread.sleep(1000);
         assertEquals("original leader was not registered", "http://127.0.0.1/solr/1/", getLeaderUrl("collection2", "slice1"));
    @@ -199,7 +198,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
         ElectionContext context = new ShardLeaderElectionContextBase(second,
             "slice1", "collection2", "dummynode1", props, zkStateReader);
         second.setup(context);
    -    second.joinElection(context);
    +    second.joinElection(context, false);
         Thread.sleep(1000);
         assertEquals("original leader should have stayed leader", "http://127.0.0.1/solr/1/", getLeaderUrl("collection2", "slice1"));
         firstContext.cancelElection();
    diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
    index 6520c6bd42b..59071c7d951 100644
    --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
    +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
    @@ -139,7 +139,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
               ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase(
                   elector, shardId, collection, nodeName + "_" + coreName, props,
                   zkStateReader);
    -          elector.joinElection(ctx);
    +          elector.joinElection(ctx, false);
               return shardId;
             }
             Thread.sleep(500);
    @@ -876,7 +876,7 @@ public class OverseerTest extends SolrTestCaseJ4 {
             new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader);
         ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replaceAll("/", "_"));
         overseerElector.setup(ec);
    -    overseerElector.joinElection(ec);
    +    overseerElector.joinElection(ec, false);
         return zkClient;
       }
       
    
    From 25afdaf65b81fec19daa255c0a8bbe94466a9598 Mon Sep 17 00:00:00 2001
    From: Mark Robert Miller 
    Date: Fri, 12 Oct 2012 19:31:13 +0000
    Subject: [PATCH 06/20] SOLR-3941: The "commitOnLeader" part of distributed
     recovery can use openSearcher=false.
    
    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1397698 13f79535-47bb-0310-9956-ffa450edef68
    ---
     solr/CHANGES.txt                                               | 3 +++
     solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java | 2 ++
     2 files changed, 5 insertions(+)
    
    diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
    index 62a0fadde7b..779d9d71562 100644
    --- a/solr/CHANGES.txt
    +++ b/solr/CHANGES.txt
    @@ -60,6 +60,9 @@ Optimizations
     
     * SOLR-3734: Improve Schema-Browser Handling for CopyField using
       dynamicField's (steffkes)
    +  
    +* SOLR-3941: The "commitOnLeader" part of distributed recovery can use
    +  openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller)
     
     Bug Fixes
     ----------------------
    diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
    index fab5c2a4a60..35fb620746c 100644
    --- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
    +++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
    @@ -37,6 +37,7 @@ import org.apache.solr.common.cloud.ZkNodeProps;
     import org.apache.solr.common.cloud.ZkStateReader;
     import org.apache.solr.common.cloud.ZooKeeperException;
     import org.apache.solr.common.params.ModifiableSolrParams;
    +import org.apache.solr.common.params.UpdateParams;
     import org.apache.solr.core.CoreContainer;
     import org.apache.solr.core.CoreDescriptor;
     import org.apache.solr.core.RequestHandlers.LazyRequestHandlerWrapper;
    @@ -177,6 +178,7 @@ public class RecoveryStrategy extends Thread implements ClosableThread {
         UpdateRequest ureq = new UpdateRequest();
         ureq.setParams(new ModifiableSolrParams());
         ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    +    ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
         ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
             server);
         server.shutdown();
    
    From 2ea2adcf6b3633aa895b841eedc09ab540dc9b4d Mon Sep 17 00:00:00 2001
    From: Robert Muir 
    Date: Sat, 13 Oct 2012 15:27:24 +0000
    Subject: [PATCH 07/20] LUCENE-4446: wrap up cutover to blockpostingsformat
    
    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4446@1397858 13f79535-47bb-0310-9956-ffa450edef68
    ---
     .../codecs/memory/DirectPostingsFormat.java   |  8 +--
     .../pulsing/Pulsing41PostingsFormat.java      |  6 +--
     .../java/org/apache/lucene/codecs/Codec.java  |  2 +-
     .../org/apache/lucene/codecs/FilterCodec.java |  2 +-
     .../lucene/codecs/lucene40/Lucene40Codec.java |  4 +-
     .../lucene40/Lucene40PostingsBaseFormat.java  |  5 +-
     .../lucene40/Lucene40PostingsFormat.java      | 30 ++++-------
     .../lucene40/Lucene40PostingsReader.java      | 22 +++++---
     .../lucene40/Lucene40SkipListReader.java      |  3 +-
     .../lucene41/Lucene41PostingsFormat.java      |  6 +++
     .../lucene41/Lucene41PostingsWriter.java      |  5 +-
     .../lucene/codecs/lucene41/package.html       |  2 +-
     .../org/apache/lucene/codecs/package.html     |  4 +-
     .../lucene/index/LiveIndexWriterConfig.java   |  2 +-
     .../codecs/lucene40/TestReuseDocsEnum.java    |  8 +--
     .../lucene40/Lucene40PostingsWriter.java      | 16 ++----
     .../lucene40/Lucene40RWPostingsFormat.java    | 50 +++++++++++++++++++
     .../lucene40/Lucene40SkipListWriter.java      |  3 +-
     .../lucene/codecs/lucene40/package.html       | 25 ++++++++++
     .../mockrandom/MockRandomPostingsFormat.java  | 10 ++--
     .../org/apache/lucene/index/RandomCodec.java  |  2 +-
     .../util/TestRuleSetupAndRestoreClassEnv.java |  4 +-
     .../org.apache.lucene.codecs.PostingsFormat   |  2 +-
     .../solr/collection1/conf/schema_codec.xml    |  4 +-
     .../apache/solr/core/TestCodecSupport.java    | 14 +++---
     25 files changed, 154 insertions(+), 85 deletions(-)
     rename lucene/{core => test-framework}/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (95%)
     create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
     rename lucene/{core => test-framework}/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java (99%)
     create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html
    
    diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
    index 97e22f4a03d..936d4ed5d61 100644
    --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
    +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
    @@ -27,7 +27,7 @@ import java.util.TreeMap;
     import org.apache.lucene.codecs.FieldsConsumer;
     import org.apache.lucene.codecs.FieldsProducer;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
     import org.apache.lucene.index.DocsAndPositionsEnum;
     import org.apache.lucene.index.DocsEnum;
     import org.apache.lucene.index.FieldInfo.IndexOptions;
    @@ -52,7 +52,7 @@ import org.apache.lucene.util.automaton.Transition;
     //   - build depth-N prefix hash?
     //   - or: longer dense skip lists than just next byte?
     
    -/** Wraps {@link Lucene40PostingsFormat} format for on-disk
    +/** Wraps {@link Lucene41PostingsFormat} format for on-disk
      *  storage, but then at read time loads and stores all
      *  terms & postings directly in RAM as byte[], int[].
      *
    @@ -100,12 +100,12 @@ public final class DirectPostingsFormat extends PostingsFormat {
       
       @Override
       public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    -    return PostingsFormat.forName("Lucene40").fieldsConsumer(state);
    +    return PostingsFormat.forName("Lucene41").fieldsConsumer(state);
       }
     
       @Override
       public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    -    FieldsProducer postings = PostingsFormat.forName("Lucene40").fieldsProducer(state);
    +    FieldsProducer postings = PostingsFormat.forName("Lucene41").fieldsProducer(state);
         if (state.context.context != IOContext.Context.MERGE) {
           FieldsProducer loadedPostings;
           try {
    diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
    index 7fd7fb0504a..9946062e09b 100644
    --- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
    +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
    @@ -28,17 +28,17 @@ import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
      */
     public class Pulsing41PostingsFormat extends PulsingPostingsFormat {
     
    -  /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene40" format. */
    +  /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene41" format. */
       public Pulsing41PostingsFormat() {
         this(1);
       }
     
    -  /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. */
    +  /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene41" format. */
       public Pulsing41PostingsFormat(int freqCutoff) {
         this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
       }
     
    -  /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. */
    +  /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene41" format. */
       public Pulsing41PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) {
         super("Pulsing41", new Lucene41PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize);
       }
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
    index 1892df6d300..7a473a3ed38 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
    @@ -119,7 +119,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
         loader.reload(classloader);
       }
       
    -  private static Codec defaultCodec = Codec.forName("Lucene40");
    +  private static Codec defaultCodec = Codec.forName("Lucene41");
       
       /** expert: returns the default codec used for newly created
        *  {@link IndexWriterConfig}s.
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
    index ca8e439b6d8..12f17197d2b 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
    @@ -21,7 +21,7 @@ package org.apache.lucene.codecs;
      * A codec that forwards all its method calls to another codec.
      * 

    * Extend this class when you need to reuse the functionality of an existing - * codec. For example, if you want to build a codec that redefines Lucene40's + * codec. For example, if you want to build a codec that redefines Lucene41's * {@link LiveDocsFormat}: *

      *   public final class CustomCodec extends FilterCodec {
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
    index 076eeeaeadd..a0d66af61d0 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
    @@ -36,12 +36,12 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
      * {@link FilterCodec}.
      *
      * @see org.apache.lucene.codecs.lucene40 package documentation for file format details.
    - * @lucene.experimental
    + * @deprecated Only for reading old 4.0 segments
      */
     // NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever
     // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
     // (it writes a minor version, etc).
    -// nocommit: make readonly and add impersonator
    +@Deprecated
     public final class Lucene40Codec extends Codec {
       private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat();
       private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
    index df6611922e2..eaf452d6252 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
    @@ -29,9 +29,10 @@ import org.apache.lucene.index.SegmentWriteState;
      * Provides a {@link PostingsReaderBase} and {@link
      * PostingsWriterBase}.
      *
    - * @lucene.experimental */
    + * @deprecated Only for reading old 4.0 segments */
     
     // TODO: should these also be named / looked up via SPI?
    +@Deprecated
     public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
     
       /** Sole constructor. */
    @@ -46,6 +47,6 @@ public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
     
       @Override
       public PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException {
    -    return new Lucene40PostingsWriter(state);
    +    throw new UnsupportedOperationException("this codec can only be used for reading");
       }
     }
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
    index 16d9c47ed97..1f9c28efdb4 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
    @@ -211,15 +211,18 @@ import org.apache.lucene.util.fst.FST; // javadocs
      * previous occurrence and an OffsetLength follows. Offset data is only written for
      * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.

    * - * @lucene.experimental */ + * @deprecated Only for reading old 4.0 segments */ // TODO: this class could be created by wrapping // BlockTreeTermsDict around Lucene40PostingsBaseFormat; ie // we should not duplicate the code from that class here: -public final class Lucene40PostingsFormat extends PostingsFormat { +@Deprecated +public class Lucene40PostingsFormat extends PostingsFormat { - private final int minBlockSize; - private final int maxBlockSize; + /** minimum items (terms or sub-blocks) per block for BlockTree */ + protected final int minBlockSize; + /** maximum items (terms or sub-blocks) per block for BlockTree */ + protected final int maxBlockSize; /** Creates {@code Lucene40PostingsFormat} with default * settings. */ @@ -231,7 +234,7 @@ public final class Lucene40PostingsFormat extends PostingsFormat { * values for {@code minBlockSize} and {@code * maxBlockSize} passed to block terms dictionary. * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */ - public Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) { + private Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) { super("Lucene40"); this.minBlockSize = minBlockSize; assert minBlockSize > 1; @@ -240,22 +243,7 @@ public final class Lucene40PostingsFormat extends PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase docs = new Lucene40PostingsWriter(state); - - // TODO: should we make the terms index more easily - // pluggable? Ie so that this codec would record which - // index impl was used, and switch on loading? - // Or... you must make a new Codec for this? 
- boolean success = false; - try { - FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize); - success = true; - return ret; - } finally { - if (!success) { - docs.close(); - } - } + throw new UnsupportedOperationException("this codec can only be used for reading"); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index 64d2e49b1ff..a3729e2f1da 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -45,10 +45,21 @@ import org.apache.lucene.util.IOUtils; * postings format. * * @see Lucene40PostingsFormat - * @lucene.experimental */ - + * @deprecated Only for reading old 4.0 segments */ +@Deprecated public class Lucene40PostingsReader extends PostingsReaderBase { + final static String TERMS_CODEC = "Lucene40PostingsWriterTerms"; + final static String FRQ_CODEC = "Lucene40PostingsWriterFrq"; + final static String PRX_CODEC = "Lucene40PostingsWriterPrx"; + + //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + // Increment version to change it: + final static int VERSION_START = 0; + final static int VERSION_LONG_SKIP = 1; + final static int VERSION_CURRENT = VERSION_LONG_SKIP; + private final IndexInput freqIn; private final IndexInput proxIn; // public static boolean DEBUG = BlockTreeTermsWriter.DEBUG; @@ -67,7 +78,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { try { freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION), ioContext); - CodecUtil.checkHeader(freqIn, Lucene40PostingsWriter.FRQ_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_CURRENT); + CodecUtil.checkHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT); // TODO: 
hasProx should (somehow!) become codec private, // but it's tricky because 1) FIS.hasProx is global (it // could be all fields that have prox are written by a @@ -79,7 +90,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { if (fieldInfos.hasProx()) { proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION), ioContext); - CodecUtil.checkHeader(proxIn, Lucene40PostingsWriter.PRX_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_CURRENT); + CodecUtil.checkHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT); } else { proxIn = null; } @@ -97,8 +108,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching past writer - CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.TERMS_CODEC, - Lucene40PostingsWriter.VERSION_START, Lucene40PostingsWriter.VERSION_CURRENT); + CodecUtil.checkHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT); skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java index 4cef37a5977..1580a390ba9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java @@ -28,8 +28,9 @@ import org.apache.lucene.store.IndexInput; * that stores positions and payloads. 
* * @see Lucene40PostingsFormat - * @lucene.experimental + * @deprecated Only for reading old 4.0 segments */ +@Deprecated public class Lucene40SkipListReader extends MultiLevelSkipListReader { private boolean currentFieldStoresPayloads; private boolean currentFieldStoresOffsets; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java index d1c21ed2846..3cbc9653bed 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java @@ -380,10 +380,16 @@ public final class Lucene41PostingsFormat extends PostingsFormat { // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding public final static int BLOCK_SIZE = 128; + /** Creates {@code Lucene41PostingsFormat} with default + * settings. */ public Lucene41PostingsFormat() { this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); } + /** Creates {@code Lucene41PostingsFormat} with custom + * values for {@code minBlockSize} and {@code + * maxBlockSize} passed to block terms dictionary. 
+ * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */ public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) { super("Lucene41"); this.minTermBlockSize = minTermBlockSize; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java index 51e2b02422f..4298ea6c13b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java @@ -63,10 +63,9 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase { final static String POS_CODEC = "Lucene41PostingsWriterPos"; final static String PAY_CODEC = "Lucene41PostingsWriterPay"; - // Increment version to change it: nocommit: we can start at 0 + // Increment version to change it final static int VERSION_START = 0; - final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443 - final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA; + final static int VERSION_CURRENT = VERSION_START; final IndexOutput docOut; final IndexOutput posOut; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html index aff3d7a572f..14782803a1c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html @@ -153,7 +153,7 @@ its title, url, or an identifier to access a database. The set of stored fields returned for each hit when searching. This is keyed by document number.
  • -{@link org.apache.lucene.codecs.lucene41Lucene41PostingsFormat Term dictionary}. +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. A dictionary containing all of the terms used in all of the indexed fields of all of the documents. The dictionary also contains the number of documents which contain the term, and pointers to the term's frequency and diff --git a/lucene/core/src/java/org/apache/lucene/codecs/package.html b/lucene/core/src/java/org/apache/lucene/codecs/package.html index e6de64d057b..91a65458ac1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/package.html @@ -61,8 +61,8 @@ name of your codec. If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings formats for different fields, then you can register your custom postings format in the same way (in META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default - {@link org.apache.lucene.codecs.lucene40.Lucene40Codec} and override - {@link org.apache.lucene.codecs.lucene40.Lucene40Codec#getPostingsFormatForField(String)} to return your custom + {@link org.apache.lucene.codecs.lucene41.Lucene41Codec} and override + {@link org.apache.lucene.codecs.lucene41.Lucene41Codec#getPostingsFormatForField(String)} to return your custom postings format.

    diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 4d76f595494..9201642a750 100755 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -191,7 +191,7 @@ public class LiveIndexWriterConfig { * for a block), you would instead use {@link Lucene41PostingsFormat#Lucene41PostingsFormat(int, int)}. * which can also be configured on a per-field basis: *
    -   * //customize Lucene40PostingsFormat, passing minBlockSize=50, maxBlockSize=100
    +   * //customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
        * final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
        * iwc.setCodec(new Lucene41Codec() {
        *   @Override
    diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    index 175f7a08aa7..98c7cb5b2f2 100644
    --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    @@ -38,12 +38,12 @@ import org.apache.lucene.util.LineFileDocs;
     import org.apache.lucene.util.LuceneTestCase;
     import org.apache.lucene.util._TestUtil;
     
    -// nocommit: really this should be in BaseTestPF or somewhere else? useful test!
    +// TODO: really this should be in BaseTestPF or somewhere else? useful test!
     public class TestReuseDocsEnum extends LuceneTestCase {
     
       public void testReuseDocsEnumNoReuse() throws IOException {
         Directory dir = newDirectory();
    -    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    +    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
         RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
         int numdocs = atLeast(20);
    @@ -70,7 +70,7 @@ public class TestReuseDocsEnum extends LuceneTestCase {
       // tests for reuse only if bits are the same either null or the same instance
       public void testReuseDocsEnumSameBitsOrNull() throws IOException {
         Directory dir = newDirectory();
    -    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    +    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
         RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
         int numdocs = atLeast(20);
    @@ -114,7 +114,7 @@ public class TestReuseDocsEnum extends LuceneTestCase {
       // make sure we never reuse from another reader even if it is the same field & codec etc
       public void testReuseDocsEnumDifferentReader() throws IOException {
         Directory dir = newDirectory();
    -    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    +    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
         RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
         int numdocs = atLeast(20);
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    similarity index 95%
    rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    index 44b953bd780..65d10b2a385 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    @@ -45,16 +45,6 @@ import org.apache.lucene.util.IOUtils;
      * @lucene.experimental 
      */
     public final class Lucene40PostingsWriter extends PostingsWriterBase {
    -  final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
    -  final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";
    -  final static String PRX_CODEC = "Lucene40PostingsWriterPrx";
    -
    -  //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
    -  
    -  // Increment version to change it:
    -  final static int VERSION_START = 0;
    -  final static int VERSION_LONG_SKIP = 1;
    -  final static int VERSION_CURRENT = VERSION_LONG_SKIP;
     
       final IndexOutput freqOut;
       final IndexOutput proxOut;
    @@ -111,7 +101,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
         boolean success = false;
         IndexOutput proxOut = null;
         try {
    -      CodecUtil.writeHeader(freqOut, FRQ_CODEC, VERSION_CURRENT);
    +      CodecUtil.writeHeader(freqOut, Lucene40PostingsReader.FRQ_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
           // TODO: this is a best effort, if one of these fields has no postings
           // then we make an empty prx file, same as if we are wrapped in 
           // per-field postingsformat. maybe... we shouldn't
    @@ -121,7 +111,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
             // prox file
             fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION);
             proxOut = state.directory.createOutput(fileName, state.context);
    -        CodecUtil.writeHeader(proxOut, PRX_CODEC, VERSION_CURRENT);
    +        CodecUtil.writeHeader(proxOut, Lucene40PostingsReader.PRX_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
           } else {
             // Every field omits TF so we will write no prox file
             proxOut = null;
    @@ -146,7 +136,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
       @Override
       public void start(IndexOutput termsOut) throws IOException {
         this.termsOut = termsOut;
    -    CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
    +    CodecUtil.writeHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
         termsOut.writeInt(skipInterval);                // write skipInterval
         termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
         termsOut.writeInt(skipMinimum);                 // write skipMinimum
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
    new file mode 100644
    index 00000000000..f749216bf38
    --- /dev/null
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
    @@ -0,0 +1,50 @@
    +package org.apache.lucene.codecs.lucene40;
    +
    +import java.io.IOException;
    +
    +import org.apache.lucene.codecs.BlockTreeTermsWriter;
    +import org.apache.lucene.codecs.FieldsConsumer;
    +import org.apache.lucene.codecs.PostingsWriterBase;
    +import org.apache.lucene.index.SegmentWriteState;
    +
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +/**
    + * Read-write version of {@link Lucene40PostingsFormat} for testing.
    + */
    +public class Lucene40RWPostingsFormat extends Lucene40PostingsFormat {
    +  @Override
    +  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    +    PostingsWriterBase docs = new Lucene40PostingsWriter(state);
    +
    +    // TODO: should we make the terms index more easily
    +    // pluggable?  Ie so that this codec would record which
    +    // index impl was used, and switch on loading?
    +    // Or... you must make a new Codec for this?
    +    boolean success = false;
    +    try {
    +      FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
    +      success = true;
    +      return ret;
    +    } finally {
    +      if (!success) {
    +        docs.close();
    +      }
    +    }
    +  }
    +}
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
    similarity index 99%
    rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
    rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
    index 34cdac1dcac..62bd3047878 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
    @@ -29,8 +29,9 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter;
      * that stores positions and payloads.
      * 
      * @see Lucene40PostingsFormat
    - * @lucene.experimental
    + * @deprecated Only for reading old 4.0 segments
      */
    +@Deprecated
     public class Lucene40SkipListWriter extends MultiLevelSkipListWriter {
       private int[] lastSkipDoc;
       private int[] lastSkipPayloadLength;
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html
    new file mode 100644
    index 00000000000..c83302cf5b7
    --- /dev/null
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html
    @@ -0,0 +1,25 @@
    +
    +
    +
    +
    +   
    +
    +
    +Support for testing {@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat}.
    +
    +
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    index 9aa48f52091..55958b14970 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
    @@ -38,8 +38,7 @@ import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
     import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
     import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
     import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader;
     import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter;
     import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat;
     import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
    @@ -175,8 +174,8 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
           if (LuceneTestCase.VERBOSE) {
             System.out.println("MockRandomCodec: writing Standard postings");
           }
    -      // nocommit: way to randomize skipInterval and acceptibleOverHead?!
    -      postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
    +      // TODO: randomize variables like acceptibleOverHead?!
    +      postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
         }
     
         if (random.nextBoolean()) {
    @@ -315,8 +314,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
           if (LuceneTestCase.VERBOSE) {
             System.out.println("MockRandomCodec: reading Standard postings");
           }
    -      // nocommit
    -      postingsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
    +      postingsReader = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
         }
     
         if (random.nextBoolean()) {
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
    index 03737a41983..61de20efe1f 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
    @@ -99,7 +99,7 @@ public class RandomCodec extends Lucene41Codec {
             new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
             // add pulsing again with (usually) different parameters
             new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
    -        //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene40Postings to be constructed 
    +        //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed 
             //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing 
             //with such "wrapper" classes?
             new TestBloomFilteredLucene41Postings(),                
    diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
    index afa1ccdb30e..e87720d59f5 100644
    --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
    +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
    @@ -32,6 +32,7 @@ import org.apache.lucene.codecs.PostingsFormat;
     import org.apache.lucene.codecs.asserting.AssertingCodec;
     import org.apache.lucene.codecs.compressing.CompressingCodec;
     import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat;
     import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
     import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
    @@ -133,13 +134,12 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
         savedCodec = Codec.getDefault();
         int randomVal = random.nextInt(10);
     
    -
         if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) &&
                                               "random".equals(TEST_POSTINGSFORMAT) &&
                                               randomVal < 2 &&
                                               !shouldAvoidCodec("Lucene40"))) {
           codec = Codec.forName("Lucene40");
    -      // nocommit: assert (codec instanceof PreFlexRWCodec) : "fix your classpath to have tests-framework.jar before lucene-core.jar";
    +      assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat) : "fix your classpath to have tests-framework.jar before lucene-core.jar";
         } else if (!"random".equals(TEST_POSTINGSFORMAT)) {
           final PostingsFormat format;
           if ("MockRandom".equals(TEST_POSTINGSFORMAT)) {
    diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    index 98619f2243b..3b7b3836da4 100644
    --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    @@ -22,4 +22,4 @@ org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
     org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds
     org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
     org.apache.lucene.codecs.asserting.AssertingPostingsFormat
    -
    +org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
    diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
    index e08ab8deb99..e28cec73722 100644
    --- a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
    +++ b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
    @@ -17,9 +17,9 @@
     -->
     
      
    -  
    +  
       
    -  
    +  
         
       
      
    diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
    index 3bf7e713eea..a49fbf98397 100644
    --- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
    +++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
    @@ -37,14 +37,14 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
         Map fields = h.getCore().getSchema().getFields();
         SchemaField schemaField = fields.get("string_pulsing_f");
         PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
    -    assertEquals("Pulsing40", format.getPostingsFormatForField(schemaField.getName()).getName());
    +    assertEquals("Pulsing41", format.getPostingsFormatForField(schemaField.getName()).getName());
         schemaField = fields.get("string_simpletext_f");
         assertEquals("SimpleText",
             format.getPostingsFormatForField(schemaField.getName()).getName());
         schemaField = fields.get("string_standard_f");
    -    assertEquals("Lucene40", format.getPostingsFormatForField(schemaField.getName()).getName());
    +    assertEquals("Lucene41", format.getPostingsFormatForField(schemaField.getName()).getName());
         schemaField = fields.get("string_f");
    -    assertEquals("Lucene40", format.getPostingsFormatForField(schemaField.getName()).getName());
    +    assertEquals("Lucene41", format.getPostingsFormatForField(schemaField.getName()).getName());
       }
     
       public void testDynamicFields() {
    @@ -53,10 +53,10 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
     
         assertEquals("SimpleText", format.getPostingsFormatForField("foo_simple").getName());
         assertEquals("SimpleText", format.getPostingsFormatForField("bar_simple").getName());
    -    assertEquals("Pulsing40", format.getPostingsFormatForField("foo_pulsing").getName());
    -    assertEquals("Pulsing40", format.getPostingsFormatForField("bar_pulsing").getName());
    -    assertEquals("Lucene40", format.getPostingsFormatForField("foo_standard").getName());
    -    assertEquals("Lucene40", format.getPostingsFormatForField("bar_standard").getName());
    +    assertEquals("Pulsing41", format.getPostingsFormatForField("foo_pulsing").getName());
    +    assertEquals("Pulsing41", format.getPostingsFormatForField("bar_pulsing").getName());
    +    assertEquals("Lucene41", format.getPostingsFormatForField("foo_standard").getName());
    +    assertEquals("Lucene41", format.getPostingsFormatForField("bar_standard").getName());
       }
     
       public void testUnknownField() {
    
    From 2db5d70abae6ef0a98a44bc3f55986960778dcfc Mon Sep 17 00:00:00 2001
    From: Robert Muir 
    Date: Sat, 13 Oct 2012 15:31:30 +0000
    Subject: [PATCH 08/20] missing javadocs
    
    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4446@1397860 13f79535-47bb-0310-9956-ffa450edef68
    ---
     .../apache/lucene/codecs/lucene41/Lucene41PostingsReader.java  | 1 +
     .../apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java  | 3 +++
     2 files changed, 4 insertions(+)
    
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
    index 7e14977bc25..6292b18e6a7 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
    @@ -62,6 +62,7 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
     
       // public static boolean DEBUG = false;
     
    +  /** Sole constructor. */
       public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
         boolean success = false;
         IndexInput docIn = null;
    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
    index 4298ea6c13b..19391afcaa7 100644
    --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
    +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
    @@ -113,6 +113,8 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
       private final ForUtil forUtil;
       private final Lucene41SkipWriter skipWriter;
       
    +  /** Creates a postings writer with the specified PackedInts overhead ratio */
    +  // TODO: does this ctor even make sense?
       public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException {
         super();
     
    @@ -181,6 +183,7 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
         encoded = new byte[MAX_ENCODED_SIZE];
       }
     
    +  /** Creates a postings writer with PackedInts.COMPACT */
       public Lucene41PostingsWriter(SegmentWriteState state) throws IOException {
         this(state, PackedInts.COMPACT);
       }
    
    From ef3f5cea1b168bebf0938f0cc2c6d9caeff69ff0 Mon Sep 17 00:00:00 2001
    From: Robert Muir 
    Date: Sun, 14 Oct 2012 20:29:18 +0000
    Subject: [PATCH 09/20] LUCENE-4446: flip file formats to point to 4.1 format
    
    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1398128 13f79535-47bb-0310-9956-ffa450edef68
    ---
     lucene/site/xsl/index.xsl | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/lucene/site/xsl/index.xsl b/lucene/site/xsl/index.xsl
    index 08352bcdb82..0984c10153b 100644
    --- a/lucene/site/xsl/index.xsl
    +++ b/lucene/site/xsl/index.xsl
    @@ -65,7 +65,7 @@
                 
  • Changes: List of changes in this release.
  • Migration Guide: What changed in Lucene 4; how to migrate code from Lucene 3.x.
  • JRE Version Migration: Information about upgrading between major JRE versions.
-            <li><a href="core/org/apache/lucene/codecs/lucene40/package-summary.html#package_description">File Formats</a>: Guide to the supported index format used by Lucene.  This can be customized by using <a href="core/org/apache/lucene/codecs/package-summary.html#package_description">an alternate codec</a>.</li>
+            <li><a href="core/org/apache/lucene/codecs/lucene41/package-summary.html#package_description">File Formats</a>: Guide to the supported index format used by Lucene.  This can be customized by using <a href="core/org/apache/lucene/codecs/package-summary.html#package_description">an alternate codec</a>.</li>
  • Search and Scoring in Lucene: Introduction to how Lucene scores documents.
  • Classic Scoring Formula: Formula of Lucene's classic Vector Space implementation. (look here for other models)
  • Classic QueryParser Syntax: Overview of the Classic QueryParser's syntax and features.
  • From 715f0230576c37677902c2da4a080e11d0d0156c Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Mon, 15 Oct 2012 23:06:34 +0000 Subject: [PATCH 10/20] LUCENE-4006: Add system requirements page (markdown) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1398564 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/SYSTEM_REQUIREMENTS.txt | 16 ++++++++++++++++ lucene/build.xml | 3 ++- lucene/site/xsl/index.xsl | 1 + solr/SYSTEM_REQUIREMENTS.txt | 16 ++++++++++++++++ solr/build.xml | 10 ++++------ solr/site/xsl/index.xsl | 1 + 6 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 lucene/SYSTEM_REQUIREMENTS.txt create mode 100644 solr/SYSTEM_REQUIREMENTS.txt diff --git a/lucene/SYSTEM_REQUIREMENTS.txt b/lucene/SYSTEM_REQUIREMENTS.txt new file mode 100644 index 00000000000..9b38cbf24f6 --- /dev/null +++ b/lucene/SYSTEM_REQUIREMENTS.txt @@ -0,0 +1,16 @@ +# System Requirements + +Apache Solr runs of Java 6 or greater. When using Java 7, be sure to +install at least Update 1! With all Java versions it is strongly +recommended to not use experimental `-XX` JVM options. It is also +recommended to always use the latest update version of your Java VM, +because bugs may affect Solr. An overview of known JVM bugs can be +found on http://wiki.apache.org/lucene-java/SunJavaBugs. + +CPU, disk and memory requirements are based on the many choices made in +implementing Solr (document size, number of documents, and number of +hits retrieved to name a few). The benchmarks page has some information +related to performance on particular platforms. 
+ +*To build Apache Solr from source, refer to the `BUILD.txt` file in +the distribution directory.* diff --git a/lucene/build.xml b/lucene/build.xml index b26d455ec79..d8d3f0fb440 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -33,6 +33,7 @@ - + diff --git a/lucene/site/xsl/index.xsl b/lucene/site/xsl/index.xsl index 0984c10153b..bf77408d671 100644 --- a/lucene/site/xsl/index.xsl +++ b/lucene/site/xsl/index.xsl @@ -63,6 +63,7 @@

    Reference Documents

    • Changes: List of changes in this release.
+            <li><a href="SYSTEM_REQUIREMENTS.html">System Requirements</a>: Minimum and supported Java versions.</li>
    • Migration Guide: What changed in Lucene 4; how to migrate code from Lucene 3.x.
    • JRE Version Migration: Information about upgrading between major JRE versions.
    • File Formats: Guide to the supported index format used by Lucene. This can be customized by using an alternate codec.
    • diff --git a/solr/SYSTEM_REQUIREMENTS.txt b/solr/SYSTEM_REQUIREMENTS.txt new file mode 100644 index 00000000000..d5edcc6766e --- /dev/null +++ b/solr/SYSTEM_REQUIREMENTS.txt @@ -0,0 +1,16 @@ +# System Requirements + +Apache Lucene runs of Java 6 or greater. When using Java 7, be sure to +install at least Update 1! With all Java versions it is strongly +recommended to not use experimental `-XX` JVM options. It is also +recommended to always use the latest update version of your Java VM, +because bugs may affect Lucene. An overview of known JVM bugs can be +found on http://wiki.apache.org/lucene-java/SunJavaBugs. + +CPU, disk and memory requirements are based on the many choices made in +implementing Lucene (document size, number of documents, and number of +hits retrieved to name a few). The benchmarks page has some information +related to performance on particular platforms. + +*To build Apache Lucene from source, refer to the `BUILD.txt` file in +the distribution directory.* diff --git a/solr/build.xml b/solr/build.xml index f74b3726c35..b873957d62a 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -144,7 +144,7 @@ depends="javadocs,changes-to-html,process-webpages"/> - + @@ -163,12 +163,10 @@ - @@ -439,8 +437,8 @@ Changes: List of changes in this release. +
    • System Requirements: Minimum and supported Java versions.
    • Solr Tutorial: This document covers the basics of running Solr using an example schema, and some sample data.
    • Lucene Documentation
    From 17324e64ddb2a6a3772b729d897775281ce188de Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Mon, 15 Oct 2012 23:16:01 +0000 Subject: [PATCH 11/20] LUCENE-4006: Exchange files git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1398570 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/SYSTEM_REQUIREMENTS.txt | 10 +++++----- solr/SYSTEM_REQUIREMENTS.txt | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lucene/SYSTEM_REQUIREMENTS.txt b/lucene/SYSTEM_REQUIREMENTS.txt index 9b38cbf24f6..d5edcc6766e 100644 --- a/lucene/SYSTEM_REQUIREMENTS.txt +++ b/lucene/SYSTEM_REQUIREMENTS.txt @@ -1,16 +1,16 @@ # System Requirements -Apache Solr runs of Java 6 or greater. When using Java 7, be sure to +Apache Lucene runs of Java 6 or greater. When using Java 7, be sure to install at least Update 1! With all Java versions it is strongly recommended to not use experimental `-XX` JVM options. It is also recommended to always use the latest update version of your Java VM, -because bugs may affect Solr. An overview of known JVM bugs can be +because bugs may affect Lucene. An overview of known JVM bugs can be found on http://wiki.apache.org/lucene-java/SunJavaBugs. CPU, disk and memory requirements are based on the many choices made in -implementing Solr (document size, number of documents, and number of +implementing Lucene (document size, number of documents, and number of hits retrieved to name a few). The benchmarks page has some information related to performance on particular platforms. 
-*To build Apache Solr from source, refer to the `BUILD.txt` file in -the distribution directory.* +*To build Apache Lucene from source, refer to the `BUILD.txt` file in +the distribution directory.* diff --git a/solr/SYSTEM_REQUIREMENTS.txt b/solr/SYSTEM_REQUIREMENTS.txt index d5edcc6766e..9b38cbf24f6 100644 --- a/solr/SYSTEM_REQUIREMENTS.txt +++ b/solr/SYSTEM_REQUIREMENTS.txt @@ -1,16 +1,16 @@ # System Requirements -Apache Lucene runs of Java 6 or greater. When using Java 7, be sure to +Apache Solr runs of Java 6 or greater. When using Java 7, be sure to install at least Update 1! With all Java versions it is strongly recommended to not use experimental `-XX` JVM options. It is also recommended to always use the latest update version of your Java VM, -because bugs may affect Lucene. An overview of known JVM bugs can be +because bugs may affect Solr. An overview of known JVM bugs can be found on http://wiki.apache.org/lucene-java/SunJavaBugs. CPU, disk and memory requirements are based on the many choices made in -implementing Lucene (document size, number of documents, and number of +implementing Solr (document size, number of documents, and number of hits retrieved to name a few). The benchmarks page has some information related to performance on particular platforms. 
-*To build Apache Lucene from source, refer to the `BUILD.txt` file in -the distribution directory.* +*To build Apache Solr from source, refer to the `BUILD.txt` file in +the distribution directory.* From a839ab7fb0329e3506a27b41bf8f1eb2f223582c Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 16 Oct 2012 15:38:44 +0000 Subject: [PATCH 12/20] don't call merged segment warmer on 100% deleted segment; drop the merged reader from the pool if it's 100% deleted git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1398850 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/index/IndexWriter.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index ab56cd585fa..9b0204dbab5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -34,7 +34,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.MergeState.CheckAbort; @@ -42,7 +41,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; @@ -52,7 +50,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; -import 
org.apache.lucene.util.MutableBits; import org.apache.lucene.util.ThreadInterruptedException; /** @@ -3118,13 +3115,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { mergedDeletes.dropChanges(); } readerPool.release(mergedDeletes); - if (dropSegment) { - readerPool.drop(mergedDeletes.info); - } } if (dropSegment) { assert !segmentInfos.contains(merge.info); + readerPool.drop(merge.info); deleter.deleteNewFiles(merge.info.files()); } @@ -3736,8 +3731,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); - - if (poolReaders && mergedSegmentWarmer != null) { + if (poolReaders && mergedSegmentWarmer != null && merge.info.info.getDocCount() != 0) { final ReadersAndLiveDocs rld = readerPool.get(merge.info, true); final SegmentReader sr = rld.getReader(IOContext.READ); try { From cd6c2fe0a2ed59a3792cd21a7e759c175792aa6f Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 16 Oct 2012 22:41:51 +0000 Subject: [PATCH 13/20] LUCENE-4485: CheckIndex's terms, terms/docs pairs counts don't include deleted docs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399028 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +++ .../org/apache/lucene/index/CheckIndex.java | 25 ++++++++++++------- .../apache/lucene/index/TestCheckIndex.java | 4 +-- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d9863e204f4..421f6e9ebca 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -57,6 +57,9 @@ Bug Fixes * LUCENE-4468: Fix rareish integer overflows in Block and Lucene40 postings formats (Robert Muir) +* LUCENE-4485: When CheckIndex terms, terms/docs pairs and tokens, + these counts now all exclude deleted documents. 
(Mike McCandless) + Optimizations * LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index bf5df24a0a2..371c48990ab 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -233,9 +233,12 @@ public class CheckIndex { TermIndexStatus() { } - /** Total term count */ + /** Number of terms with at least one live doc. */ public long termCount = 0L; + /** Number of terms with zero live docs docs. */ + public long delTermCount = 0L; + /** Total frequency across all terms. */ public long totFreq = 0L; @@ -750,7 +753,7 @@ public class CheckIndex { final TermsEnum termsEnum = terms.iterator(null); boolean hasOrd = true; - final long termCountStart = status.termCount; + final long termCountStart = status.delTermCount + status.termCount; BytesRef lastTerm = null; @@ -781,7 +784,6 @@ public class CheckIndex { if (docFreq <= 0) { throw new RuntimeException("docfreq: " + docFreq + " is out of bounds"); } - status.totFreq += docFreq; sumDocFreq += docFreq; docs = termsEnum.docs(liveDocs, docs); @@ -796,15 +798,13 @@ public class CheckIndex { } if (hasOrd) { - final long ordExpected = status.termCount - termCountStart; + final long ordExpected = status.delTermCount + status.termCount - termCountStart; if (ord != ordExpected) { throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected); } } } - status.termCount++; - final DocsEnum docs2; if (postings != null) { docs2 = postings; @@ -820,6 +820,7 @@ public class CheckIndex { if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } + status.totFreq++; visitedDocs.set(doc); int freq = -1; if (hasFreqs) { @@ -883,6 +884,12 @@ public class CheckIndex { } } + if (docCount != 0) { + status.termCount++; + } else { + status.delTermCount++; + } + final long 
totalTermFreq2 = termsEnum.totalTermFreq(); final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1; @@ -1063,11 +1070,11 @@ public class CheckIndex { // check unique term count long termCount = -1; - if (status.termCount-termCountStart > 0) { + if ((status.delTermCount+status.termCount)-termCountStart > 0) { termCount = fields.terms(field).size(); - if (termCount != -1 && termCount != status.termCount - termCountStart) { - throw new RuntimeException("termCount mismatch " + termCount + " vs " + (status.termCount - termCountStart)); + if (termCount != -1 && termCount != status.delTermCount + status.termCount - termCountStart) { + throw new RuntimeException("termCount mismatch " + (status.delTermCount + termCount) + " vs " + (status.termCount - termCountStart)); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java index c450fbc6998..cd33794142d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java @@ -75,8 +75,8 @@ public class TestCheckIndex extends LuceneTestCase { assertNotNull(seg.termIndexStatus); assertNull(seg.termIndexStatus.error); - assertEquals(19, seg.termIndexStatus.termCount); - assertEquals(19, seg.termIndexStatus.totFreq); + assertEquals(18, seg.termIndexStatus.termCount); + assertEquals(18, seg.termIndexStatus.totFreq); assertEquals(18, seg.termIndexStatus.totPos); assertNotNull(seg.storedFieldStatus); From 3634d6e8ae52db22b0b3108533b6f04c6b572380 Mon Sep 17 00:00:00 2001 From: David Wayne Smiley Date: Wed, 17 Oct 2012 11:34:24 +0000 Subject: [PATCH 14/20] LUCENE-4464 polygon almost touch test git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399185 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/test-files/data/LUCENE-4464.txt | 3 + .../lucene/spatial/StrategyTestCase.java | 8 +- .../lucene/spatial/prefix/JtsPolygonTest.java | 73 
+++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 lucene/spatial/src/test-files/data/LUCENE-4464.txt create mode 100644 lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java diff --git a/lucene/spatial/src/test-files/data/LUCENE-4464.txt b/lucene/spatial/src/test-files/data/LUCENE-4464.txt new file mode 100644 index 00000000000..dfb5a40a739 --- /dev/null +++ b/lucene/spatial/src/test-files/data/LUCENE-4464.txt @@ -0,0 +1,3 @@ +#id name shape +poly1 poly1 POLYGON ((-93.17288720912401 45.280265431486754, -93.17232270645628 45.2802724629027, -93.17229737711205 45.279497574052314, -93.1722224854913 45.277577770983854, -93.17218124644266 45.276747010395624, -93.16722650828461 45.276819421108826, -93.16581262076448 45.27684404529939, -93.16363038333625 45.276882054199596, -93.16249244695301 45.276929493877525, -93.16247370542268 45.27641118002343, -93.16246893668628 45.276279382682894, -93.1624671302382 45.274701063846244, -93.16246679905096 45.273381422360785, -93.16247689122851 45.273189685068424, -93.16249146710186 45.27291249464421, -93.16249868565903 45.272467966062614, -93.16247955957382 45.27177209534391, -93.1624787718002 45.27127651548793, -93.16247840794293 45.27104491547271, -93.16247917486976 45.27087000356473, -93.1624817727418 45.270279315147775, -93.16252487154968 45.26996729342093, -93.16254025661699 45.26976826077157, -93.16247902564132 45.269527941604, -93.16242684845764 45.2692774997531, -93.16242126018722 45.26894470083864, -93.16241263011544 45.26769394309626, -93.16246809168283 45.26571736107859, -93.16247263940593 45.26195548919013, -93.16253090997651 45.258615729449964, -93.16256878834184 45.25650987969364, -93.1626048203569 45.2546538608912, -93.16265873943591 45.251876274357876, -93.16275002007988 45.2510418534315, -93.16282237443883 45.25042383853711, -93.16286421513767 45.249181538840595, -93.16288289220509 45.24862697953288, -93.1629601120395 45.248250613185206, -93.16301002807151 
45.24802483983211, -93.16301621932013 45.247670020958665, -93.16301519349018 45.247478630666144, -93.16303001333274 45.24727504082362, -93.16303463142393 45.24713931946277, -93.16302280990728 45.2470107542477, -93.16298327344437 45.24685970499298, -93.16294217154733 45.246633449219054, -93.16294315088282 45.246419514713516, -93.16295754265565 45.24621538933992, -93.16296755618336 45.24580786412655, -93.16296268372803 45.245362220836384, -93.16296319568123 45.245046689033444, -93.16297766811293 45.24481357093532, -93.16296370759883 45.2445699039253, -93.16294931051515 45.24231310924752, -93.16294559876471 45.24173111255096, -93.16295568091667 45.240776604513705, -93.1629609359182 45.24053954238007, -93.1629658719288 45.24019639978025, -93.1625355179785 45.24018482062359, -93.15847246037083 45.24007549519542, -93.15641780558727 45.24006372373029, -93.15470331938288 45.24002991133718, -93.1515176880772 45.240038275846665, -93.14892151971884 45.24004508944476, -93.14597353408716 45.240012024375574, -93.14198169289922 45.239944427606616, -93.14246140322608 45.21441838866706, -93.14239730934507 45.20842345035032, -93.14240307538512 45.203669567890245, -93.13209436867183 45.20385828388066, -93.13238731320574 45.19696183064252, -93.13244550539693 45.19559178376392, -93.13255875219626 45.19292582294682, -93.12747185962866 45.19303831675316, -93.12741613255534 45.196689407842044, -93.12341724811418 45.196748516850086, -93.12336451543653 45.19630050937325, -93.12233270487748 45.19631189179194, -93.12244695905335 45.18943470505876, -93.12752867296823 45.18931969757398, -93.1275981937757 45.18579899512077, -93.12249095182051 45.18589579364393, -93.12250905286206 45.18230218633591, -93.11745336177542 45.182234528897865, -93.11742994994425 45.17494109686777, -93.11234677240823 45.174914625057596, -93.11232755368056 45.178541858988, -93.09142510557425 45.17830768889981, -93.0878908215621 45.18208021181682, -93.04087986544745 45.182020129318005, -93.02011304608662 
45.18206919600553, -92.99725469269949 45.18154883703301, -92.9866455346556 45.18162938363265, -92.98002761377205 45.181741313792635, -92.97460481311676 45.1817232745721, -92.95815138711436 45.18159971137449, -92.95832448011389 45.16710586357575, -92.95821211351648 45.15266682925307, -92.94804883291458 45.152678829402525, -92.94820512323935 45.14582287000843, -92.94821449767262 45.14541149629351, -92.93808126859899 45.145435393255234, -92.938064080176 45.1464755574292, -92.93790172782569 45.15630033200825, -92.93776855788026 45.156299483202375, -92.93416458772786 45.15627656196406, -92.92776593175911 45.156235863288074, -92.92779198321185 45.15260820059608, -92.9228643837518 45.15257871636257, -92.91761510291013 45.15254730117589, -92.91755895303478 45.15978011255037, -92.90742527225278 45.15975884768774, -92.90734951861361 45.16700513027527, -92.90243435593408 45.16697925148226, -92.90226994175299 45.16697838648701, -92.90228225598396 45.16960751885433, -92.90228682505473 45.170583562524534, -92.89838293958822 45.17058359192683, -92.89776337384279 45.17058359923907, -92.89720228241329 45.170636798053465, -92.89720546113311 45.171648743169875, -92.89721045187194 45.17323675651512, -92.89721215942521 45.17377958217219, -92.8972133713998 45.17416655315385, -92.89752994284902 45.17416793500262, -92.90230392627396 45.174188700362095, -92.90230695467396 45.17483317173849, -92.90230939234701 45.175352265892315, -92.90231342163983 45.17620891826606, -92.9023378718661 45.18141217320357, -92.89829195794003 45.18137903577816, -92.89197067471983 45.181327269964534, -92.86573042754982 45.18111238484799, -92.86537258386163 45.18110945889712, -92.86579788828743 45.16683341076013, -92.85850341291456 45.166840495697045, -92.85576616527777 45.1668651317465, -92.8455814929548 45.16695680639518, -92.8403672382906 45.167003741522834, -92.84037534438275 45.166359277271084, -92.83914257524022 45.166407761467035, -92.83786182101709 45.16655768366541, -92.83762301824869 45.16658563659705, 
-92.83700510809494 45.16665797101126, -92.83700330475195 45.1670405349812, -92.83520392476423 45.16704646605868, -92.83519998302931 45.1672093811339, -92.83518241658018 45.17114095264113, -92.8351705215998 45.17380185475555, -92.83516823773242 45.17431412368648, -92.82501384033566 45.174380025018145, -92.82373302900695 45.174963166130034, -92.82127603798283 45.17799740439804, -92.81495695139105 45.17798284134312, -92.81498212776123 45.18394380043827, -92.81496335262872 45.20297631525698, -92.81496300732859 45.2033351264244, -92.8149190887153 45.20460132029917, -92.81473397710002 45.21, -92.8198460035041 45.21, -92.81985864578533 45.21352006541341, -92.81476009958381 45.21350519453624, -92.81473397710002 45.211, -92.79434616877515 45.20979982288059, -92.79434485197183 45.210003888814526, -92.7942994128934 45.217028016258524, -92.79414754531777 45.217027433538036, -92.75950558164095 45.216895251116746, -92.75791266717471 45.216889175072694, -92.75634408090858 45.21737192056616, -92.75539334998972 45.21781096867505, -92.75544275719047 45.219840930849315, -92.75232263931744 45.219847708152834, -92.75345360864661 45.22241622713623, -92.75393100188802 45.22290500013628, -92.75454911801587 45.22425238152991, -92.75465656863904 45.22441872007679, -92.75478824580995 45.22461252606749, -92.75573200183275 45.22594899943625, -92.7559326169467 45.2263989667922, -92.756173357985 45.22677479396459, -92.75628338889855 45.227185737281864, -92.75651400327136 45.22770300256764, -92.75667800355963 45.228069998932774, -92.75745600158125 45.23052599674398, -92.75737071502948 45.23131853178694, -92.75760683805547 45.23212889115611, -92.7575248338702 45.23249816977935, -92.75760900807862 45.233043995948975, -92.75740715667484 45.23498808590038, -92.75739258433605 45.23515457917446, -92.75736004212973 45.235441823970014, -92.75728900664646 45.2361259970008, -92.75750924881613 45.23833187652166, -92.75783421241928 45.239151014730965, -92.75799784052033 45.2401986059374, -92.75814399470411 
45.24075700093086, -92.75910499448543 45.24444199845027, -92.75927217262658 45.246363482652335, -92.759708376526 45.24795052230262, -92.76024900009054 45.24960000150479, -92.76026400206055 45.25171699829065, -92.75984499770836 45.25286799832034, -92.75883599655404 45.25442699925451, -92.75592228367496 45.256779108256175, -92.75559993467031 45.25707105760005, -92.75540261715516 45.25725539605134, -92.75458100472993 45.258140999051975, -92.75362100152239 45.25941899619891, -92.75258800661327 45.2617860021943, -92.7523530053651 45.26244399793552, -92.7521330910868 45.26318539548715, -92.75199986320791 45.26381589028983, -92.7519440909167 45.26415703570502, -92.75192391851121 45.26559725594415, -92.75247612752318 45.26746623235666, -92.75254008932185 45.26768063816608, -92.75267394173396 45.268130176728555, -92.75287910082022 45.2688320393691, -92.7530104867237 45.26921012533672, -92.75329204456183 45.26980089141646, -92.75414711285153 45.2712720735891, -92.7552129966957 45.27237299947564, -92.75574299378961 45.27288399662051, -92.75678399520334 45.273891998902435, -92.75750199664172 45.27442999825494, -92.75801999923948 45.274822998224586, -92.75866321741752 45.27578539520815, -92.7589271849383 45.27616491445647, -92.75924599787822 45.27671899844492, -92.75941999802778 45.27718649803985, -92.75960999785612 45.27731999914, -92.75978699565532 45.27743849638546, -92.76004300142414 45.277978995119405, -92.76061199738588 45.27882799808139, -92.76117799722955 45.280582999200305, -92.7613619999475 45.28220800042353, -92.76167096088638 45.2836803717185, -92.76198517744629 45.2850267976271, -92.76206945308458 45.2853507773657, -92.76202745146396 45.286658659028, -92.76204199858486 45.28698499388888, -92.76201199644161 45.28793199672008, -92.76200399722086 45.28821299803955, -92.76121399640145 45.28913599914764, -92.7603870028136 45.28991599406784, -92.75871000510011 45.29096499709372, -92.75799200634881 45.291140996050984, -92.75687800551285 45.29148399845183, 
-92.75507700319366 45.2919269952758, -92.75480030147037 45.291986779669465, -92.74569331443023 45.29606484000191, -92.74555580404507 45.29614422445335, -92.74523588498667 45.29631411941847, -92.76071968429389 45.29617634034589, -92.79448651640953 45.29587194744184, -92.82553071142016 45.29634288822895, -92.82523623967 45.28697641600944, -92.8246113114385 45.27459391718561, -92.82414631698042 45.26733414102221, -92.83443181636859 45.267466042102846, -92.83450366471794 45.265666722695805, -92.8395297613521 45.26570782145342, -92.83954651660255 45.2675117790906, -92.85488466565545 45.267633226883305, -92.85446380439222 45.260381978642265, -92.8530801070886 45.256940031152055, -92.8746167542768 45.2569553750289, -92.87517983690772 45.26774272327855, -92.88032459143679 45.26775272915376, -92.88028907325248 45.27498539130476, -92.885429695981 45.27499516876503, -92.88541044770409 45.27862274921294, -92.8854460740016 45.28269595676258, -92.8858306795285 45.28583335680999, -92.89095994168375 45.285838365551086, -92.89147668909354 45.290056047991875, -92.89183494474656 45.292995365557246, -92.89287941280966 45.29621886928581, -92.93574219102997 45.296382695230655, -92.9366855829562 45.29639453639271, -92.93730010601949 45.29640233268984, -92.93773633826109 45.296407862218295, -92.95031707870098 45.29656663627082, -92.95732733387652 45.29663267857854, -92.95723233585932 45.305785498930874, -92.95755812361517 45.31807293816823, -92.9575313307762 45.325662873647204, -92.96200814151011 45.32569410734573, -92.96201051236334 45.33056403262943, -92.95763365021791 45.330562956294486, -92.95750484414667 45.34006528297348, -92.95740249422305 45.3523406680097, -92.96272753035339 45.352295608902175, -92.96260253143201 45.363259386181184, -92.95732537061275 45.363286992831206, -92.95715614538045 45.36869421119079, -92.97302216756823 45.36904156334545, -92.9731090974606 45.37554810693529, -92.98760985309234 45.37555619312347, -92.98429494637762 45.38215591061988, -92.9924184629002 
45.38233326055907, -93.01850137881846 45.38277378724873, -93.01956464133914 45.41174708503911, -93.03973263863047 45.412106304897264, -93.06569776540464 45.412656360563524, -93.08346874844985 45.41297273973574, -93.09263091377308 45.41335460313747, -93.1012213163472 45.413720365424695, -93.10759754754753 45.41373499082408, -93.14214551761233 45.41373101611429, -93.1421802894172 45.40666589187203, -93.14209155741717 45.38498980813781, -93.14398965535287 45.369981475770224, -93.13861914028635 45.36992203894643, -93.13946982733188 45.35540022959687, -93.14362673736643 45.35542059147377, -93.14338145836778 45.34816201728363, -93.14259222919002 45.34815677471413, -93.14123737100095 45.34271091215897, -93.14120170425102 45.34166175650565, -93.14159640367895 45.340845226624126, -93.16430988689314 45.34107128935172, -93.1641229508536 45.33731028186903, -93.163783504365 45.32713863170596, -93.16354815472778 45.31568179036097, -93.1634974864936 45.3115083559682, -93.16335415000293 45.30838048844207, -93.16326942872365 45.30653168298998, -93.16286993093225 45.29781375116957, -93.16292479029 45.297483756012355, -93.16251838572086 45.29748043583636, -93.16242411934059 45.29340169752503, -93.16237192435095 45.291513658346155, -93.16125915756838 45.29101148729498, -93.16224903398384 45.290456018307964, -93.16243543883762 45.29031474509565, -93.16248365754952 45.29016960982244, -93.1625270557542 45.28932067928762, -93.16350507037129 45.28940282906675, -93.16413761242012 45.28944739938537, -93.16430369461645 45.289411531953206, -93.164472138656 45.28937514511818, -93.16431016328954 45.288334379584406, -93.16422830296436 45.28780835028316, -93.16373011428878 45.287807744950875, -93.16348868413621 45.28778563548775, -93.16304669211718 45.28779811404454, -93.16252493722239 45.28781182501286, -93.1625182014603 45.28601279964026, -93.1625127377889 45.28416325442296, -93.1717122152211 45.28391079701647, -93.17291828928865 45.28387769615237, -93.17292468588315 45.28327561174209, 
-93.1729215958459 45.28269914269899, -93.17290904354249 45.28216703245599, -93.17290447076888 45.281410092382885, -93.17289432485279 45.28068732375472, -93.17288720912401 45.280265431486754)) +poly2 poly2 POLYGON((-93.26592485308495 45.18931973506328, -93.26373519655886 45.18933815615675, -93.2637828223868 45.18660121752107, -93.26280973893772 45.18656958194617, -93.2603275028686 45.186488876325654, -93.25976682936536 45.18646929139094, -93.25877703935303 45.18686109057519, -93.25788401039608 45.18633824889261, -93.25713811973642 45.186864792015704, -93.25660115549654 45.18628640445176, -93.24081325108644 45.18609354693712, -93.2356823133177 45.1860308697061, -93.23474944979115 45.186019474019865, -93.23478565684188 45.18266103967549, -93.23072066106351 45.18267669158043, -93.22480340476464 45.18267437402639, -93.21952101307244 45.18267371221728, -93.21950131879755 45.184689058075534, -93.21950381582634 45.18590104693386, -93.21950547892035 45.186708829298695, -93.21948324866376 45.18808573281868, -93.21947477056304 45.188619717930756, -93.2194751507154 45.1899146284615, -93.22390334137022 45.18991091026497, -93.2245904557543 45.18993775453468, -93.2245784309098 45.19028702856576, -93.2245932424241 45.19081834295508, -93.22460314163764 45.19137779927979, -93.22459067695124 45.19162607300785, -93.22458367100289 45.19176562022696, -93.22354968949122 45.191760188521705, -93.22131530006368 45.19175468785821, -93.22018302807493 45.19175762419069, -93.21965635944291 45.19175898704962, -93.21824735047468 45.191762639857636, -93.21840068968908 45.191840907619024, -93.21858279007587 45.191950538176606, -93.21874378970492 45.19205449060312, -93.21893581214327 45.192204972059955, -93.21911499957261 45.19238205879934, -93.21934767139433 45.192628269473076, -93.21954522989743 45.1928508489684, -93.21972003978802 45.19304459976245, -93.21997538064213 45.19332124206717, -93.22011354045264 45.193470928079385, -93.22046875034326 45.19384479955501, -93.2206469058326 
45.19404172922978, -93.22079845082156 45.194244494502364, -93.2209416400795 45.19447508772328, -93.22107397875365 45.19474417974581, -93.2211368505518 45.19490985928749, -93.22118231976518 45.195047277731625, -93.22124659963487 45.19525315038074, -93.22128314962913 45.195396480693944, -93.22130715028514 45.195564823375, -93.22131862069979 45.195757013030224, -93.22130704484326 45.19599065847414, -93.22127083850016 45.19622942989826, -93.22124456959293 45.19636257994296, -93.22120917947201 45.19651471803614, -93.22115328972328 45.196774039833144, -93.22110053150747 45.19700410181286, -93.22105123806169 45.19721904984113, -93.21939747849284 45.19720754776318, -93.21658707902952 45.19719901749774, -93.21405492494755 45.19718389708806, -93.21060961905127 45.19716332241369, -93.20846870851273 45.19715738191871, -93.20635420918421 45.19714993030806, -93.20384995444252 45.19713947337882, -93.20382099935851 45.195915480832355, -93.20379040854755 45.195493880093856, -93.20373937951182 45.19525460196455, -93.20366799901262 45.194730001052676, -93.20359944927 45.194273469702246, -93.20351980946141 45.19386975065817, -93.20336890147132 45.1933312322322, -93.20348773988103 45.19317805926476, -93.20364964522179 45.19294381603321, -93.20373782170354 45.192758795441485, -93.20378634041538 45.1925589245846, -93.20378780054193 45.1924118820702, -93.20373224993294 45.192246366644895, -93.20366678053941 45.192063182244134, -93.20349712021084 45.19164111034226, -93.20336402335359 45.191262445660406, -93.20333661484061 45.19107258136713, -93.20334012614478 45.19082850506992, -93.20338500114326 45.190584969374704, -93.20346313590359 45.19035226093307, -93.20353125074365 45.19015096025676, -93.20337886118753 45.19012069933683, -93.20280004152556 45.18999823901699, -93.20236430223584 45.1898748712581, -93.20223796285948 45.18983446401002, -93.20171338128353 45.189666689690526, -93.20105175026708 45.18940210042135, -93.20059509118217 45.18937347081525, -93.20014399997638 45.18935951962055, 
-93.1999096512546 45.18934032171285, -93.19969162075753 45.18934030912719, -93.19953079227915 45.18938062079311, -93.19930724128803 45.189471810355066, -93.19836742091539 45.18954495845859, -93.19790904174889 45.189755310346555, -93.19770094626355 45.18978905045578, -93.19728573057267 45.1898563687543, -93.19706717806918 45.18978234280038, -93.1961191012612 45.18980511056629, -93.19583707702907 45.18977039110604, -93.19495714548943 45.18966207098092, -93.19409949054268 45.18955648989894, -93.19361391124465 45.18954758129998, -93.19142135137997 45.189507349701145, -93.18867729058191 45.18943758222878, -93.18766468614145 45.18941183701645, -93.1869063815807 45.18939255950494, -93.18676117212036 45.18939312363656, -93.18583601993124 45.18939673056086, -93.18362870083628 45.18940533739182, -93.18015920861117 45.189432919714875, -93.17748344774633 45.18940274982507, -93.17100678798263 45.18934067185518, -93.1680509570817 45.18931686702863, -93.16712265967519 45.189309389152754, -93.1632729184803 45.189289560128074, -93.1524420382428 45.189137301470666, -93.1488330300988 45.189087681208825, -93.14258337454692 45.18900953614207, -93.1425728385595 45.18964797148711, -93.14257129908563 45.19044710129245, -93.14256839076815 45.191380659844974, -93.14257549009486 45.192639988690985, -93.14256591028126 45.193624481846925, -93.1425562203409 45.19475816134898, -93.14254671019609 45.19564806883362, -93.14253591314012 45.19592629600891, -93.1425191002932 45.19635953895129, -93.14240307328147 45.20366956427245, -93.14239731024965 45.20842345007226, -93.14246141142196 45.2144183909345, -93.14198170032972 45.23994442974387, -93.14597353942523 45.240012030562795, -93.14892151981124 45.24004509174428, -93.15151768504401 45.24003827478177, -93.15470331907811 45.2400299112851, -93.15641781022819 45.240063720104146, -93.15847245794774 45.24007548756677, -93.16253551804624 45.24018481776239, -93.16296586932476 45.24019639699945, -93.16296093749654 45.240539543608094, -93.16295567833508 
45.24077659970959, -93.16294559992268 45.24173110984731, -93.16294931429802 45.242313107885224, -93.16296371061823 45.24456989801016, -93.16297766989932 45.24481356907269, -93.16296319587042 45.24504668430867, -93.16296267909655 45.24536222031531, -93.16296756070733 45.24580785775435, -93.16295754084666 45.24621538734816, -93.16294315030365 45.24641950970948, -93.1629421699368 45.246633444731216, -93.16298326866249 45.24685970478054, -93.16302280494743 45.24701074802872, -93.1630346343297 45.247139320093076, -93.16303000914128 45.24727503858908, -93.16301519072017 45.24747862874394, -93.16301622062082 45.247670019373224, -93.16301002844395 45.24802483903903, -93.16296010836595 45.248250609285236, -93.16288288941641 45.248626979189, -93.16286421036493 45.24918153632857, -93.16282236866641 45.25042383853131, -93.16275001793326 45.25104184745623, -93.16265874011768 45.251876269431015, -93.1626048141941 45.25465385517585, -93.162568780952 45.25650987775294, -93.16253090903855 45.25861572819838, -93.16247264162719 45.261955487720506, -93.16246809047925 45.26571735738526, -93.16241263022145 45.267693939529536, -93.16242125944353 45.26894469986081, -93.16242684956876 45.269277499432015, -93.16247902269161 45.26952793567272, -93.16254025984375 45.269768259020054, -93.1625248689828 45.26996728874923, -93.16248176954191 45.27027930739088, -93.16247917649272 45.270869996810376, -93.16247840915516 45.27104490906511, -93.16247877426206 45.27127651283899, -93.162479560911 45.27177208702322, -93.16249869026827 45.272467959171365, -93.16249147172434 45.27291248854739, -93.16247688682598 45.27318968296259, -93.16246680083795 45.27338141702519, -93.1624671298516 45.27470105775956, -93.16246893968787 45.276279379505084, -93.1624737063593 45.2764111771935, -93.16249244905424 45.276929488819604, -93.16363037995181 45.27688204948932, -93.16581262202895 45.276844043452684, -93.16722651010657 45.27681941864911, -93.17218124072862 45.27674700948904, -93.1722224784459 45.27757776899891, 
-93.17229737034532 45.279497570305445, -93.17232269933695 45.28027246109518, -93.17288721010608 45.28026543129147, -93.1728943187817 45.2806873180744, -93.17290447218495 45.28141008817547, -93.17290904002667 45.28216703008146, -93.17292159084371 45.28269913830247, -93.17292468118433 45.283275608616165, -93.17291828224536 45.28387769767021, -93.1717122127579 45.283910797244246, -93.16251273143365 45.28416325629099, -93.16251820094257 45.28601279797615, -93.16252493935717 45.287811833132764, -93.16304669905364 45.28779811692505, -93.16348868871324 45.28778563925035, -93.16373011962693 45.28780774767522, -93.16422830587629 45.28780835110865, -93.1643101699488 45.28833437868018, -93.16447213914093 45.289375147768524, -93.16430369361024 45.28941153310711, -93.16413761723706 45.28944740219967, -93.16350507286433 45.289402832527344, -93.16252705964098 45.289320683284735, -93.16248365939401 45.29016961156254, -93.16243543831087 45.29031475002342, -93.16224903970826 45.2904560215217, -93.16125915934788 45.29101149209126, -93.16237192796683 45.291513661220456, -93.16242412151107 45.29340170072084, -93.16251838980172 45.29748044313293, -93.16292479370829 45.29748376064082, -93.18639094534673 45.29767533425263, -93.18833342032521 45.29769119188229, -93.1925428426471 45.29770437859642, -93.19474753040078 45.29771128804242, -93.19765740975974 45.29769541872667, -93.20297591868295 45.29776263827187, -93.20683144906876 45.29774197003572, -93.20883497923562 45.297766559466794, -93.21546742887979 45.297768422222155, -93.22617724980643 45.29791971794424, -93.23408017640227 45.298023690859175, -93.2343080073169 45.288444186545625, -93.23432525195352 45.287995322205425, -93.23469515647318 45.269279712377234, -93.23475627635968 45.266203358381446, -93.23560542207227 45.26619551047824, -93.23899176558338 45.26613779367068, -93.24250527367546 45.26608234822973, -93.243445378056 45.26606503829342, -93.24512861083372 45.2660344570852, -93.24588057830995 45.26602026067889, -93.24713274287363 
45.26599455787498, -93.25036838013868 45.26592734514467, -93.25172461510564 45.265900698298395, -93.25236738024864 45.265888260809106, -93.25481754173921 45.26583307838667, -93.25571357952906 45.265819559899164, -93.2594981489083 45.26575415212897, -93.26098138766197 45.265754375486374, -93.26155216698102 45.26565612540643, -93.26170097145753 45.26562288963898, -93.26208574477789 45.26553876835043, -93.26245875524685 45.265434673708015, -93.26277275191426 45.265316250819595, -93.26311663127117 45.26517251314189, -93.26346212923646 45.26500240317637, -93.26393572774133 45.26477558787491, -93.2651820516718 45.26406759657772, -93.26518110226205 45.26337226279194, -93.26515218908767 45.26311636791454, -93.26518703008779 45.262871689663605, -93.2652064900752 45.26265582104258, -93.2652110298225 45.26215614194132, -93.26522443086994 45.26112430402238, -93.26522989950563 45.260703199933474, -93.26524872191168 45.25930812973533, -93.26525187087448 45.258897852775995, -93.26525857049303 45.258025812056765, -93.26527734826267 45.256675072153314, -93.26528081766433 45.25612813038996, -93.265287399575 45.25512698071874, -93.26530031054412 45.253711671615115, -93.26531490547187 45.25273002640574, -93.26532214123614 45.252243491267, -93.26533817105908 45.25062180123498, -93.26535413994274 45.24906421173263, -93.26536141910549 45.24841165046578, -93.26536638602661 45.24796649509243, -93.26537318826473 45.24735637067748, -93.26539798003012 45.24589779189643, -93.265404909549 45.24454674190931, -93.2654060939449 45.24296904311022, -93.26540624905046 45.24276127146885, -93.26540843815205 45.2420263885843, -93.26541275006169 45.240577352345994, -93.2654375717671 45.238843301612725, -93.26544518264211 45.237906888690105, -93.26544940933664 45.23738688110566, -93.26546966016808 45.236093591927926, -93.2654781584622 45.235359229961944, -93.26548338867605 45.23490715107922, -93.26553582901259 45.23354268990693, -93.26554071996831 45.23330119833777, -93.26555987026248 45.2323552839169, 
-93.26557251955711 45.23173040973764, -93.26556626032777 45.22975235185782, -93.26556606661761 45.229367333607186, -93.26556579189545 45.228823722705066, -93.26562882232702 45.226872206176665, -93.26571073971922 45.224335971082276, -93.26574560622672 45.22192222321787, -93.26574836877063 45.22173093256304, -93.26577033227747 45.22021043432355, -93.26578588443306 45.21913391123174, -93.26580662128347 45.21769799745153, -93.26580983179628 45.217475736026664, -93.26581322607608 45.217240685631346, -93.26590715360736 45.210737684073244, -93.26591966090616 45.209871711997586, -93.2659016992406 45.20722015227932, -93.26587484243684 45.203254836571126, -93.26585637174348 45.20052765082941, -93.26585684827346 45.19841676076085, -93.26587786763154 45.19732741144391, -93.2658624676632 45.1970879109074, -93.2659274100303 45.194004979577755, -93.26595017983325 45.191531890895845, -93.26595423366354 45.19092534610275, -93.26593099287571 45.190637988686554, -93.2659274057232 45.18986823069059, -93.26592485308495 45.18931973506328)) \ No newline at end of file diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java index 28459025157..ba8d53c8bf3 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.spatial.query.SpatialArgsParser; import org.junit.Assert; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -109,8 +110,11 @@ public abstract class StrategyTestCase extends SpatialTestCase { } protected Iterator getSampleData(String testDataFile) throws IOException { - return new SampleDataReader( - getClass().getClassLoader().getResourceAsStream("data/"+testDataFile) ); + String path = "data/" + testDataFile; + 
InputStream stream = getClass().getClassLoader().getResourceAsStream(path); + if (stream == null) + throw new FileNotFoundException("classpath resource not found: "+path); + return new SampleDataReader(stream); } protected Iterator getTestQueries(String testQueryFile, SpatialContext ctx) throws IOException { diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java new file mode 100644 index 00000000000..13ae6544e77 --- /dev/null +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java @@ -0,0 +1,73 @@ +package org.apache.lucene.spatial.prefix; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.spatial.StrategyTestCase; +import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; + +public class JtsPolygonTest extends StrategyTestCase { + + private static final double LUCENE_4464_distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;//DEFAULT 2.5% + + public JtsPolygonTest() { + try { + HashMap args = new HashMap(); + args.put("spatialContextFactory", + "com.spatial4j.core.context.jts.JtsSpatialContextFactory"); + ctx = SpatialContextFactory.makeSpatialContext(args, getClass().getClassLoader()); + } catch (NoClassDefFoundError e) { + assumeTrue("This test requires JTS jar: "+e, false); + } + + GeohashPrefixTree grid = new GeohashPrefixTree(ctx, 11);//< 1 meter == 11 maxLevels + this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName()); + ((RecursivePrefixTreeStrategy)this.strategy).setDistErrPct(LUCENE_4464_distErrPct);//1% radius (small!) + } + + @Test + /** LUCENE-4464 */ + public void testCloseButNoMatch() throws IOException { + getAddAndVerifyIndexedDocuments("LUCENE-4464.txt"); + SpatialArgs args = q( + "POLYGON((-93.18100824442227 45.25676372469945," + + "-93.23182001200654 45.21421290799412," + + "-93.16315546122038 45.23742639412364," + + "-93.18100824442227 45.25676372469945))", + LUCENE_4464_distErrPct); + SearchResults got = executeQuery(strategy.makeQuery(args), 100); + assertEquals(1, got.numFound); + assertEquals("poly2", got.results.get(0).document.get("id")); + //did not find poly 1 ! 
+ } + + private SpatialArgs q(String shapeStr, double distErrPct) { + Shape shape = ctx.readShape(shapeStr); + SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, shape); + args.setDistErrPct(distErrPct); + return args; + } + +} From 398d987b19dc123ef4091f5641d1ab311547f38a Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 17 Oct 2012 11:38:38 +0000 Subject: [PATCH 15/20] LUCENE-4006: expect SYSTEM_REQUIREMENTS.txt in unpacked Lucene / Solr git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399188 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/scripts/smokeTestRelease.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index dba87586f70..72cbf660c64 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -576,7 +576,7 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): os.chdir(unpackPath) isSrc = artifact.find('-src') != -1 l = os.listdir(unpackPath) - textFiles = ['LICENSE', 'NOTICE', 'README'] + textFiles = ['LICENSE', 'NOTICE', 'README', 'SYSTEM_REQUIREMENTS'] if project == 'lucene': textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE')) if isSrc: From 2becc99249ff96ccd27a7f3f06b304dbf4c35cb8 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 17 Oct 2012 12:40:46 +0000 Subject: [PATCH 16/20] LUCENE-4486: Add support for ConstantScoreQuery in Highlighter. 
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399205 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 +++ .../highlight/WeightedSpanTermExtractor.java | 5 ++++ .../search/highlight/HighlighterTest.java | 27 ++++++++++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 421f6e9ebca..24f04b348fb 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -56,6 +56,9 @@ Bug Fixes * LUCENE-4468: Fix rareish integer overflows in Block and Lucene40 postings formats (Robert Muir) + +* LUCENE-4486: Add support for ConstantScoreQuery in Highlighter. + (Simon Willnauer) * LUCENE-4485: When CheckIndex terms, terms/docs pairs and tokens, these counts now all exclude deleted documents. (Mike McCandless) diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 4b45302bc27..4412738cdff 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -141,6 +141,11 @@ public class WeightedSpanTermExtractor { extractWeightedSpanTerms(terms, (SpanQuery) query); } else if (query instanceof FilteredQuery) { extract(((FilteredQuery) query).getQuery(), terms); + } else if (query instanceof ConstantScoreQuery) { + final Query q = ((ConstantScoreQuery) query).getQuery(); + if (q != null) { + extract(q, terms); + } } else if (query instanceof DisjunctionMaxQuery) { for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { extract(iterator.next(), terms); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 7ce0731e1ef..115fc10f2f6 100644 
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -603,7 +603,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte // Not sure we can assert anything here - just running to check we dont // throw any exceptions } - + public void testSpanHighlighting() throws Exception { Query query1 = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD_NAME, "wordx")), @@ -663,6 +663,31 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte helper.start(); } + + public void testGetBestFragmentsConstantScore() throws Exception { + TestHighlightRunner helper = new TestHighlightRunner() { + + @Override + public void run() throws Exception { + numHighlights = 0; + if (random().nextBoolean()) { + BooleanQuery bq = new BooleanQuery(); + bq.add(new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery( + new Term(FIELD_NAME, "kennedy")))), Occur.MUST); + bq.add(new ConstantScoreQuery(new TermQuery(new Term(FIELD_NAME, "kennedy"))), Occur.MUST); + doSearching(bq); + } else { + doSearching(new ConstantScoreQuery(new TermQuery(new Term(FIELD_NAME, + "kennedy")))); + } + doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this); + assertTrue("Failed to find correct number of highlights " + numHighlights + " found", + numHighlights == 4); + } + }; + + helper.start(); + } public void testGetFuzzyFragments() throws Exception { TestHighlightRunner helper = new TestHighlightRunner() { From d50d86168d63d5c5c210377e067580bf42f8e2cc Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 17 Oct 2012 17:05:11 +0000 Subject: [PATCH 17/20] add a way to keep a test's temporary files even if it passes (e.g. 
hprof profiler output or something) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399347 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/common-build.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lucene/common-build.xml b/lucene/common-build.xml index bb6e90872aa..e02f3518e60 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -771,6 +771,7 @@ + @@ -808,7 +809,7 @@ dynamicAssignmentRatio="${tests.dynamicAssignmentRatio}" shuffleOnSlave="true" - leaveTemporary="false" + leaveTemporary="${tests.leaveTemporary}" seed="${tests.seed}" heartbeat="${tests.heartbeat}" @@ -1116,6 +1117,10 @@ ant -Dtests.timestamps=on ... # Override forked JVM file.encoding ant -Dtests.file.encoding=XXX ... +# Don't remove temporary files under slave directories, even if +# the test passes. +ant -Dtests.leaveTemporary=true + # Output test files and reports. ${tests-output}/tests-report.txt - full ASCII tests report ${tests-output}/tests-failures.txt - failures only (if any) From dd93b85321d58079e3416bd07d6a5cc0990572c3 Mon Sep 17 00:00:00 2001 From: "Chris M. 
Hostetter" Date: Wed, 17 Oct 2012 22:56:28 +0000 Subject: [PATCH 18/20] SOLR-3961: Fixed error using LimitTokenCountFilterFactory git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399474 13f79535-47bb-0310-9956-ffa450edef68 --- .../LimitTokenCountFilterFactory.java | 7 +-- .../TestLimitTokenCountFilterFactory.java | 55 +++++++++++++++++++ solr/CHANGES.txt | 3 + .../solr/collection1/conf/schema15.xml | 6 ++ 4 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java index 83d60671d4c..346d9afb95e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java @@ -36,16 +36,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; */ public class LimitTokenCountFilterFactory extends TokenFilterFactory { + public static final String MAX_TOKEN_COUNT_KEY = "maxTokenCount"; int maxTokenCount; @Override public void init(Map args) { super.init( args ); - String maxTokenCountArg = args.get("maxTokenCount"); - if (maxTokenCountArg == null) { - throw new IllegalArgumentException("maxTokenCount is mandatory."); - } - maxTokenCount = Integer.parseInt(args.get(maxTokenCountArg)); + maxTokenCount = getInt(MAX_TOKEN_COUNT_KEY); } @Override diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java new file mode 100644 index 00000000000..9296d0d86ce --- /dev/null +++ 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java @@ -0,0 +1,55 @@ +package org.apache.lucene.analysis.miscellaneous; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; + +public class TestLimitTokenCountFilterFactory extends BaseTokenStreamTestCase { + + public void test() throws IOException { + LimitTokenCountFilterFactory factory = new LimitTokenCountFilterFactory(); + Map args = new HashMap(); + args.put(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3"); + factory.init(args); + String test = "A1 B2 C3 D4 E5 F6"; + MockTokenizer tok = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false); + // LimitTokenCountFilter doesn't consume the entire stream that it wraps + tok.setEnableChecks(false); + TokenStream stream = factory.create(tok); + assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" }); + + // param is required + factory = new LimitTokenCountFilterFactory(); + args = new HashMap(); + IllegalArgumentException iae = null; + try { + factory.init(args); + } catch (IllegalArgumentException e) { + assertTrue("exception doesn't 
mention param: " + e.getMessage(), + 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY)); + iae = e; + } + assertNotNull("no exception thrown", iae); + } +} diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 779d9d71562..389dc773c8e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -81,6 +81,9 @@ Bug Fixes * SOLR-3940: Rejoining the leader election incorrectly triggers the code path for a fresh cluster start rather than fail over. (Mark Miller) +* SOLR-3961: Fixed error using LimitTokenCountFilterFactory + (Jack Krupansky, hossman) + Other Changes ---------------------- diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml index 62e26fdb3f5..5c613dd81a6 100755 --- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml @@ -269,6 +269,12 @@ + + + + + + From 33b30097aae95e67fa205b640323ae88f59f191e Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 18 Oct 2012 12:19:05 +0000 Subject: [PATCH 19/20] reduce RAM cost per unique field while writing postings git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399607 13f79535-47bb-0310-9956-ffa450edef68 --- .../codecs/blockterms/BlockTermsWriter.java | 61 ++++++++++------ .../lucene/codecs/BlockTreeTermsWriter.java | 72 ++++++++++++------- 2 files changed, 84 insertions(+), 49 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java index a3b27a2890e..367e45e2b1e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java @@ -69,7 +69,27 @@ public class BlockTermsWriter extends FieldsConsumer { final FieldInfos fieldInfos; FieldInfo currentField; private final 
TermsIndexWriterBase termsIndexWriter; - private final List fields = new ArrayList(); + + private static class FieldMetaData { + public final FieldInfo fieldInfo; + public final long numTerms; + public final long termsStartPointer; + public final long sumTotalTermFreq; + public final long sumDocFreq; + public final int docCount; + + public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + this.termsStartPointer = termsStartPointer; + this.numTerms = numTerms; + this.sumTotalTermFreq = sumTotalTermFreq; + this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + } + } + + private final List fields = new ArrayList(); // private final String segment; @@ -108,9 +128,7 @@ public class BlockTermsWriter extends FieldsConsumer { assert currentField == null || currentField.name.compareTo(field.name) < 0; currentField = field; TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer()); - final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter); - fields.add(terms); - return terms; + return new TermsWriter(fieldIndexWriter, field, postingsWriter); } @Override @@ -118,27 +136,18 @@ public class BlockTermsWriter extends FieldsConsumer { try { - int nonZeroCount = 0; - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - nonZeroCount++; - } - } - final long dirStart = out.getFilePointer(); - out.writeVInt(nonZeroCount); - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.numTerms); - out.writeVLong(field.termsStartPointer); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - out.writeVLong(field.sumTotalTermFreq); - } - out.writeVLong(field.sumDocFreq); - out.writeVInt(field.docCount); + out.writeVInt(fields.size()); + for(FieldMetaData field : fields) { + 
out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVLong(field.termsStartPointer); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + out.writeVLong(field.sumTotalTermFreq); } + out.writeVLong(field.sumDocFreq); + out.writeVInt(field.docCount); } writeTrailer(dirStart); } finally { @@ -249,6 +258,14 @@ public class BlockTermsWriter extends FieldsConsumer { this.sumDocFreq = sumDocFreq; this.docCount = docCount; fieldIndexWriter.finish(out.getFilePointer()); + if (numTerms > 0) { + fields.add(new FieldMetaData(fieldInfo, + numTerms, + termsStartPointer, + sumTotalTermFreq, + sumDocFreq, + docCount)); + } } private int sharedPrefix(BytesRef term1, BytesRef term2) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java index c3c4c8cf5aa..39ced1d8e44 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java @@ -228,7 +228,30 @@ public class BlockTreeTermsWriter extends FieldsConsumer { final PostingsWriterBase postingsWriter; final FieldInfos fieldInfos; FieldInfo currentField; - private final List fields = new ArrayList(); + + private static class FieldMetaData { + public final FieldInfo fieldInfo; + public final BytesRef rootCode; + public final long numTerms; + public final long indexStartFP; + public final long sumTotalTermFreq; + public final long sumDocFreq; + public final int docCount; + + public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount) { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms; + this.rootCode = rootCode; + this.indexStartFP = indexStartFP; + this.numTerms = numTerms; + this.sumTotalTermFreq = sumTotalTermFreq; + 
this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + } + } + + private final List fields = new ArrayList(); // private final String segment; /** Create a new writer. The number of items (terms or @@ -313,9 +336,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer { //if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name); assert currentField == null || currentField.name.compareTo(field.name) < 0; currentField = field; - final TermsWriter terms = new TermsWriter(field); - fields.add(terms); - return terms; + return new TermsWriter(field); } static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) { @@ -1007,6 +1028,14 @@ public class BlockTreeTermsWriter extends FieldsConsumer { // System.out.println("SAVED to " + dotFileName); // w.close(); // } + + fields.add(new FieldMetaData(fieldInfo, + ((PendingBlock) pending.get(0)).index.getEmptyOutput(), + numTerms, + indexStartFP, + sumTotalTermFreq, + sumDocFreq, + docCount)); } else { assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1; assert sumDocFreq == 0; @@ -1024,34 +1053,23 @@ public class BlockTreeTermsWriter extends FieldsConsumer { IOException ioe = null; try { - int nonZeroCount = 0; - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - nonZeroCount++; - } - } - final long dirStart = out.getFilePointer(); final long indexDirStart = indexOut.getFilePointer(); - out.writeVInt(nonZeroCount); + out.writeVInt(fields.size()); - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms"); - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.numTerms); - final BytesRef rootCode = ((PendingBlock) field.pending.get(0)).index.getEmptyOutput(); - assert rootCode != null: "field=" + field.fieldInfo.name + " numTerms=" + field.numTerms; - out.writeVInt(rootCode.length); - 
out.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - out.writeVLong(field.sumTotalTermFreq); - } - out.writeVLong(field.sumDocFreq); - out.writeVInt(field.docCount); - indexOut.writeVLong(field.indexStartFP); + for(FieldMetaData field : fields) { + //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms"); + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVInt(field.rootCode.length); + out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + out.writeVLong(field.sumTotalTermFreq); } + out.writeVLong(field.sumDocFreq); + out.writeVInt(field.docCount); + indexOut.writeVLong(field.indexStartFP); } writeTrailer(out, dirStart); writeIndexTrailer(indexOut, indexDirStart); From 3088a66b17a1cf32522b3cc8ab1635fbb8a8da67 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 18 Oct 2012 12:34:08 +0000 Subject: [PATCH 20/20] only check SYSTEM_REQUIREMENTS.txt for lucene git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1399620 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/scripts/smokeTestRelease.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index 72cbf660c64..cc72aabf147 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -576,9 +576,9 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): os.chdir(unpackPath) isSrc = artifact.find('-src') != -1 l = os.listdir(unpackPath) - textFiles = ['LICENSE', 'NOTICE', 'README', 'SYSTEM_REQUIREMENTS'] + textFiles = ['LICENSE', 'NOTICE', 'README'] if project == 'lucene': - textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE')) + textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE', 
'SYSTEM_REQUIREMENTS')) if isSrc: textFiles.append('BUILD') for fileName in textFiles: @@ -629,6 +629,10 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): if project == 'lucene': if len(l) > 0: raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l)) + else: + # TODO: re-enable this check + if False and not os.path.exists('%s/solr/SYSTEM_REQUIREMENTS.txt' % unpackPath): + raise RuntimeError('%s: solr/SYSTEM_REQUIREMENTS.txt does not exist in artifact %s' % (project, artifact)) if isSrc: print(' make sure no JARs/WARs in src dist...')