diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index dba87586f70..cc72aabf147 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -578,7 +578,7 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): l = os.listdir(unpackPath) textFiles = ['LICENSE', 'NOTICE', 'README'] if project == 'lucene': - textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE')) + textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE', 'SYSTEM_REQUIREMENTS')) if isSrc: textFiles.append('BUILD') for fileName in textFiles: @@ -629,6 +629,10 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): if project == 'lucene': if len(l) > 0: raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l)) + else: + # TODO: re-enable this check + if False and not os.path.exists('%s/solr/SYSTEM_REQUIREMENTS.txt' % unpackPath): + raise RuntimeError('%s: solr/SYSTEM_REQUIREMENTS.txt does not exist in artifact %s' % (project, artifact)) if isSrc: print(' make sure no JARs/WARs in src dist...') diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d9863e204f4..24f04b348fb 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -56,6 +56,12 @@ Bug Fixes * LUCENE-4468: Fix rareish integer overflows in Block and Lucene40 postings formats (Robert Muir) + +* LUCENE-4486: Add support for ConstantScoreQuery in Highlighter. + (Simon Willnauer) + +* LUCENE-4485: When CheckIndex terms, terms/docs pairs and tokens, + these counts now all exclude deleted documents. (Mike McCandless) Optimizations diff --git a/lucene/SYSTEM_REQUIREMENTS.txt b/lucene/SYSTEM_REQUIREMENTS.txt new file mode 100644 index 00000000000..d5edcc6766e --- /dev/null +++ b/lucene/SYSTEM_REQUIREMENTS.txt @@ -0,0 +1,16 @@ +# System Requirements + +Apache Lucene runs of Java 6 or greater. When using Java 7, be sure to +install at least Update 1! 
With all Java versions it is strongly +recommended to not use experimental `-XX` JVM options. It is also +recommended to always use the latest update version of your Java VM, +because bugs may affect Lucene. An overview of known JVM bugs can be +found on http://wiki.apache.org/lucene-java/SunJavaBugs. + +CPU, disk and memory requirements are based on the many choices made in +implementing Lucene (document size, number of documents, and number of +hits retrieved to name a few). The benchmarks page has some information +related to performance on particular platforms. + +*To build Apache Lucene from source, refer to the `BUILD.txt` file in +the distribution directory.* diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java index 83d60671d4c..346d9afb95e 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilterFactory.java @@ -36,16 +36,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; */ public class LimitTokenCountFilterFactory extends TokenFilterFactory { + public static final String MAX_TOKEN_COUNT_KEY = "maxTokenCount"; int maxTokenCount; @Override public void init(Map args) { super.init( args ); - String maxTokenCountArg = args.get("maxTokenCount"); - if (maxTokenCountArg == null) { - throw new IllegalArgumentException("maxTokenCount is mandatory."); - } - maxTokenCount = Integer.parseInt(args.get(maxTokenCountArg)); + maxTokenCount = getInt(MAX_TOKEN_COUNT_KEY); } @Override diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java new file 
mode 100644 index 00000000000..9296d0d86ce --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java @@ -0,0 +1,55 @@ +package org.apache.lucene.analysis.miscellaneous; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; + +public class TestLimitTokenCountFilterFactory extends BaseTokenStreamTestCase { + + public void test() throws IOException { + LimitTokenCountFilterFactory factory = new LimitTokenCountFilterFactory(); + Map args = new HashMap(); + args.put(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3"); + factory.init(args); + String test = "A1 B2 C3 D4 E5 F6"; + MockTokenizer tok = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false); + // LimitTokenCountFilter doesn't consume the entire stream that it wraps + tok.setEnableChecks(false); + TokenStream stream = factory.create(tok); + assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" }); + + // param is required + factory = new LimitTokenCountFilterFactory(); + args = new HashMap(); + IllegalArgumentException iae = null; + try { + factory.init(args); + } catch 
(IllegalArgumentException e) { + assertTrue("exception doesn't mention param: " + e.getMessage(), + 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY)); + iae = e; + } + assertNotNull("no exception thrown", iae); + } +} diff --git a/lucene/build.xml b/lucene/build.xml index b26d455ec79..d8d3f0fb440 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -33,6 +33,7 @@ - + diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java index a3b27a2890e..367e45e2b1e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java @@ -69,7 +69,27 @@ public class BlockTermsWriter extends FieldsConsumer { final FieldInfos fieldInfos; FieldInfo currentField; private final TermsIndexWriterBase termsIndexWriter; - private final List fields = new ArrayList(); + + private static class FieldMetaData { + public final FieldInfo fieldInfo; + public final long numTerms; + public final long termsStartPointer; + public final long sumTotalTermFreq; + public final long sumDocFreq; + public final int docCount; + + public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + this.termsStartPointer = termsStartPointer; + this.numTerms = numTerms; + this.sumTotalTermFreq = sumTotalTermFreq; + this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + } + } + + private final List fields = new ArrayList(); // private final String segment; @@ -108,9 +128,7 @@ public class BlockTermsWriter extends FieldsConsumer { assert currentField == null || currentField.name.compareTo(field.name) < 0; currentField = field; TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, 
out.getFilePointer()); - final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter); - fields.add(terms); - return terms; + return new TermsWriter(fieldIndexWriter, field, postingsWriter); } @Override @@ -118,27 +136,18 @@ public class BlockTermsWriter extends FieldsConsumer { try { - int nonZeroCount = 0; - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - nonZeroCount++; - } - } - final long dirStart = out.getFilePointer(); - out.writeVInt(nonZeroCount); - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.numTerms); - out.writeVLong(field.termsStartPointer); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - out.writeVLong(field.sumTotalTermFreq); - } - out.writeVLong(field.sumDocFreq); - out.writeVInt(field.docCount); + out.writeVInt(fields.size()); + for(FieldMetaData field : fields) { + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVLong(field.termsStartPointer); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + out.writeVLong(field.sumTotalTermFreq); } + out.writeVLong(field.sumDocFreq); + out.writeVInt(field.docCount); } writeTrailer(dirStart); } finally { @@ -249,6 +258,14 @@ public class BlockTermsWriter extends FieldsConsumer { this.sumDocFreq = sumDocFreq; this.docCount = docCount; fieldIndexWriter.finish(out.getFilePointer()); + if (numTerms > 0) { + fields.add(new FieldMetaData(fieldInfo, + numTerms, + termsStartPointer, + sumTotalTermFreq, + sumDocFreq, + docCount)); + } } private int sharedPrefix(BytesRef term1, BytesRef term2) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index 97e22f4a03d..936d4ed5d61 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ 
b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -27,7 +27,7 @@ import java.util.TreeMap; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -52,7 +52,7 @@ import org.apache.lucene.util.automaton.Transition; // - build depth-N prefix hash? // - or: longer dense skip lists than just next byte? -/** Wraps {@link Lucene40PostingsFormat} format for on-disk +/** Wraps {@link Lucene41PostingsFormat} format for on-disk * storage, but then at read time loads and stores all * terms & postings directly in RAM as byte[], int[]. * @@ -100,12 +100,12 @@ public final class DirectPostingsFormat extends PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - return PostingsFormat.forName("Lucene40").fieldsConsumer(state); + return PostingsFormat.forName("Lucene41").fieldsConsumer(state); } @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - FieldsProducer postings = PostingsFormat.forName("Lucene40").fieldsProducer(state); + FieldsProducer postings = PostingsFormat.forName("Lucene41").fieldsProducer(state); if (state.context.context != IOContext.Context.MERGE) { FieldsProducer loadedPostings; try { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing40PostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java similarity index 70% rename from lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing40PostingsFormat.java rename to 
lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java index faf8df2bbf8..9946062e09b 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing40PostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java @@ -18,28 +18,28 @@ package org.apache.lucene.codecs.pulsing; */ import org.apache.lucene.codecs.BlockTreeTermsWriter; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsBaseFormat; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsBaseFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs /** - * Concrete pulsing implementation over {@link Lucene40PostingsFormat}. + * Concrete pulsing implementation over {@link Lucene41PostingsFormat}. * * @lucene.experimental */ -public class Pulsing40PostingsFormat extends PulsingPostingsFormat { +public class Pulsing41PostingsFormat extends PulsingPostingsFormat { - /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene40" format. */ - public Pulsing40PostingsFormat() { + /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene41" format. */ + public Pulsing41PostingsFormat() { this(1); } - /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. */ - public Pulsing40PostingsFormat(int freqCutoff) { + /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene41" format. */ + public Pulsing41PostingsFormat(int freqCutoff) { this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); } - /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. 
*/ - public Pulsing40PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) { - super("Pulsing40", new Lucene40PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize); + /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene41" format. */ + public Pulsing41PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) { + super("Pulsing41", new Lucene41PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize); } } diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 72b05c5e74e..22062983d91 100644 --- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -13,9 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat +org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat org.apache.lucene.codecs.memory.MemoryPostingsFormat org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat org.apache.lucene.codecs.memory.DirectPostingsFormat -org.apache.lucene.codecs.block.BlockPostingsFormat diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java index d45b6828d1a..141ff99f5fe 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/blockterms/TestFixedGapPostingsFormat.java @@ -19,8 +19,8 @@ package org.apache.lucene.codecs.blockterms; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import 
org.apache.lucene.codecs.lucene40.Lucene40Codec; -import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; +import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -29,8 +29,8 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; // TODO: we should add an instantiation for VarGap too to TestFramework, and a test in this package // TODO: ensure both of these are also in rotation in RandomCodec public class TestFixedGapPostingsFormat extends BasePostingsFormatTestCase { - private final PostingsFormat postings = new Lucene40WithOrds(); - private final Codec codec = new Lucene40Codec() { + private final PostingsFormat postings = new Lucene41WithOrds(); + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java index 3bd9a90935c..6c3034c5fd3 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/bloom/TestBloomPostingsFormat.java @@ -19,15 +19,15 @@ package org.apache.lucene.codecs.bloom; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** * Basic tests for BloomPostingsFormat */ public class TestBloomPostingsFormat extends BasePostingsFormatTestCase { - private final PostingsFormat postings = new TestBloomFilteredLucene40Postings(); - private final Codec codec = new Lucene40Codec() { + private final PostingsFormat postings = new TestBloomFilteredLucene41Postings(); + 
private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java index 9b25a4c1090..bb3a4824589 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java @@ -23,7 +23,7 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; @@ -90,10 +90,10 @@ public class TestCompressingStoredFieldsFormat extends LuceneTestCase { if (random().nextBoolean() && (i % (data.length / 10) == 0)) { iw.w.close(); // switch codecs - if (iwConf.getCodec() instanceof Lucene40Codec) { + if (iwConf.getCodec() instanceof Lucene41Codec) { iwConf.setCodec(CompressingCodec.randomInstance(random())); } else { - iwConf.setCodec(new Lucene40Codec()); + iwConf.setCodec(new Lucene41Codec()); } iw = new RandomIndexWriter(random(), dir, iwConf); } diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java index e6338a0a674..93a1b548a62 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestFixedIntBlockPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.intblock; import 
org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestFixedIntBlockPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize blocksize private final PostingsFormat postings = new MockFixedIntBlockPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java index c7955acc9f5..156f91840b0 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/intblock/TestVariableIntBlockPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.intblock; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestVariableIntBlockPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize blocksize private final PostingsFormat postings = new MockVariableIntBlockPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec 
codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java index caf55a83084..bab45bcc366 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestDirectPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.memory; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -29,7 +29,7 @@ public class TestDirectPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize parameters private final PostingsFormat postings = new DirectPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java index ca07382b3a9..93892c7b640 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestMemoryPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.memory; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -28,7 +28,7 @@ import 
org.apache.lucene.index.BasePostingsFormatTestCase; public class TestMemoryPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize doPack private final PostingsFormat postings = new MemoryPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java index 3e47dc549cc..6e53a631ea3 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java @@ -25,7 +25,6 @@ import java.util.Locale; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -52,7 +51,7 @@ import org.apache.lucene.util._TestUtil; public class Test10KPulsings extends LuceneTestCase { public void test10kPulsed() throws Exception { // we always run this test with pulsing codec. - Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(1)); + Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1)); File f = _TestUtil.getTempDir("10kpulsed"); BaseDirectoryWrapper dir = newFSDirectory(f); @@ -103,7 +102,7 @@ public class Test10KPulsings extends LuceneTestCase { public void test10kNotPulsed() throws Exception { // we always run this test with pulsing codec. 
int freqCutoff = _TestUtil.nextInt(random(), 1, 10); - Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(freqCutoff)); + Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(freqCutoff)); File f = _TestUtil.getTempDir("10knotpulsed"); BaseDirectoryWrapper dir = newFSDirectory(f); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java index 3156323df61..75271d33d17 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.pulsing; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.index.BasePostingsFormatTestCase; /** @@ -27,8 +27,8 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; */ public class TestPulsingPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize cutoff - private final PostingsFormat postings = new Pulsing40PostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final PostingsFormat postings = new Pulsing41PostingsFormat(); + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java index 488fca34baf..cfa520aef02 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java @@ -45,7 +45,7 @@ public class 
TestPulsingReuse extends LuceneTestCase { // TODO: this is a basic test. this thing is complicated, add more public void testSophisticatedReuse() throws Exception { // we always run this test with pulsing codec. - Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(1)); + Codec cp = _TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1)); Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp)); diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java index 318822ce19d..8c6df1d93fd 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/sep/TestSepPostingsFormat.java @@ -19,7 +19,7 @@ package org.apache.lucene.codecs.sep; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.index.BasePostingsFormatTestCase; @@ -29,7 +29,7 @@ import org.apache.lucene.index.BasePostingsFormatTestCase; public class TestSepPostingsFormat extends BasePostingsFormatTestCase { // TODO: randomize cutoff private final PostingsFormat postings = new MockSepPostingsFormat(); - private final Codec codec = new Lucene40Codec() { + private final Codec codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return postings; diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 1f9c09e8a0a..1f1b5c6cd74 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -771,6 +771,7 @@ + @@ -808,7 +809,7 @@ 
dynamicAssignmentRatio="${tests.dynamicAssignmentRatio}" shuffleOnSlave="true" - leaveTemporary="false" + leaveTemporary="${tests.leaveTemporary}" seed="${tests.seed}" heartbeat="${tests.heartbeat}" @@ -1116,6 +1117,10 @@ ant -Dtests.timestamps=on ... # Override forked JVM file.encoding ant -Dtests.file.encoding=XXX ... +# Don't remove temporary files under slave directories, even if +# the test passes. +ant -Dtests.leaveTemporary=true + # Output test files and reports. ${tests-output}/tests-report.txt - full ASCII tests report ${tests-output}/tests-failures.txt - failures only (if any) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java index c3c4c8cf5aa..39ced1d8e44 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java @@ -228,7 +228,30 @@ public class BlockTreeTermsWriter extends FieldsConsumer { final PostingsWriterBase postingsWriter; final FieldInfos fieldInfos; FieldInfo currentField; - private final List fields = new ArrayList(); + + private static class FieldMetaData { + public final FieldInfo fieldInfo; + public final BytesRef rootCode; + public final long numTerms; + public final long indexStartFP; + public final long sumTotalTermFreq; + public final long sumDocFreq; + public final int docCount; + + public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount) { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms; + this.rootCode = rootCode; + this.indexStartFP = indexStartFP; + this.numTerms = numTerms; + this.sumTotalTermFreq = sumTotalTermFreq; + this.sumDocFreq = sumDocFreq; + this.docCount = docCount; + } + } + + private final List fields = new ArrayList(); // private final 
String segment; /** Create a new writer. The number of items (terms or @@ -313,9 +336,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer { //if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name); assert currentField == null || currentField.name.compareTo(field.name) < 0; currentField = field; - final TermsWriter terms = new TermsWriter(field); - fields.add(terms); - return terms; + return new TermsWriter(field); } static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) { @@ -1007,6 +1028,14 @@ public class BlockTreeTermsWriter extends FieldsConsumer { // System.out.println("SAVED to " + dotFileName); // w.close(); // } + + fields.add(new FieldMetaData(fieldInfo, + ((PendingBlock) pending.get(0)).index.getEmptyOutput(), + numTerms, + indexStartFP, + sumTotalTermFreq, + sumDocFreq, + docCount)); } else { assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1; assert sumDocFreq == 0; @@ -1024,34 +1053,23 @@ public class BlockTreeTermsWriter extends FieldsConsumer { IOException ioe = null; try { - int nonZeroCount = 0; - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - nonZeroCount++; - } - } - final long dirStart = out.getFilePointer(); final long indexDirStart = indexOut.getFilePointer(); - out.writeVInt(nonZeroCount); + out.writeVInt(fields.size()); - for(TermsWriter field : fields) { - if (field.numTerms > 0) { - //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms"); - out.writeVInt(field.fieldInfo.number); - out.writeVLong(field.numTerms); - final BytesRef rootCode = ((PendingBlock) field.pending.get(0)).index.getEmptyOutput(); - assert rootCode != null: "field=" + field.fieldInfo.name + " numTerms=" + field.numTerms; - out.writeVInt(rootCode.length); - out.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { - 
out.writeVLong(field.sumTotalTermFreq); - } - out.writeVLong(field.sumDocFreq); - out.writeVInt(field.docCount); - indexOut.writeVLong(field.indexStartFP); + for(FieldMetaData field : fields) { + //System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms"); + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVInt(field.rootCode.length); + out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length); + if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) { + out.writeVLong(field.sumTotalTermFreq); } + out.writeVLong(field.sumDocFreq); + out.writeVInt(field.docCount); + indexOut.writeVLong(field.indexStartFP); } writeTrailer(out, dirStart); writeIndexTrailer(indexOut, indexDirStart); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java index 1892df6d300..7a473a3ed38 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java @@ -119,7 +119,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI { loader.reload(classloader); } - private static Codec defaultCodec = Codec.forName("Lucene40"); + private static Codec defaultCodec = Codec.forName("Lucene41"); /** expert: returns the default codec used for newly created * {@link IndexWriterConfig}s. diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java index 4dfae68232a..12f17197d2b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java @@ -21,13 +21,13 @@ package org.apache.lucene.codecs; * A codec that forwards all its method calls to another codec. *

* Extend this class when you need to reuse the functionality of an existing - * codec. For example, if you want to build a codec that redefines Lucene40's + * codec. For example, if you want to build a codec that redefines Lucene41's * {@link LiveDocsFormat}: *

  *   public final class CustomCodec extends FilterCodec {
  *
  *     public CustomCodec() {
- *       super("CustomCodec", new Lucene40Codec());
+ *       super("CustomCodec", new Lucene41Codec());
  *     }
  *
  *     public LiveDocsFormat liveDocsFormat() {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
index b98205ec8cd..a0d66af61d0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
@@ -36,12 +36,13 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
  * {@link FilterCodec}.
  *
  * @see org.apache.lucene.codecs.lucene40 package documentation for file format details.
- * @lucene.experimental
+ * @deprecated Only for reading old 4.0 segments
  */
 // NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever
 // if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
 // (it writes a minor version, etc).
-public class Lucene40Codec extends Codec {
+@Deprecated
+public final class Lucene40Codec extends Codec {
   private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat();
   private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
   private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
index df6611922e2..eaf452d6252 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
@@ -29,9 +29,10 @@ import org.apache.lucene.index.SegmentWriteState;
  * Provides a {@link PostingsReaderBase} and {@link
  * PostingsWriterBase}.
  *
- * @lucene.experimental */
+ * @deprecated Only for reading old 4.0 segments */
 
 // TODO: should these also be named / looked up via SPI?
+@Deprecated
 public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
 
   /** Sole constructor. */
@@ -46,6 +47,6 @@ public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
 
   @Override
   public PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException {
-    return new Lucene40PostingsWriter(state);
+    throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
index 16d9c47ed97..1f9c28efdb4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
@@ -211,15 +211,18 @@ import org.apache.lucene.util.fst.FST; // javadocs
  * previous occurrence and an OffsetLength follows. Offset data is only written for
  * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.

* - * @lucene.experimental */ + * @deprecated Only for reading old 4.0 segments */ // TODO: this class could be created by wrapping // BlockTreeTermsDict around Lucene40PostingsBaseFormat; ie // we should not duplicate the code from that class here: -public final class Lucene40PostingsFormat extends PostingsFormat { +@Deprecated +public class Lucene40PostingsFormat extends PostingsFormat { - private final int minBlockSize; - private final int maxBlockSize; + /** minimum items (terms or sub-blocks) per block for BlockTree */ + protected final int minBlockSize; + /** maximum items (terms or sub-blocks) per block for BlockTree */ + protected final int maxBlockSize; /** Creates {@code Lucene40PostingsFormat} with default * settings. */ @@ -231,7 +234,7 @@ public final class Lucene40PostingsFormat extends PostingsFormat { * values for {@code minBlockSize} and {@code * maxBlockSize} passed to block terms dictionary. * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */ - public Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) { + private Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) { super("Lucene40"); this.minBlockSize = minBlockSize; assert minBlockSize > 1; @@ -240,22 +243,7 @@ public final class Lucene40PostingsFormat extends PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase docs = new Lucene40PostingsWriter(state); - - // TODO: should we make the terms index more easily - // pluggable? Ie so that this codec would record which - // index impl was used, and switch on loading? - // Or... you must make a new Codec for this? 
- boolean success = false; - try { - FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize); - success = true; - return ret; - } finally { - if (!success) { - docs.close(); - } - } + throw new UnsupportedOperationException("this codec can only be used for reading"); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index 64d2e49b1ff..a3729e2f1da 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -45,10 +45,21 @@ import org.apache.lucene.util.IOUtils; * postings format. * * @see Lucene40PostingsFormat - * @lucene.experimental */ - + * @deprecated Only for reading old 4.0 segments */ +@Deprecated public class Lucene40PostingsReader extends PostingsReaderBase { + final static String TERMS_CODEC = "Lucene40PostingsWriterTerms"; + final static String FRQ_CODEC = "Lucene40PostingsWriterFrq"; + final static String PRX_CODEC = "Lucene40PostingsWriterPrx"; + + //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG; + + // Increment version to change it: + final static int VERSION_START = 0; + final static int VERSION_LONG_SKIP = 1; + final static int VERSION_CURRENT = VERSION_LONG_SKIP; + private final IndexInput freqIn; private final IndexInput proxIn; // public static boolean DEBUG = BlockTreeTermsWriter.DEBUG; @@ -67,7 +78,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { try { freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION), ioContext); - CodecUtil.checkHeader(freqIn, Lucene40PostingsWriter.FRQ_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_CURRENT); + CodecUtil.checkHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT); // TODO: 
hasProx should (somehow!) become codec private, // but it's tricky because 1) FIS.hasProx is global (it // could be all fields that have prox are written by a @@ -79,7 +90,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { if (fieldInfos.hasProx()) { proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION), ioContext); - CodecUtil.checkHeader(proxIn, Lucene40PostingsWriter.PRX_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_CURRENT); + CodecUtil.checkHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT); } else { proxIn = null; } @@ -97,8 +108,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase { public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching past writer - CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.TERMS_CODEC, - Lucene40PostingsWriter.VERSION_START, Lucene40PostingsWriter.VERSION_CURRENT); + CodecUtil.checkHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT); skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java index 4cef37a5977..1580a390ba9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java @@ -28,8 +28,9 @@ import org.apache.lucene.store.IndexInput; * that stores positions and payloads. 
* * @see Lucene40PostingsFormat - * @lucene.experimental + * @deprecated Only for reading old 4.0 segments */ +@Deprecated public class Lucene40SkipListReader extends MultiLevelSkipListReader { private boolean currentFieldStoresPayloads; private boolean currentFieldStoresOffsets; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/ForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java similarity index 98% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/ForUtil.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java index fc52520f551..88f70a249c1 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/ForUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/ForUtil.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -28,7 +28,7 @@ import org.apache.lucene.util.packed.PackedInts.Decoder; import org.apache.lucene.util.packed.PackedInts.FormatAndBits; import org.apache.lucene.util.packed.PackedInts; -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; /** * Encode all values in normal area with fixed bit width, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java new file mode 100644 index 00000000000..48219582b75 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java @@ -0,0 +1,122 @@ +package org.apache.lucene.codecs.lucene41; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat; +import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; +import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; +import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; + +/** + * Implements the Lucene 4.1 index format, with configurable per-field postings formats. + *

+ * If you want to reuse functionality of this codec in another codec, extend + * {@link FilterCodec}. + * + * @see org.apache.lucene.codecs.lucene41 package documentation for file format details. + * @lucene.experimental + */ +// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever +// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader +// (it writes a minor version, etc). +public class Lucene41Codec extends Codec { + private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat(); + private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat(); + private final DocValuesFormat docValuesFormat = new Lucene40DocValuesFormat(); + private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat(); + private final NormsFormat normsFormat = new Lucene40NormsFormat(); + private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); + + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Lucene41Codec.this.getPostingsFormatForField(field); + } + }; + + /** Sole constructor. 
*/ + public Lucene41Codec() { + super("Lucene41"); + } + + @Override + public final StoredFieldsFormat storedFieldsFormat() { + return fieldsFormat; + } + + @Override + public final TermVectorsFormat termVectorsFormat() { + return vectorsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return infosFormat; + } + + @Override + public final NormsFormat normsFormat() { + return normsFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + /** Returns the postings format that should be used for writing + * new segments of field. + * + * The default implementation always returns "Lucene41" + */ + public PostingsFormat getPostingsFormatForField(String field) { + return defaultFormat; + } + + private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41"); +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java new file mode 100644 index 00000000000..0360c0d2709 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsBaseFormat.java @@ -0,0 +1,51 @@ +package org.apache.lucene.codecs.lucene41; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.PostingsBaseFormat; +import org.apache.lucene.codecs.PostingsReaderBase; +import org.apache.lucene.codecs.PostingsWriterBase; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +/** + * Provides a {@link PostingsReaderBase} and {@link + * PostingsWriterBase}. + * + * @lucene.experimental */ + +// TODO: should these also be named / looked up via SPI? +public final class Lucene41PostingsBaseFormat extends PostingsBaseFormat { + + /** Sole constructor. */ + public Lucene41PostingsBaseFormat() { + super("Lucene41"); + } + + @Override + public PostingsReaderBase postingsReaderBase(SegmentReadState state) throws IOException { + return new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + } + + @Override + public PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException { + return new Lucene41PostingsWriter(state); + } +} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java similarity index 95% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java index 73ce8df62c4..3cbc9653bed 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* @@ -38,7 +38,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.PackedInts; /** - * Block postings format, which encodes postings in packed integer blocks + * Lucene 4.1 postings format, which encodes postings in packed integer blocks * for fast decode. * *

NOTE: this format is still experimental and @@ -58,7 +58,7 @@ import org.apache.lucene.util.packed.PackedInts; * *

  • * Block structure: - *

    When the postings are long enough, BlockPostingsFormat will try to encode most integer data + *

    When the postings are long enough, Lucene41PostingsFormat will try to encode most integer data * as a packed block.

    *

    Take a term with 259 documents as an example, the first 256 document ids are encoded as two packed * blocks, while the remaining 3 are encoded as one VInt block.

    @@ -161,7 +161,7 @@ import org.apache.lucene.util.packed.PackedInts; *
  • SkipFPDelta determines the position of this term's SkipData within the .doc * file. In particular, it is the length of the TermFreq data. * SkipDelta is only stored if DocFreq is not smaller than SkipMinimum - * (i.e. 8 in BlockPostingsFormat).
  • + * (i.e. 8 in Lucene41PostingsFormat). * * * @@ -238,10 +238,10 @@ import org.apache.lucene.util.packed.PackedInts; * We use this trick since the definition of skip entry is a little different from base interface. * In {@link MultiLevelSkipListWriter}, skip data is assumed to be saved for * skipIntervalth, 2*skipIntervalth ... posting in the list. However, - * in BlockPostingsFormat, the skip data is saved for skipInterval+1th, + * in Lucene41PostingsFormat, the skip data is saved for skipInterval+1th, * 2*skipInterval+1th ... posting (skipInterval==PackedBlockSize in this case). * When DocFreq is multiple of PackedBlockSize, MultiLevelSkipListWriter will expect one - * more skip data than BlockSkipWriter. + * more skip data than Lucene41SkipWriter. *
  • SkipDatum is the metadata of one skip entry. * For the first block (no matter packed or VInt), it is omitted.
  • *
  • DocSkip records the document number of every PackedBlockSizeth document number in @@ -351,7 +351,7 @@ import org.apache.lucene.util.packed.PackedInts; * @lucene.experimental */ -public final class BlockPostingsFormat extends PostingsFormat { +public final class Lucene41PostingsFormat extends PostingsFormat { /** * Filename extension for document number, frequencies, and skip data. * See chapter: Frequencies and Skip Data @@ -380,12 +380,18 @@ public final class BlockPostingsFormat extends PostingsFormat { // NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding public final static int BLOCK_SIZE = 128; - public BlockPostingsFormat() { + /** Creates {@code Lucene41PostingsFormat} with default + * settings. */ + public Lucene41PostingsFormat() { this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE); } - public BlockPostingsFormat(int minTermBlockSize, int maxTermBlockSize) { - super("Block"); + /** Creates {@code Lucene41PostingsFormat} with custom + * values for {@code minBlockSize} and {@code + * maxBlockSize} passed to block terms dictionary. 
+ * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */ + public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) { + super("Lucene41"); this.minTermBlockSize = minTermBlockSize; assert minTermBlockSize > 1; this.maxTermBlockSize = maxTermBlockSize; @@ -399,7 +405,7 @@ public final class BlockPostingsFormat extends PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new BlockPostingsWriter(state); + PostingsWriterBase postingsWriter = new Lucene41PostingsWriter(state); boolean success = false; try { @@ -418,7 +424,7 @@ public final class BlockPostingsFormat extends PostingsFormat { @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new BlockPostingsReader(state.dir, + PostingsReaderBase postingsReader = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java similarity index 95% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java index 77b766743f0..6292b18e6a7 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,9 +17,9 @@ package org.apache.lucene.codecs.block; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_DATA_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_ENCODED_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; import java.io.IOException; import java.util.Arrays; @@ -49,10 +49,10 @@ import org.apache.lucene.util.IOUtils; * Concrete class that reads docId(maybe frq,pos,offset,payloads) list * with postings format. * - * @see BlockSkipReader for details - * + * @see Lucene41SkipReader for details + * @lucene.experimental */ -final class BlockPostingsReader extends PostingsReaderBase { +public final class Lucene41PostingsReader extends PostingsReaderBase { private final IndexInput docIn; private final IndexInput posIn; @@ -62,35 +62,36 @@ final class BlockPostingsReader extends PostingsReaderBase { // public static boolean DEBUG = false; - public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { + /** Sole constructor. 
*/ + public Lucene41PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { boolean success = false; IndexInput docIn = null; IndexInput posIn = null; IndexInput payIn = null; try { - docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.DOC_EXTENSION), + docIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), ioContext); CodecUtil.checkHeader(docIn, - BlockPostingsWriter.DOC_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.DOC_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); forUtil = new ForUtil(docIn); if (fieldInfos.hasProx()) { - posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.POS_EXTENSION), + posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), ioContext); CodecUtil.checkHeader(posIn, - BlockPostingsWriter.POS_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.POS_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) { - payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, BlockPostingsFormat.PAY_EXTENSION), + payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), ioContext); CodecUtil.checkHeader(payIn, - BlockPostingsWriter.PAY_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.PAY_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); } } @@ -109,9 +110,9 @@ final class 
BlockPostingsReader extends PostingsReaderBase { public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching postings writer CodecUtil.checkHeader(termsIn, - BlockPostingsWriter.TERMS_CODEC, - BlockPostingsWriter.VERSION_CURRENT, - BlockPostingsWriter.VERSION_CURRENT); + Lucene41PostingsWriter.TERMS_CODEC, + Lucene41PostingsWriter.VERSION_CURRENT, + Lucene41PostingsWriter.VERSION_CURRENT); final int indexBlockSize = termsIn.readVInt(); if (indexBlockSize != BLOCK_SIZE) { throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")"); @@ -321,7 +322,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private int docBufferUpto; - private BlockSkipReader skipper; + private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; @@ -353,7 +354,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private Bits liveDocs; public BlockDocsEnum(FieldInfo fieldInfo) throws IOException { - this.startDocIn = BlockPostingsReader.this.docIn; + this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = startDocIn.clone(); indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; @@ -486,8 +487,8 @@ final class BlockPostingsReader extends PostingsReaderBase { if (skipper == null) { // Lazy init: first time this enum has ever been used for skipping - skipper = new BlockSkipReader(docIn.clone(), - BlockPostingsWriter.maxSkipLevels, + skipper = new Lucene41SkipReader(docIn.clone(), + Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, indexHasPos, indexHasOffsets, @@ -502,7 +503,7 @@ final class BlockPostingsReader extends PostingsReaderBase { skipped = true; } - // always plus one to fix the result, since skip position in BlockSkipReader + // always plus one to fix the result, since skip 
position in Lucene41SkipReader // is a little different from MultiLevelSkipListReader final int newDocUpto = skipper.skipTo(target) + 1; @@ -577,7 +578,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private int docBufferUpto; private int posBufferUpto; - private BlockSkipReader skipper; + private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; @@ -628,9 +629,9 @@ final class BlockPostingsReader extends PostingsReaderBase { private Bits liveDocs; public BlockDocsAndPositionsEnum(FieldInfo fieldInfo) throws IOException { - this.startDocIn = BlockPostingsReader.this.docIn; + this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = startDocIn.clone(); - this.posIn = BlockPostingsReader.this.posIn.clone(); + this.posIn = Lucene41PostingsReader.this.posIn.clone(); encoded = new byte[MAX_ENCODED_SIZE]; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasPayloads = fieldInfo.hasPayloads(); @@ -797,8 +798,8 @@ final class BlockPostingsReader extends PostingsReaderBase { // if (DEBUG) { // System.out.println(" create skipper"); // } - skipper = new BlockSkipReader(docIn.clone(), - BlockPostingsWriter.maxSkipLevels, + skipper = new Lucene41SkipReader(docIn.clone(), + Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, true, indexHasOffsets, @@ -987,7 +988,7 @@ final class BlockPostingsReader extends PostingsReaderBase { private int docBufferUpto; private int posBufferUpto; - private BlockSkipReader skipper; + private Lucene41SkipReader skipper; private boolean skipped; final IndexInput startDocIn; @@ -1044,10 +1045,10 @@ final class BlockPostingsReader extends PostingsReaderBase { private Bits liveDocs; public EverythingEnum(FieldInfo fieldInfo) throws IOException { - this.startDocIn = BlockPostingsReader.this.docIn; + this.startDocIn = Lucene41PostingsReader.this.docIn; this.docIn = startDocIn.clone(); - this.posIn = 
BlockPostingsReader.this.posIn.clone(); - this.payIn = BlockPostingsReader.this.payIn.clone(); + this.posIn = Lucene41PostingsReader.this.posIn.clone(); + this.payIn = Lucene41PostingsReader.this.payIn.clone(); encoded = new byte[MAX_ENCODED_SIZE]; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; if (indexHasOffsets) { @@ -1282,8 +1283,8 @@ final class BlockPostingsReader extends PostingsReaderBase { // if (DEBUG) { // System.out.println(" create skipper"); // } - skipper = new BlockSkipReader(docIn.clone(), - BlockPostingsWriter.maxSkipLevels, + skipper = new Lucene41SkipReader(docIn.clone(), + Lucene41PostingsWriter.maxSkipLevels, BLOCK_SIZE, true, indexHasOffsets, diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java similarity index 92% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java index 305e1f38d51..19391afcaa7 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,9 +17,9 @@ package org.apache.lucene.codecs.block; * limitations under the License. 
*/ -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_DATA_SIZE; -import static org.apache.lucene.codecs.block.ForUtil.MAX_ENCODED_SIZE; +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE; +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE; import java.io.IOException; import java.util.ArrayList; @@ -47,10 +47,10 @@ import org.apache.lucene.util.packed.PackedInts; * * Postings list for each term will be stored separately. * - * @see BlockSkipWriter for details about skipping setting and postings layout. - * + * @see Lucene41SkipWriter for details about skipping setting and postings layout. + * @lucene.experimental */ -final class BlockPostingsWriter extends PostingsWriterBase { +public final class Lucene41PostingsWriter extends PostingsWriterBase { /** * Expert: The maximum number of skip levels. 
Smaller values result in @@ -58,15 +58,14 @@ final class BlockPostingsWriter extends PostingsWriterBase { */ static final int maxSkipLevels = 10; - final static String TERMS_CODEC = "BlockPostingsWriterTerms"; - final static String DOC_CODEC = "BlockPostingsWriterDoc"; - final static String POS_CODEC = "BlockPostingsWriterPos"; - final static String PAY_CODEC = "BlockPostingsWriterPay"; + final static String TERMS_CODEC = "Lucene41PostingsWriterTerms"; + final static String DOC_CODEC = "Lucene41PostingsWriterDoc"; + final static String POS_CODEC = "Lucene41PostingsWriterPos"; + final static String PAY_CODEC = "Lucene41PostingsWriterPay"; - // Increment version to change it: + // Increment version to change it final static int VERSION_START = 0; - final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443 - final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA; + final static int VERSION_CURRENT = VERSION_START; final IndexOutput docOut; final IndexOutput posOut; @@ -112,12 +111,14 @@ final class BlockPostingsWriter extends PostingsWriterBase { final byte[] encoded; private final ForUtil forUtil; - private final BlockSkipWriter skipWriter; + private final Lucene41SkipWriter skipWriter; - public BlockPostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException { + /** Creates a postings writer with the specified PackedInts overhead ratio */ + // TODO: does this ctor even make sense? 
+ public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio) throws IOException { super(); - docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.DOC_EXTENSION), + docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.context); IndexOutput posOut = null; IndexOutput payOut = null; @@ -127,7 +128,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { forUtil = new ForUtil(acceptableOverheadRatio, docOut); if (state.fieldInfos.hasProx()) { posDeltaBuffer = new int[MAX_DATA_SIZE]; - posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.POS_EXTENSION), + posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.context); CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT); @@ -148,7 +149,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { } if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) { - payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.PAY_EXTENSION), + payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.context); CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT); } @@ -172,7 +173,7 @@ final class BlockPostingsWriter extends PostingsWriterBase { freqBuffer = new int[MAX_DATA_SIZE]; // TODO: should we try skipping every 2/4 blocks...? 
- skipWriter = new BlockSkipWriter(maxSkipLevels, + skipWriter = new Lucene41SkipWriter(maxSkipLevels, BLOCK_SIZE, state.segmentInfo.getDocCount(), docOut, @@ -182,7 +183,8 @@ final class BlockPostingsWriter extends PostingsWriterBase { encoded = new byte[MAX_ENCODED_SIZE]; } - public BlockPostingsWriter(SegmentWriteState state) throws IOException { + /** Creates a postings writer with PackedInts.COMPACT */ + public Lucene41PostingsWriter(SegmentWriteState state) throws IOException { this(state, PackedInts.COMPACT); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java similarity index 91% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java index e5803fd9696..483b0ec21df 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipReader.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -35,12 +35,12 @@ import org.apache.lucene.store.IndexInput; * 0 1 2 3 4 5 * d d d d d d (posting list) * ^ ^ (skip point in MultiLeveSkipWriter) - * ^ (skip point in BlockSkipWriter) + * ^ (skip point in Lucene41SkipWriter) * * In this case, MultiLevelSkipListReader will use the last document as a skip point, - * while BlockSkipReader should assume no skip point will comes. + * while Lucene41SkipReader should assume no skip point will comes. * - * If we use the interface directly in BlockSkipReader, it may silly try to read + * If we use the interface directly in Lucene41SkipReader, it may silly try to read * another skip data after the only skip point is loaded. 
* * To illustrate this, we can call skipTo(d[5]), since skip point d[3] has smaller docId, @@ -50,8 +50,8 @@ import org.apache.lucene.store.IndexInput; * Therefore, we'll trim df before passing it to the interface. see trim(int) * */ -final class BlockSkipReader extends MultiLevelSkipListReader { - // private boolean DEBUG = BlockPostingsReader.DEBUG; +final class Lucene41SkipReader extends MultiLevelSkipListReader { + // private boolean DEBUG = Lucene41PostingsReader.DEBUG; private final int blockSize; private long docPointer[]; @@ -66,7 +66,7 @@ final class BlockSkipReader extends MultiLevelSkipListReader { private long lastDocPointer; private int lastPosBufferUpto; - public BlockSkipReader(IndexInput skipStream, int maxSkipLevels, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { + public Lucene41SkipReader(IndexInput skipStream, int maxSkipLevels, int blockSize, boolean hasPos, boolean hasOffsets, boolean hasPayloads) { super(skipStream, maxSkipLevels, blockSize, 8); this.blockSize = blockSize; docPointer = new long[maxSkipLevels]; @@ -91,7 +91,7 @@ final class BlockSkipReader extends MultiLevelSkipListReader { /** * Trim original docFreq to tell skipReader read proper number of skip points. * - * Since our definition in BlockSkip* is a little different from MultiLevelSkip* + * Since our definition in Lucene41Skip* is a little different from MultiLevelSkip* * This trimmed docFreq will prevent skipReader from: * 1. silly reading a non-existed skip point after the last block boundary * 2. 
moving into the vInt block diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java similarity index 94% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java rename to lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java index 409930c6fed..1bd082859d9 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SkipWriter.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.block; +package org.apache.lucene.codecs.lucene41; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -43,8 +43,8 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter; * 4. start offset. * */ -final class BlockSkipWriter extends MultiLevelSkipListWriter { - // private boolean DEBUG = BlockPostingsReader.DEBUG; +final class Lucene41SkipWriter extends MultiLevelSkipListWriter { + // private boolean DEBUG = Lucene41PostingsReader.DEBUG; private int[] lastSkipDoc; private long[] lastSkipDocPointer; @@ -66,7 +66,7 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter { private boolean fieldHasOffsets; private boolean fieldHasPayloads; - public BlockSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { + public Lucene41SkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { super(blockSize, 8, maxSkipLevels, docCount); this.docOut = docOut; this.posOut = posOut; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html new file mode 100644 index 00000000000..14782803a1c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html @@ -0,0 +1,396 @@ + 
+ + + + + + +Lucene 4.1 file format. + +

    Apache Lucene - Index File Formats

    + + +

    Introduction

    +
    +

    This document defines the index file formats used in this version of Lucene. +If you are using a different version of Lucene, please consult the copy of +docs/ that was distributed with +the version you are using.

    +

    Apache Lucene is written in Java, but several efforts are underway to write +versions of +Lucene in other programming languages. If these versions are to remain +compatible with Apache Lucene, then a language-independent definition of the +Lucene index format is required. This document thus attempts to provide a +complete and independent definition of the Apache Lucene file formats.

    +

    As Lucene evolves, this document should evolve. Versions of Lucene in +different programming languages should endeavor to agree on file formats, and +generate new versions of this document.

    +
    + +

    Definitions

    +
    +

    The fundamental concepts in Lucene are index, document, field and term.

    +

    An index contains a sequence of documents.

    +
      +
    • A document is a sequence of fields.
    • +
    • A field is a named sequence of terms.
    • +
    • A term is a sequence of bytes.
    • +
    +

    The same sequence of bytes in two different fields is considered a different +term. Thus terms are represented as a pair: the string naming the field, and the +bytes within the field.

    + +

    Inverted Indexing

    +

    The index stores statistics about terms in order to make term-based search +more efficient. Lucene's index falls into the family of indexes known as an +inverted index. This is because it can list, for a term, the documents +that contain it. This is the inverse of the natural relationship, in which +documents list terms.

    + +

    Types of Fields

    +

    In Lucene, fields may be stored, in which case their text is stored +in the index literally, in a non-inverted manner. Fields that are inverted are +called indexed. A field may be both stored and indexed.

    +

    The text of a field may be tokenized into terms to be indexed, or the +text of a field may be used literally as a term to be indexed. Most fields are +tokenized, but sometimes it is useful for certain identifier fields to be +indexed literally.

    +

    See the {@link org.apache.lucene.document.Field Field} +java docs for more information on Fields.

    + +

    Segments

    +

    Lucene indexes may be composed of multiple sub-indexes, or segments. +Each segment is a fully independent index, which could be searched separately. +Indexes evolve by:

    +
      +
    1. Creating new segments for newly added documents.
    2. +
    3. Merging existing segments.
    4. +
    +

    Searches may involve multiple segments and/or multiple indexes, each index +potentially composed of a set of segments.

    + +

    Document Numbers

    +

    Internally, Lucene refers to documents by an integer document number. +The first document added to an index is numbered zero, and each subsequent +document added gets a number one greater than the previous.

    +

    Note that a document's number may change, so caution should be taken when +storing these numbers outside of Lucene. In particular, numbers may change in +the following situations:

    +
      +
    • +

      The numbers stored in each segment are unique only within the segment, and +must be converted before they can be used in a larger context. The standard +technique is to allocate each segment a range of values, based on the range of +numbers used in that segment. To convert a document number from a segment to an +external value, the segment's base document number is added. To convert +an external value back to a segment-specific value, the segment is identified +by the range that the external value is in, and the segment's base value is +subtracted. For example two five document segments might be combined, so that +the first segment has a base value of zero, and the second of five. Document +three from the second segment would have an external value of eight.

      +
    • +
    • +

      When documents are deleted, gaps are created in the numbering. These are +eventually removed as the index evolves through merging. Deleted documents are +dropped when segments are merged. A freshly-merged segment thus has no gaps in +its numbering.

      +
    • +
    +
    + +

    Index Structure Overview

    +
    +

    Each segment index maintains the following:

    +
      +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment info}. + This contains metadata about a segment, such as the number of documents, + what files it uses, +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}. + This contains the set of field names used in the index. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Stored Field values}. +This contains, for each document, a list of attribute-value pairs, where the attributes +are field names. These are used to store auxiliary information about the document, such as +its title, url, or an identifier to access a database. The set of stored fields are what is +returned for each hit when searching. This is keyed by document number. +
    • +
    • +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}. +A dictionary containing all of the terms used in all of the +indexed fields of all of the documents. The dictionary also contains the number +of documents which contain the term, and pointers to the term's frequency and +proximity data. +
    • +
    • +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Frequency data}. +For each term in the dictionary, the numbers of all the +documents that contain that term, and the frequency of the term in that +document, unless frequencies are omitted (IndexOptions.DOCS_ONLY) +
    • +
    • +{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Proximity data}. +For each term in the dictionary, the positions that the +term occurs in each document. Note that this will not exist if all fields in +all documents omit position data. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Normalization factors}. +For each field in each document, a value is stored +that is multiplied into the score for hits on that field. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vectors}. +For each field in each document, the term vector (sometimes +called document vector) may be stored. A term vector consists of term text and +term frequency. To add Term Vectors to your index see the +{@link org.apache.lucene.document.Field Field} constructors +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-document values}. +Like stored values, these are also keyed by document +number, but are generally intended to be loaded into main memory for fast +access. Whereas stored values are generally intended for summary results from +searches, per-document values are useful for things like scoring factors. +
    • +
    • +{@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted documents}. +An optional file indicating which documents are deleted. +
    • +
    +

    Details on each of these are provided in their linked pages.

    +
    + +

    File Naming

    +
    +

    All files belonging to a segment have the same name with varying extensions. +The extensions correspond to the different file formats described below. When +using the Compound File format (default in 1.4 and greater) these files (except +for the Segment info file, the Lock file, and Deleted documents file) are collapsed +into a single .cfs file (see below for details)

    +

    Typically, all segments in an index are stored in a single directory, +although this is not required.

    +

    As of version 2.1 (lock-less commits), file names are never re-used (there +is one exception, "segments.gen", see below). That is, when any file is saved +to the Directory it is given a never before used filename. This is achieved +using a simple generations approach. For example, the first segments file is +segments_1, then segments_2, etc. The generation is a sequential long integer +represented in alpha-numeric (base 36) form.

    +
    + +

    Summary of File Extensions

    +
    +

    The following table summarizes the names and extensions of the files in +Lucene:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameExtensionBrief Description
    {@link org.apache.lucene.index.SegmentInfos Segments File}segments.gen, segments_NStores information about a commit point
    Lock Filewrite.lockThe Write lock prevents multiple IndexWriters from writing to the same +file.
    {@link org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat Segment Info}.siStores metadata about a segment
    {@link org.apache.lucene.store.CompoundFileDirectory Compound File}.cfs, .cfeAn optional "virtual" file consisting of all the other index files for +systems that frequently run out of file handles.
    {@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}.fnmStores information about the fields
    {@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Field Index}.fdxContains pointers to field data
    {@link org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat Field Data}.fdtThe stored fields for documents
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Dictionary}.timThe term dictionary, stores term info
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term Index}.tipThe index into the Term Dictionary
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Frequencies}.docContains the list of docs which contain each term along with frequency
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Positions}.posStores position information about where a term occurs in the index
    {@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Payloads}.payStores additional per-position metadata information such as character offsets and user payloads
    {@link org.apache.lucene.codecs.lucene40.Lucene40NormsFormat Norms}.nrm.cfs, .nrm.cfeEncodes length and boost factors for docs and fields
    {@link org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat Per-Document Values}.dv.cfs, .dv.cfeEncodes additional scoring factors or other per-document information.
    {@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Index}.tvxStores offset into the document data file
    {@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Documents}.tvdContains information about each document that has term vectors
    {@link org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat Term Vector Fields}.tvfThe field level info about term vectors
    {@link org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat Deleted Documents}.delInfo about which documents are deleted
    +
    + +

    Lock File

    +The write lock, which is stored in the index directory by default, is named +"write.lock". If the lock directory is different from the index directory then +the write lock will be named "XXXX-write.lock" where XXXX is a unique prefix +derived from the full path to the index directory. When this file is present, a +writer is currently modifying the index (adding or removing documents). This +lock file ensures that only one writer is modifying the index at a time.

    + +

    History

    +

    Compatibility notes are provided in this document, describing how file +formats have changed from prior versions:

    +
      +
    • In version 2.1, the file format was changed to allow lock-less commits (ie, +no more commit lock). The change is fully backwards compatible: you can open a +pre-2.1 index for searching or adding/deleting of docs. When the new segments +file is saved (committed), it will be written in the new file format (meaning +no specific "upgrade" process is needed). But note that once a commit has +occurred, pre-2.1 Lucene will not be able to read the index.
    • +
    • In version 2.3, the file format was changed to allow segments to share a +single set of doc store (vectors & stored fields) files. This allows for +faster indexing in certain cases. The change is fully backwards compatible (in +the same way as the lock-less commits change in 2.1).
    • +
    • In version 2.4, Strings are now written as true UTF-8 byte sequence, not +Java's modified UTF-8. See +LUCENE-510 for details.
    • +
    • In version 2.9, an optional opaque Map<String,String> CommitUserData +may be passed to IndexWriter's commit methods (and later retrieved), which is +recorded in the segments_N file. See +LUCENE-1382 for details. Also, +diagnostics were added to each segment written recording details about why it +was written (due to flush, merge; which OS/JRE was used; etc.). See issue +LUCENE-1654 for details.
    • +
    • In version 3.0, compressed fields are no longer written to the index (they +can still be read, but on merge the new segment will write them, uncompressed). +See issue LUCENE-1960 +for details.
    • +
    • In version 3.1, segments record the code version that created them. See +LUCENE-2720 for details. +Additionally segments track explicitly whether or not they have term vectors. +See LUCENE-2811 +for details.
    • +
    • In version 3.2, numeric fields are written natively to the stored fields +file; previously they were stored in text format only.
    • +
    • In version 3.4, fields can omit position data while still indexing term +frequencies.
    • +
    • In version 4.0, the format of the inverted index became extensible via +the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage +({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization +factors need no longer be a single byte, they can be any DocValues +{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode +strings, they can be any byte sequence. Term offsets can optionally be indexed +into the postings lists. Payloads can be stored in the term vectors.
    • +
    • In version 4.1, the format of the postings list changed to use either +of FOR compression or variable-byte encoding, depending upon the frequency +of the term.
    • +
    + +

    Limitations

    +
    +

    When referring to term numbers, Lucene's current implementation uses a Java +int to hold the term index, which means the +maximum number of unique terms in any single index segment is ~2.1 billion +times the term index interval (default 128) = ~274 billion. This is technically +not a limitation of the index file format, just of Lucene's current +implementation.

    +

    Similarly, Lucene uses a Java int to refer to +document numbers, and the index file format uses an Int32 +on-disk to store document numbers. This is a limitation +of both the index file format and the current implementation. Eventually these +should be replaced with either UInt64 values, or +better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.

    +
    + + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/package.html b/lucene/core/src/java/org/apache/lucene/codecs/package.html index e6de64d057b..91a65458ac1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/package.html @@ -61,8 +61,8 @@ name of your codec. If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings formats for different fields, then you can register your custom postings format in the same way (in META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default - {@link org.apache.lucene.codecs.lucene40.Lucene40Codec} and override - {@link org.apache.lucene.codecs.lucene40.Lucene40Codec#getPostingsFormatForField(String)} to return your custom + {@link org.apache.lucene.codecs.lucene41.Lucene41Codec} and override + {@link org.apache.lucene.codecs.lucene41.Lucene41Codec#getPostingsFormatForField(String)} to return your custom postings format.

    diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index bf5df24a0a2..371c48990ab 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -233,9 +233,12 @@ public class CheckIndex { TermIndexStatus() { } - /** Total term count */ + /** Number of terms with at least one live doc. */ public long termCount = 0L; + /** Number of terms with zero live docs docs. */ + public long delTermCount = 0L; + /** Total frequency across all terms. */ public long totFreq = 0L; @@ -750,7 +753,7 @@ public class CheckIndex { final TermsEnum termsEnum = terms.iterator(null); boolean hasOrd = true; - final long termCountStart = status.termCount; + final long termCountStart = status.delTermCount + status.termCount; BytesRef lastTerm = null; @@ -781,7 +784,6 @@ public class CheckIndex { if (docFreq <= 0) { throw new RuntimeException("docfreq: " + docFreq + " is out of bounds"); } - status.totFreq += docFreq; sumDocFreq += docFreq; docs = termsEnum.docs(liveDocs, docs); @@ -796,15 +798,13 @@ public class CheckIndex { } if (hasOrd) { - final long ordExpected = status.termCount - termCountStart; + final long ordExpected = status.delTermCount + status.termCount - termCountStart; if (ord != ordExpected) { throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected); } } } - status.termCount++; - final DocsEnum docs2; if (postings != null) { docs2 = postings; @@ -820,6 +820,7 @@ public class CheckIndex { if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } + status.totFreq++; visitedDocs.set(doc); int freq = -1; if (hasFreqs) { @@ -883,6 +884,12 @@ public class CheckIndex { } } + if (docCount != 0) { + status.termCount++; + } else { + status.delTermCount++; + } + final long totalTermFreq2 = termsEnum.totalTermFreq(); final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1; 
@@ -1063,11 +1070,11 @@ public class CheckIndex { // check unique term count long termCount = -1; - if (status.termCount-termCountStart > 0) { + if ((status.delTermCount+status.termCount)-termCountStart > 0) { termCount = fields.terms(field).size(); - if (termCount != -1 && termCount != status.termCount - termCountStart) { - throw new RuntimeException("termCount mismatch " + termCount + " vs " + (status.termCount - termCountStart)); + if (termCount != -1 && termCount != status.delTermCount + status.termCount - termCountStart) { + throw new RuntimeException("termCount mismatch " + (status.delTermCount + termCount) + " vs " + (status.termCount - termCountStart)); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index ab56cd585fa..9b0204dbab5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -34,7 +34,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumbers; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.MergeState.CheckAbort; @@ -42,7 +41,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; @@ -52,7 +50,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.MutableBits; import 
org.apache.lucene.util.ThreadInterruptedException; /** @@ -3118,13 +3115,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { mergedDeletes.dropChanges(); } readerPool.release(mergedDeletes); - if (dropSegment) { - readerPool.drop(mergedDeletes.info); - } } if (dropSegment) { assert !segmentInfos.contains(merge.info); + readerPool.drop(merge.info); deleter.deleteNewFiles(merge.info.files()); } @@ -3736,8 +3731,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); - - if (poolReaders && mergedSegmentWarmer != null) { + if (poolReaders && mergedSegmentWarmer != null && merge.info.info.getDocCount() != 0) { final ReadersAndLiveDocs rld = readerPool.get(merge.info, true); final SegmentReader sr = rld.getReader(IOContext.READ); try { diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index 7652fa24211..9201642a750 100755 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -186,14 +186,14 @@ public class LiveIndexWriterConfig { * NOTE: This parameter does not apply to all PostingsFormat implementations, * including the default one in this release. It only makes sense for term indexes * that are implemented as a fixed gap between terms. 
For example, - * {@link Lucene40PostingsFormat} implements the term index instead based upon how + * {@link Lucene41PostingsFormat} implements the term index instead based upon how * terms share prefixes. To configure its parameters (the minimum and maximum size - * for a block), you would instead use {@link Lucene40PostingsFormat#Lucene40PostingsFormat(int, int)}. + * for a block), you would instead use {@link Lucene41PostingsFormat#Lucene41PostingsFormat(int, int)}. * which can also be configured on a per-field basis: *
    -   * //customize Lucene40PostingsFormat, passing minBlockSize=50, maxBlockSize=100
    -   * final PostingsFormat tweakedPostings = new Lucene40PostingsFormat(50, 100);
    -   * iwc.setCodec(new Lucene40Codec() {
    +   * //customize Lucene41PostingsFormat, passing minBlockSize=50, maxBlockSize=100
    +   * final PostingsFormat tweakedPostings = new Lucene41PostingsFormat(50, 100);
    +   * iwc.setCodec(new Lucene41Codec() {
        *   @Override
        *   public PostingsFormat getPostingsFormatForField(String field) {
        *     if (field.equals("fieldWithTonsOfTerms"))
    diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
    index 82c3e5c58f8..de1cc736ea9 100644
    --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
    +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
    @@ -14,3 +14,4 @@
     #  limitations under the License.
     
     org.apache.lucene.codecs.lucene40.Lucene40Codec
    +org.apache.lucene.codecs.lucene41.Lucene41Codec
    diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    index 112a1698302..023d9c9e1a6 100644
    --- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    +++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    @@ -14,3 +14,4 @@
     #  limitations under the License.
     
     org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
    +org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat
    diff --git a/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java
    index aa5bf5bd2d8..4dcb5353fc1 100644
    --- a/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java
    +++ b/lucene/core/src/test/org/apache/lucene/TestExternalCodecs.java
    @@ -19,7 +19,7 @@ package org.apache.lucene;
     
     import org.apache.lucene.analysis.*;
     import org.apache.lucene.codecs.*;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     import org.apache.lucene.document.*;
     import org.apache.lucene.index.*;
     import org.apache.lucene.search.*;
    @@ -31,11 +31,11 @@ import org.apache.lucene.util.*;
     
     public class TestExternalCodecs extends LuceneTestCase {
     
    -  private static final class CustomPerFieldCodec extends Lucene40Codec {
    +  private static final class CustomPerFieldCodec extends Lucene41Codec {
         
         private final PostingsFormat ramFormat = PostingsFormat.forName("RAMOnly");
    -    private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene40");
    -    private final PostingsFormat pulsingFormat = PostingsFormat.forName("Pulsing40");
    +    private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
    +    private final PostingsFormat pulsingFormat = PostingsFormat.forName("Pulsing41");
     
         @Override
         public PostingsFormat getPostingsFormatForField(String field) {
    diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    index e5a0ae51ff0..98c7cb5b2f2 100644
    --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
    @@ -38,11 +38,12 @@ import org.apache.lucene.util.LineFileDocs;
     import org.apache.lucene.util.LuceneTestCase;
     import org.apache.lucene.util._TestUtil;
     
    +// TODO: really this should be in BaseTestPF or somewhere else? useful test!
     public class TestReuseDocsEnum extends LuceneTestCase {
     
       public void testReuseDocsEnumNoReuse() throws IOException {
         Directory dir = newDirectory();
    -    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    +    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
         RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
         int numdocs = atLeast(20);
    @@ -69,7 +70,7 @@ public class TestReuseDocsEnum extends LuceneTestCase {
       // tests for reuse only if bits are the same either null or the same instance
       public void testReuseDocsEnumSameBitsOrNull() throws IOException {
         Directory dir = newDirectory();
    -    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    +    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
         RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
         int numdocs = atLeast(20);
    @@ -113,7 +114,7 @@ public class TestReuseDocsEnum extends LuceneTestCase {
       // make sure we never reuse from another reader even if it is the same field & codec etc
       public void testReuseDocsEnumDifferentReader() throws IOException {
         Directory dir = newDirectory();
    -    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat());
    +    Codec cp = _TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
         RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
         int numdocs = atLeast(20);
    diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java
    similarity index 71%
    rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat.java
    rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java
    index 235c85fe2e7..dd3231e36c3 100644
    --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat.java
    @@ -1,4 +1,4 @@
    -package org.apache.lucene.codecs.block;
    +package org.apache.lucene.codecs.lucene41;
     
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
    @@ -18,22 +18,13 @@ package org.apache.lucene.codecs.block;
      */
     
     import org.apache.lucene.codecs.Codec;
    -import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.block.BlockPostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
     import org.apache.lucene.index.BasePostingsFormatTestCase;
     
     /**
      * Tests BlockPostingsFormat
      */
     public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
    -  private final PostingsFormat postings = new BlockPostingsFormat();
    -  private final Codec codec = new Lucene40Codec() {
    -    @Override
    -    public PostingsFormat getPostingsFormatForField(String field) {
    -      return postings;
    -    }
    -  };
    +  private final Codec codec = new Lucene41Codec();
     
       @Override
       protected Codec getCodec() {
    diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java
    similarity index 91%
    rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat2.java
    rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java
    index 8b462d2e587..0a49540f73d 100644
    --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat2.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat2.java
    @@ -1,4 +1,4 @@
    -package org.apache.lucene.codecs.block;
    +package org.apache.lucene.codecs.lucene41;
     
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
    @@ -19,7 +19,6 @@ package org.apache.lucene.codecs.block;
     
     import org.apache.lucene.analysis.MockAnalyzer;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.document.FieldType;
    @@ -47,10 +46,10 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
         super.setUp();
         dir = newFSDirectory(_TestUtil.getTempDir("testDFBlockSize"));
         iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    -    iwc.setCodec(new Lucene40Codec() {
    +    iwc.setCodec(new Lucene41Codec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String field) {
    -        return PostingsFormat.forName("Block");
    +        return PostingsFormat.forName("Lucene41");
           }
         });
         iw = new RandomIndexWriter(random(), dir, iwc);
    @@ -88,7 +87,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
       /** tests terms with df = blocksize */
       public void testDFBlockSize() throws Exception {
         Document doc = newDocument();
    -    for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE; i++) {
    +    for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE; i++) {
           for (Field f : doc.getFields()) {
             f.setStringValue(f.name() + " " + f.name() + "_2");
           }
    @@ -99,7 +98,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
       /** tests terms with df % blocksize = 0 */
       public void testDFBlockSizeMultiple() throws Exception {
         Document doc = newDocument();
    -    for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE * 16; i++) {
    +    for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE * 16; i++) {
           for (Field f : doc.getFields()) {
             f.setStringValue(f.name() + " " + f.name() + "_2");
           }
    @@ -110,7 +109,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
       /** tests terms with ttf = blocksize */
       public void testTTFBlockSize() throws Exception {
         Document doc = newDocument();
    -    for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE/2; i++) {
    +    for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE/2; i++) {
           for (Field f : doc.getFields()) {
             f.setStringValue(f.name() + " " + f.name() + " " + f.name() + "_2 " + f.name() + "_2");
           }
    @@ -121,7 +120,7 @@ public class TestBlockPostingsFormat2 extends LuceneTestCase {
       /** tests terms with ttf % blocksize = 0 */
       public void testTTFBlockSizeMultiple() throws Exception {
         Document doc = newDocument();
    -    for (int i = 0; i < BlockPostingsFormat.BLOCK_SIZE/2; i++) {
    +    for (int i = 0; i < Lucene41PostingsFormat.BLOCK_SIZE/2; i++) {
           for (Field f : doc.getFields()) {
             String proto = (f.name() + " " + f.name() + " " + f.name() + " " + f.name() + " " 
                            + f.name() + "_2 " + f.name() + "_2 " + f.name() + "_2 " + f.name() + "_2");
    diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat3.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java
    similarity index 98%
    rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat3.java
    rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java
    index 9ef0aae6726..34bd00789d3 100644
    --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestBlockPostingsFormat3.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestBlockPostingsFormat3.java
    @@ -1,4 +1,4 @@
    -package org.apache.lucene.codecs.block;
    +package org.apache.lucene.codecs.lucene41;
     
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
    @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
     import org.apache.lucene.analysis.TokenFilter;
     import org.apache.lucene.analysis.Tokenizer;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.document.FieldType;
    @@ -64,7 +64,7 @@ import org.apache.lucene.util.automaton.RegExp;
      * Tests partial enumeration (only pulling a subset of the prox data) 
      */
     public class TestBlockPostingsFormat3 extends LuceneTestCase {
    -  static final int MAXDOC = BlockPostingsFormat.BLOCK_SIZE * 20;
    +  static final int MAXDOC = Lucene41PostingsFormat.BLOCK_SIZE * 20;
       
       // creates 6 fields with different options and does "duels" of fields against each other
       public void test() throws Exception {
    @@ -85,10 +85,10 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
           }
         };
         IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    -    iwc.setCodec(new Lucene40Codec() {
    +    iwc.setCodec(new Lucene41Codec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String field) {
    -        return PostingsFormat.forName("Block");
    +        return PostingsFormat.forName("Lucene41");
             // TODO: we could actually add more fields implemented with different PFs
           }
         });
    diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestForUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java
    similarity index 92%
    rename from lucene/codecs/src/test/org/apache/lucene/codecs/block/TestForUtil.java
    rename to lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java
    index 025a6348164..3831033a6ea 100644
    --- a/lucene/codecs/src/test/org/apache/lucene/codecs/block/TestForUtil.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestForUtil.java
    @@ -1,4 +1,4 @@
    -package org.apache.lucene.codecs.block;
    +package org.apache.lucene.codecs.lucene41;
     
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
    @@ -17,9 +17,9 @@ package org.apache.lucene.codecs.block;
      * limitations under the License.
      */
     
    -import static org.apache.lucene.codecs.block.BlockPostingsFormat.BLOCK_SIZE;
    -import static org.apache.lucene.codecs.block.ForUtil.MAX_DATA_SIZE;
    -import static org.apache.lucene.codecs.block.ForUtil.MAX_ENCODED_SIZE;
    +import static org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat.BLOCK_SIZE;
    +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_DATA_SIZE;
    +import static org.apache.lucene.codecs.lucene41.ForUtil.MAX_ENCODED_SIZE;
     
     import java.io.IOException;
     import java.util.Arrays;
    diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
    index dac3b5af94a..582e774d126 100644
    --- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
    +++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
    @@ -21,10 +21,10 @@ import java.io.IOException;
     import org.apache.lucene.analysis.MockAnalyzer;
     import org.apache.lucene.codecs.Codec;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
    -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
    +import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
     import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    @@ -142,7 +142,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
     
         assertQuery(new Term("content", "ccc"), dir, 10);
         assertQuery(new Term("content", "aaa"), dir, 10);
    -    Lucene40Codec codec = (Lucene40Codec)iwconf.getCodec();
    +    Lucene41Codec codec = (Lucene41Codec)iwconf.getCodec();
     
         iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
             .setOpenMode(OpenMode.APPEND).setCodec(codec);
    @@ -158,7 +158,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
         }
         addDocs2(writer, 10);
         writer.commit();
    -    codec = (Lucene40Codec)iwconf.getCodec();
    +    codec = (Lucene41Codec)iwconf.getCodec();
         assertEquals(30, writer.maxDoc());
         assertQuery(new Term("content", "bbb"), dir, 10);
         assertQuery(new Term("content", "ccc"), dir, 10);   ////
    @@ -200,8 +200,8 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
     
       }
     
    -  public static class MockCodec extends Lucene40Codec {
    -    final PostingsFormat lucene40 = new Lucene40PostingsFormat();
    +  public static class MockCodec extends Lucene41Codec {
    +    final PostingsFormat lucene40 = new Lucene41PostingsFormat();
         final PostingsFormat simpleText = new SimpleTextPostingsFormat();
         final PostingsFormat mockSep = new MockSepPostingsFormat();
         
    @@ -217,8 +217,8 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
         }
       }
     
    -  public static class MockCodec2 extends Lucene40Codec {
    -    final PostingsFormat lucene40 = new Lucene40PostingsFormat();
    +  public static class MockCodec2 extends Lucene41Codec {
    +    final PostingsFormat lucene40 = new Lucene41PostingsFormat();
         final PostingsFormat simpleText = new SimpleTextPostingsFormat();
         
         @Override
    @@ -268,13 +268,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
       }
       
       public void testSameCodecDifferentInstance() throws Exception {
    -    Codec codec = new Lucene40Codec() {
    +    Codec codec = new Lucene41Codec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String field) {
             if ("id".equals(field)) {
    -          return new Pulsing40PostingsFormat(1);
    +          return new Pulsing41PostingsFormat(1);
             } else if ("date".equals(field)) {
    -          return new Pulsing40PostingsFormat(1);
    +          return new Pulsing41PostingsFormat(1);
             } else {
               return super.getPostingsFormatForField(field);
             }
    @@ -284,13 +284,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
       }
       
       public void testSameCodecDifferentParams() throws Exception {
    -    Codec codec = new Lucene40Codec() {
    +    Codec codec = new Lucene41Codec() {
           @Override
           public PostingsFormat getPostingsFormatForField(String field) {
             if ("id".equals(field)) {
    -          return new Pulsing40PostingsFormat(1);
    +          return new Pulsing41PostingsFormat(1);
             } else if ("date".equals(field)) {
    -          return new Pulsing40PostingsFormat(2);
    +          return new Pulsing41PostingsFormat(2);
             } else {
               return super.getPostingsFormatForField(field);
             }
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
    index 31b4d190f80..42db793e0db 100755
    --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
    @@ -27,8 +27,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
     import org.apache.lucene.codecs.Codec;
     import org.apache.lucene.codecs.FilterCodec;
     import org.apache.lucene.codecs.PostingsFormat;
    -import org.apache.lucene.codecs.lucene40.Lucene40Codec;
    -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41Codec;
    +import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.document.FieldType;
    @@ -1058,9 +1058,9 @@ public class TestAddIndexes extends LuceneTestCase {
         aux2.close();
       }
     
    -  private static final class CustomPerFieldCodec extends Lucene40Codec {
    +  private static final class CustomPerFieldCodec extends Lucene41Codec {
         private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
    -    private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene40");
    +    private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
         private final PostingsFormat mockSepFormat = PostingsFormat.forName("MockSep");
     
         @Override
    @@ -1109,7 +1109,7 @@ public class TestAddIndexes extends LuceneTestCase {
       
       private static final class UnRegisteredCodec extends FilterCodec {
         public UnRegisteredCodec() {
    -      super("NotRegistered", new Lucene40Codec());
    +      super("NotRegistered", new Lucene41Codec());
         }
       }
       
    @@ -1138,7 +1138,7 @@ public class TestAddIndexes extends LuceneTestCase {
           Directory dir = newDirectory();
           IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
               new MockAnalyzer(random()));
    -      conf.setCodec(_TestUtil.alwaysPostingsFormat(new Pulsing40PostingsFormat(1 + random().nextInt(20))));
    +      conf.setCodec(_TestUtil.alwaysPostingsFormat(new Pulsing41PostingsFormat(1 + random().nextInt(20))));
           IndexWriter w = new IndexWriter(dir, conf);
           try {
             w.addIndexes(toAdd);
    diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
    similarity index 95%
    rename from lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java
    rename to lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
    index d6535df143d..8466b90a521 100644
    --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
    @@ -1,4 +1,4 @@
    -package org.apache.lucene.codecs.lucene40;
    +package org.apache.lucene.index;
     
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
    @@ -35,13 +35,13 @@ import org.apache.lucene.util.LuceneTestCase;
     import org.apache.lucene.util._TestUtil;
     
     /**
    - * Test that a plain Lucene40Codec puts codec headers in all files.
     + * Test that a plain default codec puts codec headers in all files.
      */
     public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
       public void test() throws Exception {
         Directory dir = newDirectory();
         IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    -    conf.setCodec(Codec.forName("Lucene40"));
    +    conf.setCodec(Codec.forName("Lucene41"));
         // riw should sometimes create docvalues fields, etc
         RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
         Document doc = new Document();
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
    index c450fbc6998..cd33794142d 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
    @@ -75,8 +75,8 @@ public class TestCheckIndex extends LuceneTestCase {
     
         assertNotNull(seg.termIndexStatus);
         assertNull(seg.termIndexStatus.error);
    -    assertEquals(19, seg.termIndexStatus.termCount);
    -    assertEquals(19, seg.termIndexStatus.totFreq);
    +    assertEquals(18, seg.termIndexStatus.termCount);
    +    assertEquals(18, seg.termIndexStatus.totFreq);
         assertEquals(18, seg.termIndexStatus.totPos);
     
         assertNotNull(seg.storedFieldStatus);
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java
    index 3bc247dcc1f..08819143a1c 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java
    @@ -28,7 +28,7 @@ import java.util.Random;
     import java.util.Set;
     
     import org.apache.lucene.analysis.MockAnalyzer;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.document.FieldType;
    @@ -828,7 +828,7 @@ public void testFilesOpenClose() throws IOException {
       // LUCENE-1609: don't load terms index
       public void testNoTermsIndex() throws Throwable {
         Directory dir = newDirectory();
    -    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())));
    +    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())));
         Document doc = new Document();
         doc.add(newTextField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO));
         doc.add(newTextField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO));
    @@ -848,7 +848,7 @@ public void testFilesOpenClose() throws IOException {
         writer = new IndexWriter(
             dir,
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
    -            setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())).
    +            setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())).
                 setMergePolicy(newLogMergePolicy(10))
         );
         writer.addDocument(doc);
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java b/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java
    index 22b2360fcd8..5c5adce7650 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocTermOrds.java
    @@ -110,7 +110,7 @@ public class TestDocTermOrds extends LuceneTestCase {
         // Sometimes swap in codec that impls ord():
         if (random().nextInt(10) == 7) {
           // Make sure terms index has ords:
    -      Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene40WithOrds"));
    +      Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds"));
           conf.setCodec(codec);
         }
         
    @@ -207,7 +207,7 @@ public class TestDocTermOrds extends LuceneTestCase {
     
         // Sometimes swap in codec that impls ord():
         if (random().nextInt(10) == 7) {
    -      Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene40WithOrds"));
    +      Codec codec = _TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds"));
           conf.setCodec(codec);
         }
         
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
    index 0c9bd4b5a79..bb304c4a666 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
    @@ -61,7 +61,7 @@ public class TestDuelingCodecs extends LuceneTestCase {
       public void setUp() throws Exception {
         super.setUp();
     
    -    // for now its SimpleText vs Lucene40(random postings format)
     +    // for now it's SimpleText vs Lucene41 (random postings format)
         // as this gives the best overall coverage. when we have more
         // codecs we should probably pick 2 from Codec.availableCodecs()
         
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFlex.java b/lucene/core/src/test/org/apache/lucene/index/TestFlex.java
    index 76a1ee53481..2bd65a9ab85 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestFlex.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestFlex.java
    @@ -19,7 +19,7 @@ package org.apache.lucene.index;
     
     import org.apache.lucene.store.*;
     import org.apache.lucene.analysis.*;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.document.*;
     import org.apache.lucene.util.*;
     
    @@ -65,7 +65,7 @@ public class TestFlex extends LuceneTestCase {
       public void testTermOrd() throws Exception {
         Directory d = newDirectory();
         IndexWriter w = new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT,
    -                                                             new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())));
    +                                                             new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())));
         Document doc = new Document();
         doc.add(newTextField("f", "a b c", Field.Store.NO));
         w.addDocument(doc);
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
    index 42de0b6e414..2ee0449b5d0 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
    @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicInteger;
     
     import org.apache.lucene.analysis.*;
     import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.store.Directory;
    @@ -69,7 +69,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
     
       public void testSimpleSkip() throws IOException {
         Directory dir = new CountingRAMDirectory(new RAMDirectory());
    -    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())).setMergePolicy(newLogMergePolicy()));
    +    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new PayloadAnalyzer()).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())).setMergePolicy(newLogMergePolicy()));
         Term term = new Term("test", "a");
         for (int i = 0; i < 5000; i++) {
           Document d1 = new Document();
    diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java
    index 4554243ca1a..1379a3c223e 100644
    --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java
    +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentTermEnum.java
    @@ -24,7 +24,7 @@ import org.apache.lucene.util.LuceneTestCase;
     import org.apache.lucene.util.BytesRef;
     import org.apache.lucene.util._TestUtil;
     import org.apache.lucene.analysis.MockAnalyzer;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.index.IndexWriterConfig.OpenMode;
     import org.apache.lucene.store.Directory;
    @@ -75,7 +75,7 @@ public class TestSegmentTermEnum extends LuceneTestCase {
     
       public void testPrevTermAtEnd() throws IOException
       {
    -    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())));
    +    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())));
         addDoc(writer, "aaa bbb");
         writer.close();
         SegmentReader reader = getOnlySegmentReader(DirectoryReader.open(dir));
    diff --git a/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java b/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java
    index ef7d047b06d..bff508ff8ea 100644
    --- a/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java
    +++ b/lucene/core/src/test/org/apache/lucene/util/TestNamedSPILoader.java
    @@ -25,8 +25,8 @@ import org.apache.lucene.codecs.Codec;
     // enough to test the basics via Codec
     public class TestNamedSPILoader extends LuceneTestCase {
       public void testLookup() {
    -    Codec codec = Codec.forName("Lucene40");
    -    assertEquals("Lucene40", codec.getName());
    +    Codec codec = Codec.forName("Lucene41");
    +    assertEquals("Lucene41", codec.getName());
       }
       
       // we want an exception if its not found.
    @@ -39,6 +39,6 @@ public class TestNamedSPILoader extends LuceneTestCase {
       
       public void testAvailableServices() {
         Set codecs = Codec.availableCodecs();
    -    assertTrue(codecs.contains("Lucene40"));
    +    assertTrue(codecs.contains("Lucene41"));
       }
     }
    diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    index 4b45302bc27..4412738cdff 100644
    --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    @@ -141,6 +141,11 @@ public class WeightedSpanTermExtractor {
           extractWeightedSpanTerms(terms, (SpanQuery) query);
         } else if (query instanceof FilteredQuery) {
           extract(((FilteredQuery) query).getQuery(), terms);
    +    } else if (query instanceof ConstantScoreQuery) {
    +      final Query q = ((ConstantScoreQuery) query).getQuery();
    +      if (q != null) {
    +        extract(q, terms);
    +      }
         } else if (query instanceof DisjunctionMaxQuery) {
           for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
             extract(iterator.next(), terms);
    diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    index 7ce0731e1ef..115fc10f2f6 100644
    --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    @@ -603,7 +603,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
         // Not sure we can assert anything here - just running to check we dont
         // throw any exceptions
       }
    -
    +  
       public void testSpanHighlighting() throws Exception {
         Query query1 = new SpanNearQuery(new SpanQuery[] {
             new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
    @@ -663,6 +663,31 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     
         helper.start();
       }
    +  
    +  public void testGetBestFragmentsConstantScore() throws Exception {
    +    TestHighlightRunner helper = new TestHighlightRunner() {
    +
    +      @Override
    +      public void run() throws Exception {
    +        numHighlights = 0;
    +        if (random().nextBoolean()) {
    +          BooleanQuery bq = new BooleanQuery();
    +          bq.add(new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(
    +              new Term(FIELD_NAME, "kennedy")))), Occur.MUST);
    +          bq.add(new ConstantScoreQuery(new TermQuery(new Term(FIELD_NAME, "kennedy"))), Occur.MUST);
    +          doSearching(bq);
    +        } else {
    +          doSearching(new ConstantScoreQuery(new TermQuery(new Term(FIELD_NAME,
    +              "kennedy"))));
    +        }
    +        doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
    +        assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
    +            numHighlights == 4);
    +      }
    +    };
    +
    +    helper.start();
    +  }
     
       public void testGetFuzzyFragments() throws Exception {
         TestHighlightRunner helper = new TestHighlightRunner() {
    diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    index 68ac8106fd9..77dcedf5036 100644
    --- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
     import org.apache.lucene.analysis.MockAnalyzer;
     import org.apache.lucene.analysis.MockTokenFilter;
     import org.apache.lucene.analysis.MockTokenizer;
    -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
    +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.index.AtomicReader;
    @@ -123,7 +123,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
         Directory ramdir = new RAMDirectory();
         Analyzer analyzer = randomAnalyzer();
         IndexWriter writer = new IndexWriter(ramdir,
    -                                         new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat())));
    +                                         new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(_TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat())));
         Document doc = new Document();
         Field field1 = newTextField("foo", fooField.toString(), Field.Store.NO);
         Field field2 = newTextField("term", termField.toString(), Field.Store.NO);
    diff --git a/lucene/site/xsl/index.xsl b/lucene/site/xsl/index.xsl
    index 08352bcdb82..bf77408d671 100644
    --- a/lucene/site/xsl/index.xsl
    +++ b/lucene/site/xsl/index.xsl
    @@ -63,9 +63,10 @@
             

    Reference Documents

    • Changes: List of changes in this release.
    • +
    • System Requirements: Minimum and supported Java versions.
    • Migration Guide: What changed in Lucene 4; how to migrate code from Lucene 3.x.
    • JRE Version Migration: Information about upgrading between major JRE versions.
    • -
    • File Formats: Guide to the supported index format used by Lucene. This can be customized by using an alternate codec.
    • +
    • File Formats: Guide to the supported index format used by Lucene. This can be customized by using an alternate codec.
    • Search and Scoring in Lucene: Introduction to how Lucene scores documents.
    • Classic Scoring Formula: Formula of Lucene's classic Vector Space implementation. (look here for other models)
    • Classic QueryParser Syntax: Overview of the Classic QueryParser's syntax and features.
    • diff --git a/lucene/spatial/src/test-files/data/LUCENE-4464.txt b/lucene/spatial/src/test-files/data/LUCENE-4464.txt new file mode 100644 index 00000000000..dfb5a40a739 --- /dev/null +++ b/lucene/spatial/src/test-files/data/LUCENE-4464.txt @@ -0,0 +1,3 @@ +#id name shape +poly1 poly1 POLYGON ((-93.17288720912401 45.280265431486754, -93.17232270645628 45.2802724629027, -93.17229737711205 45.279497574052314, -93.1722224854913 45.277577770983854, -93.17218124644266 45.276747010395624, -93.16722650828461 45.276819421108826, -93.16581262076448 45.27684404529939, -93.16363038333625 45.276882054199596, -93.16249244695301 45.276929493877525, -93.16247370542268 45.27641118002343, -93.16246893668628 45.276279382682894, -93.1624671302382 45.274701063846244, -93.16246679905096 45.273381422360785, -93.16247689122851 45.273189685068424, -93.16249146710186 45.27291249464421, -93.16249868565903 45.272467966062614, -93.16247955957382 45.27177209534391, -93.1624787718002 45.27127651548793, -93.16247840794293 45.27104491547271, -93.16247917486976 45.27087000356473, -93.1624817727418 45.270279315147775, -93.16252487154968 45.26996729342093, -93.16254025661699 45.26976826077157, -93.16247902564132 45.269527941604, -93.16242684845764 45.2692774997531, -93.16242126018722 45.26894470083864, -93.16241263011544 45.26769394309626, -93.16246809168283 45.26571736107859, -93.16247263940593 45.26195548919013, -93.16253090997651 45.258615729449964, -93.16256878834184 45.25650987969364, -93.1626048203569 45.2546538608912, -93.16265873943591 45.251876274357876, -93.16275002007988 45.2510418534315, -93.16282237443883 45.25042383853711, -93.16286421513767 45.249181538840595, -93.16288289220509 45.24862697953288, -93.1629601120395 45.248250613185206, -93.16301002807151 45.24802483983211, -93.16301621932013 45.247670020958665, -93.16301519349018 45.247478630666144, -93.16303001333274 45.24727504082362, -93.16303463142393 45.24713931946277, -93.16302280990728 45.2470107542477, -93.16298327344437 
45.24685970499298, -93.16294217154733 45.246633449219054, -93.16294315088282 45.246419514713516, -93.16295754265565 45.24621538933992, -93.16296755618336 45.24580786412655, -93.16296268372803 45.245362220836384, -93.16296319568123 45.245046689033444, -93.16297766811293 45.24481357093532, -93.16296370759883 45.2445699039253, -93.16294931051515 45.24231310924752, -93.16294559876471 45.24173111255096, -93.16295568091667 45.240776604513705, -93.1629609359182 45.24053954238007, -93.1629658719288 45.24019639978025, -93.1625355179785 45.24018482062359, -93.15847246037083 45.24007549519542, -93.15641780558727 45.24006372373029, -93.15470331938288 45.24002991133718, -93.1515176880772 45.240038275846665, -93.14892151971884 45.24004508944476, -93.14597353408716 45.240012024375574, -93.14198169289922 45.239944427606616, -93.14246140322608 45.21441838866706, -93.14239730934507 45.20842345035032, -93.14240307538512 45.203669567890245, -93.13209436867183 45.20385828388066, -93.13238731320574 45.19696183064252, -93.13244550539693 45.19559178376392, -93.13255875219626 45.19292582294682, -93.12747185962866 45.19303831675316, -93.12741613255534 45.196689407842044, -93.12341724811418 45.196748516850086, -93.12336451543653 45.19630050937325, -93.12233270487748 45.19631189179194, -93.12244695905335 45.18943470505876, -93.12752867296823 45.18931969757398, -93.1275981937757 45.18579899512077, -93.12249095182051 45.18589579364393, -93.12250905286206 45.18230218633591, -93.11745336177542 45.182234528897865, -93.11742994994425 45.17494109686777, -93.11234677240823 45.174914625057596, -93.11232755368056 45.178541858988, -93.09142510557425 45.17830768889981, -93.0878908215621 45.18208021181682, -93.04087986544745 45.182020129318005, -93.02011304608662 45.18206919600553, -92.99725469269949 45.18154883703301, -92.9866455346556 45.18162938363265, -92.98002761377205 45.181741313792635, -92.97460481311676 45.1817232745721, -92.95815138711436 45.18159971137449, -92.95832448011389 45.16710586357575, 
-92.95821211351648 45.15266682925307, -92.94804883291458 45.152678829402525, -92.94820512323935 45.14582287000843, -92.94821449767262 45.14541149629351, -92.93808126859899 45.145435393255234, -92.938064080176 45.1464755574292, -92.93790172782569 45.15630033200825, -92.93776855788026 45.156299483202375, -92.93416458772786 45.15627656196406, -92.92776593175911 45.156235863288074, -92.92779198321185 45.15260820059608, -92.9228643837518 45.15257871636257, -92.91761510291013 45.15254730117589, -92.91755895303478 45.15978011255037, -92.90742527225278 45.15975884768774, -92.90734951861361 45.16700513027527, -92.90243435593408 45.16697925148226, -92.90226994175299 45.16697838648701, -92.90228225598396 45.16960751885433, -92.90228682505473 45.170583562524534, -92.89838293958822 45.17058359192683, -92.89776337384279 45.17058359923907, -92.89720228241329 45.170636798053465, -92.89720546113311 45.171648743169875, -92.89721045187194 45.17323675651512, -92.89721215942521 45.17377958217219, -92.8972133713998 45.17416655315385, -92.89752994284902 45.17416793500262, -92.90230392627396 45.174188700362095, -92.90230695467396 45.17483317173849, -92.90230939234701 45.175352265892315, -92.90231342163983 45.17620891826606, -92.9023378718661 45.18141217320357, -92.89829195794003 45.18137903577816, -92.89197067471983 45.181327269964534, -92.86573042754982 45.18111238484799, -92.86537258386163 45.18110945889712, -92.86579788828743 45.16683341076013, -92.85850341291456 45.166840495697045, -92.85576616527777 45.1668651317465, -92.8455814929548 45.16695680639518, -92.8403672382906 45.167003741522834, -92.84037534438275 45.166359277271084, -92.83914257524022 45.166407761467035, -92.83786182101709 45.16655768366541, -92.83762301824869 45.16658563659705, -92.83700510809494 45.16665797101126, -92.83700330475195 45.1670405349812, -92.83520392476423 45.16704646605868, -92.83519998302931 45.1672093811339, -92.83518241658018 45.17114095264113, -92.8351705215998 45.17380185475555, -92.83516823773242 
45.17431412368648, -92.82501384033566 45.174380025018145, -92.82373302900695 45.174963166130034, -92.82127603798283 45.17799740439804, -92.81495695139105 45.17798284134312, -92.81498212776123 45.18394380043827, -92.81496335262872 45.20297631525698, -92.81496300732859 45.2033351264244, -92.8149190887153 45.20460132029917, -92.81473397710002 45.21, -92.8198460035041 45.21, -92.81985864578533 45.21352006541341, -92.81476009958381 45.21350519453624, -92.81473397710002 45.211, -92.79434616877515 45.20979982288059, -92.79434485197183 45.210003888814526, -92.7942994128934 45.217028016258524, -92.79414754531777 45.217027433538036, -92.75950558164095 45.216895251116746, -92.75791266717471 45.216889175072694, -92.75634408090858 45.21737192056616, -92.75539334998972 45.21781096867505, -92.75544275719047 45.219840930849315, -92.75232263931744 45.219847708152834, -92.75345360864661 45.22241622713623, -92.75393100188802 45.22290500013628, -92.75454911801587 45.22425238152991, -92.75465656863904 45.22441872007679, -92.75478824580995 45.22461252606749, -92.75573200183275 45.22594899943625, -92.7559326169467 45.2263989667922, -92.756173357985 45.22677479396459, -92.75628338889855 45.227185737281864, -92.75651400327136 45.22770300256764, -92.75667800355963 45.228069998932774, -92.75745600158125 45.23052599674398, -92.75737071502948 45.23131853178694, -92.75760683805547 45.23212889115611, -92.7575248338702 45.23249816977935, -92.75760900807862 45.233043995948975, -92.75740715667484 45.23498808590038, -92.75739258433605 45.23515457917446, -92.75736004212973 45.235441823970014, -92.75728900664646 45.2361259970008, -92.75750924881613 45.23833187652166, -92.75783421241928 45.239151014730965, -92.75799784052033 45.2401986059374, -92.75814399470411 45.24075700093086, -92.75910499448543 45.24444199845027, -92.75927217262658 45.246363482652335, -92.759708376526 45.24795052230262, -92.76024900009054 45.24960000150479, -92.76026400206055 45.25171699829065, -92.75984499770836 45.25286799832034, 
-92.75883599655404 45.25442699925451, -92.75592228367496 45.256779108256175, -92.75559993467031 45.25707105760005, -92.75540261715516 45.25725539605134, -92.75458100472993 45.258140999051975, -92.75362100152239 45.25941899619891, -92.75258800661327 45.2617860021943, -92.7523530053651 45.26244399793552, -92.7521330910868 45.26318539548715, -92.75199986320791 45.26381589028983, -92.7519440909167 45.26415703570502, -92.75192391851121 45.26559725594415, -92.75247612752318 45.26746623235666, -92.75254008932185 45.26768063816608, -92.75267394173396 45.268130176728555, -92.75287910082022 45.2688320393691, -92.7530104867237 45.26921012533672, -92.75329204456183 45.26980089141646, -92.75414711285153 45.2712720735891, -92.7552129966957 45.27237299947564, -92.75574299378961 45.27288399662051, -92.75678399520334 45.273891998902435, -92.75750199664172 45.27442999825494, -92.75801999923948 45.274822998224586, -92.75866321741752 45.27578539520815, -92.7589271849383 45.27616491445647, -92.75924599787822 45.27671899844492, -92.75941999802778 45.27718649803985, -92.75960999785612 45.27731999914, -92.75978699565532 45.27743849638546, -92.76004300142414 45.277978995119405, -92.76061199738588 45.27882799808139, -92.76117799722955 45.280582999200305, -92.7613619999475 45.28220800042353, -92.76167096088638 45.2836803717185, -92.76198517744629 45.2850267976271, -92.76206945308458 45.2853507773657, -92.76202745146396 45.286658659028, -92.76204199858486 45.28698499388888, -92.76201199644161 45.28793199672008, -92.76200399722086 45.28821299803955, -92.76121399640145 45.28913599914764, -92.7603870028136 45.28991599406784, -92.75871000510011 45.29096499709372, -92.75799200634881 45.291140996050984, -92.75687800551285 45.29148399845183, -92.75507700319366 45.2919269952758, -92.75480030147037 45.291986779669465, -92.74569331443023 45.29606484000191, -92.74555580404507 45.29614422445335, -92.74523588498667 45.29631411941847, -92.76071968429389 45.29617634034589, -92.79448651640953 
45.29587194744184, -92.82553071142016 45.29634288822895, -92.82523623967 45.28697641600944, -92.8246113114385 45.27459391718561, -92.82414631698042 45.26733414102221, -92.83443181636859 45.267466042102846, -92.83450366471794 45.265666722695805, -92.8395297613521 45.26570782145342, -92.83954651660255 45.2675117790906, -92.85488466565545 45.267633226883305, -92.85446380439222 45.260381978642265, -92.8530801070886 45.256940031152055, -92.8746167542768 45.2569553750289, -92.87517983690772 45.26774272327855, -92.88032459143679 45.26775272915376, -92.88028907325248 45.27498539130476, -92.885429695981 45.27499516876503, -92.88541044770409 45.27862274921294, -92.8854460740016 45.28269595676258, -92.8858306795285 45.28583335680999, -92.89095994168375 45.285838365551086, -92.89147668909354 45.290056047991875, -92.89183494474656 45.292995365557246, -92.89287941280966 45.29621886928581, -92.93574219102997 45.296382695230655, -92.9366855829562 45.29639453639271, -92.93730010601949 45.29640233268984, -92.93773633826109 45.296407862218295, -92.95031707870098 45.29656663627082, -92.95732733387652 45.29663267857854, -92.95723233585932 45.305785498930874, -92.95755812361517 45.31807293816823, -92.9575313307762 45.325662873647204, -92.96200814151011 45.32569410734573, -92.96201051236334 45.33056403262943, -92.95763365021791 45.330562956294486, -92.95750484414667 45.34006528297348, -92.95740249422305 45.3523406680097, -92.96272753035339 45.352295608902175, -92.96260253143201 45.363259386181184, -92.95732537061275 45.363286992831206, -92.95715614538045 45.36869421119079, -92.97302216756823 45.36904156334545, -92.9731090974606 45.37554810693529, -92.98760985309234 45.37555619312347, -92.98429494637762 45.38215591061988, -92.9924184629002 45.38233326055907, -93.01850137881846 45.38277378724873, -93.01956464133914 45.41174708503911, -93.03973263863047 45.412106304897264, -93.06569776540464 45.412656360563524, -93.08346874844985 45.41297273973574, -93.09263091377308 45.41335460313747, 
-93.1012213163472 45.413720365424695, -93.10759754754753 45.41373499082408, -93.14214551761233 45.41373101611429, -93.1421802894172 45.40666589187203, -93.14209155741717 45.38498980813781, -93.14398965535287 45.369981475770224, -93.13861914028635 45.36992203894643, -93.13946982733188 45.35540022959687, -93.14362673736643 45.35542059147377, -93.14338145836778 45.34816201728363, -93.14259222919002 45.34815677471413, -93.14123737100095 45.34271091215897, -93.14120170425102 45.34166175650565, -93.14159640367895 45.340845226624126, -93.16430988689314 45.34107128935172, -93.1641229508536 45.33731028186903, -93.163783504365 45.32713863170596, -93.16354815472778 45.31568179036097, -93.1634974864936 45.3115083559682, -93.16335415000293 45.30838048844207, -93.16326942872365 45.30653168298998, -93.16286993093225 45.29781375116957, -93.16292479029 45.297483756012355, -93.16251838572086 45.29748043583636, -93.16242411934059 45.29340169752503, -93.16237192435095 45.291513658346155, -93.16125915756838 45.29101148729498, -93.16224903398384 45.290456018307964, -93.16243543883762 45.29031474509565, -93.16248365754952 45.29016960982244, -93.1625270557542 45.28932067928762, -93.16350507037129 45.28940282906675, -93.16413761242012 45.28944739938537, -93.16430369461645 45.289411531953206, -93.164472138656 45.28937514511818, -93.16431016328954 45.288334379584406, -93.16422830296436 45.28780835028316, -93.16373011428878 45.287807744950875, -93.16348868413621 45.28778563548775, -93.16304669211718 45.28779811404454, -93.16252493722239 45.28781182501286, -93.1625182014603 45.28601279964026, -93.1625127377889 45.28416325442296, -93.1717122152211 45.28391079701647, -93.17291828928865 45.28387769615237, -93.17292468588315 45.28327561174209, -93.1729215958459 45.28269914269899, -93.17290904354249 45.28216703245599, -93.17290447076888 45.281410092382885, -93.17289432485279 45.28068732375472, -93.17288720912401 45.280265431486754)) +poly2 poly2 POLYGON((-93.26592485308495 45.18931973506328, 
-93.26373519655886 45.18933815615675, -93.2637828223868 45.18660121752107, -93.26280973893772 45.18656958194617, -93.2603275028686 45.186488876325654, -93.25976682936536 45.18646929139094, -93.25877703935303 45.18686109057519, -93.25788401039608 45.18633824889261, -93.25713811973642 45.186864792015704, -93.25660115549654 45.18628640445176, -93.24081325108644 45.18609354693712, -93.2356823133177 45.1860308697061, -93.23474944979115 45.186019474019865, -93.23478565684188 45.18266103967549, -93.23072066106351 45.18267669158043, -93.22480340476464 45.18267437402639, -93.21952101307244 45.18267371221728, -93.21950131879755 45.184689058075534, -93.21950381582634 45.18590104693386, -93.21950547892035 45.186708829298695, -93.21948324866376 45.18808573281868, -93.21947477056304 45.188619717930756, -93.2194751507154 45.1899146284615, -93.22390334137022 45.18991091026497, -93.2245904557543 45.18993775453468, -93.2245784309098 45.19028702856576, -93.2245932424241 45.19081834295508, -93.22460314163764 45.19137779927979, -93.22459067695124 45.19162607300785, -93.22458367100289 45.19176562022696, -93.22354968949122 45.191760188521705, -93.22131530006368 45.19175468785821, -93.22018302807493 45.19175762419069, -93.21965635944291 45.19175898704962, -93.21824735047468 45.191762639857636, -93.21840068968908 45.191840907619024, -93.21858279007587 45.191950538176606, -93.21874378970492 45.19205449060312, -93.21893581214327 45.192204972059955, -93.21911499957261 45.19238205879934, -93.21934767139433 45.192628269473076, -93.21954522989743 45.1928508489684, -93.21972003978802 45.19304459976245, -93.21997538064213 45.19332124206717, -93.22011354045264 45.193470928079385, -93.22046875034326 45.19384479955501, -93.2206469058326 45.19404172922978, -93.22079845082156 45.194244494502364, -93.2209416400795 45.19447508772328, -93.22107397875365 45.19474417974581, -93.2211368505518 45.19490985928749, -93.22118231976518 45.195047277731625, -93.22124659963487 45.19525315038074, -93.22128314962913 
45.195396480693944, -93.22130715028514 45.195564823375, -93.22131862069979 45.195757013030224, -93.22130704484326 45.19599065847414, -93.22127083850016 45.19622942989826, -93.22124456959293 45.19636257994296, -93.22120917947201 45.19651471803614, -93.22115328972328 45.196774039833144, -93.22110053150747 45.19700410181286, -93.22105123806169 45.19721904984113, -93.21939747849284 45.19720754776318, -93.21658707902952 45.19719901749774, -93.21405492494755 45.19718389708806, -93.21060961905127 45.19716332241369, -93.20846870851273 45.19715738191871, -93.20635420918421 45.19714993030806, -93.20384995444252 45.19713947337882, -93.20382099935851 45.195915480832355, -93.20379040854755 45.195493880093856, -93.20373937951182 45.19525460196455, -93.20366799901262 45.194730001052676, -93.20359944927 45.194273469702246, -93.20351980946141 45.19386975065817, -93.20336890147132 45.1933312322322, -93.20348773988103 45.19317805926476, -93.20364964522179 45.19294381603321, -93.20373782170354 45.192758795441485, -93.20378634041538 45.1925589245846, -93.20378780054193 45.1924118820702, -93.20373224993294 45.192246366644895, -93.20366678053941 45.192063182244134, -93.20349712021084 45.19164111034226, -93.20336402335359 45.191262445660406, -93.20333661484061 45.19107258136713, -93.20334012614478 45.19082850506992, -93.20338500114326 45.190584969374704, -93.20346313590359 45.19035226093307, -93.20353125074365 45.19015096025676, -93.20337886118753 45.19012069933683, -93.20280004152556 45.18999823901699, -93.20236430223584 45.1898748712581, -93.20223796285948 45.18983446401002, -93.20171338128353 45.189666689690526, -93.20105175026708 45.18940210042135, -93.20059509118217 45.18937347081525, -93.20014399997638 45.18935951962055, -93.1999096512546 45.18934032171285, -93.19969162075753 45.18934030912719, -93.19953079227915 45.18938062079311, -93.19930724128803 45.189471810355066, -93.19836742091539 45.18954495845859, -93.19790904174889 45.189755310346555, -93.19770094626355 45.18978905045578, 
-93.19728573057267 45.1898563687543, -93.19706717806918 45.18978234280038, -93.1961191012612 45.18980511056629, -93.19583707702907 45.18977039110604, -93.19495714548943 45.18966207098092, -93.19409949054268 45.18955648989894, -93.19361391124465 45.18954758129998, -93.19142135137997 45.189507349701145, -93.18867729058191 45.18943758222878, -93.18766468614145 45.18941183701645, -93.1869063815807 45.18939255950494, -93.18676117212036 45.18939312363656, -93.18583601993124 45.18939673056086, -93.18362870083628 45.18940533739182, -93.18015920861117 45.189432919714875, -93.17748344774633 45.18940274982507, -93.17100678798263 45.18934067185518, -93.1680509570817 45.18931686702863, -93.16712265967519 45.189309389152754, -93.1632729184803 45.189289560128074, -93.1524420382428 45.189137301470666, -93.1488330300988 45.189087681208825, -93.14258337454692 45.18900953614207, -93.1425728385595 45.18964797148711, -93.14257129908563 45.19044710129245, -93.14256839076815 45.191380659844974, -93.14257549009486 45.192639988690985, -93.14256591028126 45.193624481846925, -93.1425562203409 45.19475816134898, -93.14254671019609 45.19564806883362, -93.14253591314012 45.19592629600891, -93.1425191002932 45.19635953895129, -93.14240307328147 45.20366956427245, -93.14239731024965 45.20842345007226, -93.14246141142196 45.2144183909345, -93.14198170032972 45.23994442974387, -93.14597353942523 45.240012030562795, -93.14892151981124 45.24004509174428, -93.15151768504401 45.24003827478177, -93.15470331907811 45.2400299112851, -93.15641781022819 45.240063720104146, -93.15847245794774 45.24007548756677, -93.16253551804624 45.24018481776239, -93.16296586932476 45.24019639699945, -93.16296093749654 45.240539543608094, -93.16295567833508 45.24077659970959, -93.16294559992268 45.24173110984731, -93.16294931429802 45.242313107885224, -93.16296371061823 45.24456989801016, -93.16297766989932 45.24481356907269, -93.16296319587042 45.24504668430867, -93.16296267909655 45.24536222031531, -93.16296756070733 
45.24580785775435, -93.16295754084666 45.24621538734816, -93.16294315030365 45.24641950970948, -93.1629421699368 45.246633444731216, -93.16298326866249 45.24685970478054, -93.16302280494743 45.24701074802872, -93.1630346343297 45.247139320093076, -93.16303000914128 45.24727503858908, -93.16301519072017 45.24747862874394, -93.16301622062082 45.247670019373224, -93.16301002844395 45.24802483903903, -93.16296010836595 45.248250609285236, -93.16288288941641 45.248626979189, -93.16286421036493 45.24918153632857, -93.16282236866641 45.25042383853131, -93.16275001793326 45.25104184745623, -93.16265874011768 45.251876269431015, -93.1626048141941 45.25465385517585, -93.162568780952 45.25650987775294, -93.16253090903855 45.25861572819838, -93.16247264162719 45.261955487720506, -93.16246809047925 45.26571735738526, -93.16241263022145 45.267693939529536, -93.16242125944353 45.26894469986081, -93.16242684956876 45.269277499432015, -93.16247902269161 45.26952793567272, -93.16254025984375 45.269768259020054, -93.1625248689828 45.26996728874923, -93.16248176954191 45.27027930739088, -93.16247917649272 45.270869996810376, -93.16247840915516 45.27104490906511, -93.16247877426206 45.27127651283899, -93.162479560911 45.27177208702322, -93.16249869026827 45.272467959171365, -93.16249147172434 45.27291248854739, -93.16247688682598 45.27318968296259, -93.16246680083795 45.27338141702519, -93.1624671298516 45.27470105775956, -93.16246893968787 45.276279379505084, -93.1624737063593 45.2764111771935, -93.16249244905424 45.276929488819604, -93.16363037995181 45.27688204948932, -93.16581262202895 45.276844043452684, -93.16722651010657 45.27681941864911, -93.17218124072862 45.27674700948904, -93.1722224784459 45.27757776899891, -93.17229737034532 45.279497570305445, -93.17232269933695 45.28027246109518, -93.17288721010608 45.28026543129147, -93.1728943187817 45.2806873180744, -93.17290447218495 45.28141008817547, -93.17290904002667 45.28216703008146, -93.17292159084371 45.28269913830247, 
-93.17292468118433 45.283275608616165, -93.17291828224536 45.28387769767021, -93.1717122127579 45.283910797244246, -93.16251273143365 45.28416325629099, -93.16251820094257 45.28601279797615, -93.16252493935717 45.287811833132764, -93.16304669905364 45.28779811692505, -93.16348868871324 45.28778563925035, -93.16373011962693 45.28780774767522, -93.16422830587629 45.28780835110865, -93.1643101699488 45.28833437868018, -93.16447213914093 45.289375147768524, -93.16430369361024 45.28941153310711, -93.16413761723706 45.28944740219967, -93.16350507286433 45.289402832527344, -93.16252705964098 45.289320683284735, -93.16248365939401 45.29016961156254, -93.16243543831087 45.29031475002342, -93.16224903970826 45.2904560215217, -93.16125915934788 45.29101149209126, -93.16237192796683 45.291513661220456, -93.16242412151107 45.29340170072084, -93.16251838980172 45.29748044313293, -93.16292479370829 45.29748376064082, -93.18639094534673 45.29767533425263, -93.18833342032521 45.29769119188229, -93.1925428426471 45.29770437859642, -93.19474753040078 45.29771128804242, -93.19765740975974 45.29769541872667, -93.20297591868295 45.29776263827187, -93.20683144906876 45.29774197003572, -93.20883497923562 45.297766559466794, -93.21546742887979 45.297768422222155, -93.22617724980643 45.29791971794424, -93.23408017640227 45.298023690859175, -93.2343080073169 45.288444186545625, -93.23432525195352 45.287995322205425, -93.23469515647318 45.269279712377234, -93.23475627635968 45.266203358381446, -93.23560542207227 45.26619551047824, -93.23899176558338 45.26613779367068, -93.24250527367546 45.26608234822973, -93.243445378056 45.26606503829342, -93.24512861083372 45.2660344570852, -93.24588057830995 45.26602026067889, -93.24713274287363 45.26599455787498, -93.25036838013868 45.26592734514467, -93.25172461510564 45.265900698298395, -93.25236738024864 45.265888260809106, -93.25481754173921 45.26583307838667, -93.25571357952906 45.265819559899164, -93.2594981489083 45.26575415212897, 
-93.26098138766197 45.265754375486374, -93.26155216698102 45.26565612540643, -93.26170097145753 45.26562288963898, -93.26208574477789 45.26553876835043, -93.26245875524685 45.265434673708015, -93.26277275191426 45.265316250819595, -93.26311663127117 45.26517251314189, -93.26346212923646 45.26500240317637, -93.26393572774133 45.26477558787491, -93.2651820516718 45.26406759657772, -93.26518110226205 45.26337226279194, -93.26515218908767 45.26311636791454, -93.26518703008779 45.262871689663605, -93.2652064900752 45.26265582104258, -93.2652110298225 45.26215614194132, -93.26522443086994 45.26112430402238, -93.26522989950563 45.260703199933474, -93.26524872191168 45.25930812973533, -93.26525187087448 45.258897852775995, -93.26525857049303 45.258025812056765, -93.26527734826267 45.256675072153314, -93.26528081766433 45.25612813038996, -93.265287399575 45.25512698071874, -93.26530031054412 45.253711671615115, -93.26531490547187 45.25273002640574, -93.26532214123614 45.252243491267, -93.26533817105908 45.25062180123498, -93.26535413994274 45.24906421173263, -93.26536141910549 45.24841165046578, -93.26536638602661 45.24796649509243, -93.26537318826473 45.24735637067748, -93.26539798003012 45.24589779189643, -93.265404909549 45.24454674190931, -93.2654060939449 45.24296904311022, -93.26540624905046 45.24276127146885, -93.26540843815205 45.2420263885843, -93.26541275006169 45.240577352345994, -93.2654375717671 45.238843301612725, -93.26544518264211 45.237906888690105, -93.26544940933664 45.23738688110566, -93.26546966016808 45.236093591927926, -93.2654781584622 45.235359229961944, -93.26548338867605 45.23490715107922, -93.26553582901259 45.23354268990693, -93.26554071996831 45.23330119833777, -93.26555987026248 45.2323552839169, -93.26557251955711 45.23173040973764, -93.26556626032777 45.22975235185782, -93.26556606661761 45.229367333607186, -93.26556579189545 45.228823722705066, -93.26562882232702 45.226872206176665, -93.26571073971922 45.224335971082276, -93.26574560622672 
45.22192222321787, -93.26574836877063 45.22173093256304, -93.26577033227747 45.22021043432355, -93.26578588443306 45.21913391123174, -93.26580662128347 45.21769799745153, -93.26580983179628 45.217475736026664, -93.26581322607608 45.217240685631346, -93.26590715360736 45.210737684073244, -93.26591966090616 45.209871711997586, -93.2659016992406 45.20722015227932, -93.26587484243684 45.203254836571126, -93.26585637174348 45.20052765082941, -93.26585684827346 45.19841676076085, -93.26587786763154 45.19732741144391, -93.2658624676632 45.1970879109074, -93.2659274100303 45.194004979577755, -93.26595017983325 45.191531890895845, -93.26595423366354 45.19092534610275, -93.26593099287571 45.190637988686554, -93.2659274057232 45.18986823069059, -93.26592485308495 45.18931973506328)) \ No newline at end of file diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java index 28459025157..ba8d53c8bf3 100644 --- a/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.spatial.query.SpatialArgsParser; import org.junit.Assert; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -109,8 +110,11 @@ public abstract class StrategyTestCase extends SpatialTestCase { } protected Iterator getSampleData(String testDataFile) throws IOException { - return new SampleDataReader( - getClass().getClassLoader().getResourceAsStream("data/"+testDataFile) ); + String path = "data/" + testDataFile; + InputStream stream = getClass().getClassLoader().getResourceAsStream(path); + if (stream == null) + throw new FileNotFoundException("classpath resource not found: "+path); + return new SampleDataReader(stream); } protected Iterator getTestQueries(String 
testQueryFile, SpatialContext ctx) throws IOException { diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java new file mode 100644 index 00000000000..13ae6544e77 --- /dev/null +++ b/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/JtsPolygonTest.java @@ -0,0 +1,73 @@ +package org.apache.lucene.spatial.prefix; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.spatial4j.core.context.SpatialContextFactory; +import com.spatial4j.core.shape.Shape; +import org.apache.lucene.spatial.StrategyTestCase; +import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; + +public class JtsPolygonTest extends StrategyTestCase { + + private static final double LUCENE_4464_distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;//DEFAULT 2.5% + + public JtsPolygonTest() { + try { + HashMap args = new HashMap(); + args.put("spatialContextFactory", + "com.spatial4j.core.context.jts.JtsSpatialContextFactory"); + ctx = SpatialContextFactory.makeSpatialContext(args, getClass().getClassLoader()); + } catch (NoClassDefFoundError e) { + assumeTrue("This test requires JTS jar: "+e, false); + } + + GeohashPrefixTree grid = new GeohashPrefixTree(ctx, 11);//< 1 meter == 11 maxLevels + this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName()); + ((RecursivePrefixTreeStrategy)this.strategy).setDistErrPct(LUCENE_4464_distErrPct);//1% radius (small!) + } + + @Test + /** LUCENE-4464 */ + public void testCloseButNoMatch() throws IOException { + getAddAndVerifyIndexedDocuments("LUCENE-4464.txt"); + SpatialArgs args = q( + "POLYGON((-93.18100824442227 45.25676372469945," + + "-93.23182001200654 45.21421290799412," + + "-93.16315546122038 45.23742639412364," + + "-93.18100824442227 45.25676372469945))", + LUCENE_4464_distErrPct); + SearchResults got = executeQuery(strategy.makeQuery(args), 100); + assertEquals(1, got.numFound); + assertEquals("poly2", got.results.get(0).document.get("id")); + //did not find poly 1 ! 
+ } + + private SpatialArgs q(String shapeStr, double distErrPct) { + Shape shape = ctx.readShape(shapeStr); + SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, shape); + args.setDistErrPct(distErrPct); + return args; + } + +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java index 49a20b09fff..7d775e3613f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java @@ -20,10 +20,10 @@ package org.apache.lucene.codecs.asserting; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; /** - * Acts like {@link Lucene40Codec} but with additional asserts. + * Acts like {@link Lucene41Codec} but with additional asserts. 
*/ public final class AssertingCodec extends FilterCodec { @@ -31,7 +31,7 @@ public final class AssertingCodec extends FilterCodec { private final TermVectorsFormat vectors = new AssertingTermVectorsFormat(); public AssertingCodec() { - super("Asserting", new Lucene40Codec()); + super("Asserting", new Lucene41Codec()); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java index 775f972a1eb..94b88117e23 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java @@ -27,7 +27,7 @@ import org.apache.lucene.codecs.PostingsConsumer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.TermStats; import org.apache.lucene.codecs.TermsConsumer; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.index.AssertingAtomicReader; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -38,10 +38,10 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.OpenBitSet; /** - * Just like {@link Lucene40PostingsFormat} but with additional asserts. + * Just like {@link Lucene41PostingsFormat} but with additional asserts. 
*/ public final class AssertingPostingsFormat extends PostingsFormat { - private final PostingsFormat in = new Lucene40PostingsFormat(); + private final PostingsFormat in = new Lucene41PostingsFormat(); public AssertingPostingsFormat() { super("Asserting"); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java similarity index 87% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java index 50c5a98bdfa..d5229f6f496 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene41Postings.java @@ -22,19 +22,19 @@ import java.io.IOException; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; /** * A class used for testing {@link BloomFilteringPostingsFormat} with a concrete - * delegate (Lucene40). Creates a Bloom filter on ALL fields and with tiny + * delegate (Lucene41). Creates a Bloom filter on ALL fields and with tiny * amounts of memory reserved for the filter. DO NOT USE IN A PRODUCTION * APPLICATION This is not a realistic application of Bloom Filters as they * ordinarily are larger and operate on only primary key type fields. 
*/ -public final class TestBloomFilteredLucene40Postings extends PostingsFormat { +public final class TestBloomFilteredLucene41Postings extends PostingsFormat { private BloomFilteringPostingsFormat delegate; @@ -54,9 +54,9 @@ public final class TestBloomFilteredLucene40Postings extends PostingsFormat { } } - public TestBloomFilteredLucene40Postings() { - super("TestBloomFilteredLucene40Postings"); - delegate = new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(), + public TestBloomFilteredLucene41Postings() { + super("TestBloomFilteredLucene41Postings"); + delegate = new BloomFilteringPostingsFormat(new Lucene41PostingsFormat(), new LowMemoryBloomFactory()); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java index 91f6055b79f..904fedf0f9b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java @@ -21,14 +21,14 @@ import java.util.Random; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.StoredFieldsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import com.carrotsearch.randomizedtesting.generators.RandomInts; import com.carrotsearch.randomizedtesting.generators.RandomPicks; /** * A codec that uses {@link CompressingStoredFieldsFormat} for its stored - * fields and delegates to {@link Lucene40Codec} for everything else. + * fields and delegates to {@link Lucene41Codec} for everything else. 
*/ public class CompressingCodec extends FilterCodec { @@ -49,7 +49,7 @@ public class CompressingCodec extends FilterCodec { */ public CompressingCodec(CompressionMode compressionMode, int chunkSize, CompressingStoredFieldsIndex storedFieldsIndexFormat) { - super("Compressing", new Lucene40Codec()); + super("Compressing", new Lucene41Codec()); this.storedFieldsFormat = new CompressingStoredFieldsFormat(compressionMode, chunkSize, storedFieldsIndexFormat); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java similarity index 95% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java index 44b953bd780..65d10b2a385 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java @@ -45,16 +45,6 @@ import org.apache.lucene.util.IOUtils; * @lucene.experimental */ public final class Lucene40PostingsWriter extends PostingsWriterBase { - final static String TERMS_CODEC = "Lucene40PostingsWriterTerms"; - final static String FRQ_CODEC = "Lucene40PostingsWriterFrq"; - final static String PRX_CODEC = "Lucene40PostingsWriterPrx"; - - //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG; - - // Increment version to change it: - final static int VERSION_START = 0; - final static int VERSION_LONG_SKIP = 1; - final static int VERSION_CURRENT = VERSION_LONG_SKIP; final IndexOutput freqOut; final IndexOutput proxOut; @@ -111,7 +101,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase { boolean success = false; IndexOutput proxOut = null; try { - CodecUtil.writeHeader(freqOut, FRQ_CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(freqOut, 
Lucene40PostingsReader.FRQ_CODEC, Lucene40PostingsReader.VERSION_CURRENT); // TODO: this is a best effort, if one of these fields has no postings // then we make an empty prx file, same as if we are wrapped in // per-field postingsformat. maybe... we shouldn't @@ -121,7 +111,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase { // prox file fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION); proxOut = state.directory.createOutput(fileName, state.context); - CodecUtil.writeHeader(proxOut, PRX_CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(proxOut, Lucene40PostingsReader.PRX_CODEC, Lucene40PostingsReader.VERSION_CURRENT); } else { // Every field omits TF so we will write no prox file proxOut = null; @@ -146,7 +136,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase { @Override public void start(IndexOutput termsOut) throws IOException { this.termsOut = termsOut; - CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT); termsOut.writeInt(skipInterval); // write skipInterval termsOut.writeInt(maxSkipLevels); // write maxSkipLevels termsOut.writeInt(skipMinimum); // write skipMinimum diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java new file mode 100644 index 00000000000..f749216bf38 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java @@ -0,0 +1,50 @@ +package org.apache.lucene.codecs.lucene40; + +import java.io.IOException; + +import org.apache.lucene.codecs.BlockTreeTermsWriter; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.PostingsWriterBase; +import 
org.apache.lucene.index.SegmentWriteState; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Read-write version of {@link Lucene40PostingsFormat} for testing. + */ +public class Lucene40RWPostingsFormat extends Lucene40PostingsFormat { + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + PostingsWriterBase docs = new Lucene40PostingsWriter(state); + + // TODO: should we make the terms index more easily + // pluggable? Ie so that this codec would record which + // index impl was used, and switch on loading? + // Or... you must make a new Codec for this? 
+ boolean success = false; + try { + FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize); + success = true; + return ret; + } finally { + if (!success) { + docs.close(); + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java similarity index 99% rename from lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java index 34cdac1dcac..62bd3047878 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java @@ -29,8 +29,9 @@ import org.apache.lucene.codecs.MultiLevelSkipListWriter; * that stores positions and payloads. * * @see Lucene40PostingsFormat - * @lucene.experimental + * @deprecated Only for reading old 4.0 segments */ +@Deprecated public class Lucene40SkipListWriter extends MultiLevelSkipListWriter { private int[] lastSkipDoc; private int[] lastSkipPayloadLength; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html similarity index 91% rename from lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html index c4fe9c6a8ea..c83302cf5b7 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/block/package.html +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html @@ -20,6 +20,6 @@ -BlockPostingsFormat file format. +Support for testing {@link org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat}. 
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java similarity index 89% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java index 42f0d853688..8865136be1b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/Lucene41WithOrds.java @@ -1,4 +1,4 @@ -package org.apache.lucene.codecs.lucene40ords; +package org.apache.lucene.codecs.lucene41ords; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -30,9 +30,9 @@ import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader; import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter; import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase; import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs -import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; // javadocs +import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.BytesRef; @@ -41,18 +41,18 @@ import org.apache.lucene.util.BytesRef; // any PostingsBaseFormat and make it ord-able... /** - * Customized version of {@link Lucene40Codec} that uses + * Customized version of {@link Lucene41Codec} that uses * {@link FixedGapTermsIndexWriter}. 
*/ -public final class Lucene40WithOrds extends PostingsFormat { +public final class Lucene41WithOrds extends PostingsFormat { - public Lucene40WithOrds() { - super("Lucene40WithOrds"); + public Lucene41WithOrds() { + super("Lucene41WithOrds"); } @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase docs = new Lucene40PostingsWriter(state); + PostingsWriterBase docs = new Lucene41PostingsWriter(state); // TODO: should we make the terms index more easily // pluggable? Ie so that this codec would record which @@ -91,7 +91,7 @@ public final class Lucene40WithOrds extends PostingsFormat { @Override public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postings = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + PostingsReaderBase postings = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); TermsIndexReaderBase indexReader; boolean success = false; diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/package.html b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/package.html similarity index 100% rename from lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/package.html rename to lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41ords/package.html diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java index c44f05bcc38..55958b14970 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java @@ -38,8 +38,8 @@ import 
org.apache.lucene.codecs.blockterms.TermsIndexReaderBase; import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase; import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader; import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter; import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.codecs.mocksep.MockSingleIntFactory; @@ -174,7 +174,8 @@ public final class MockRandomPostingsFormat extends PostingsFormat { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: writing Standard postings"); } - postingsWriter = new Lucene40PostingsWriter(state, skipInterval); + // TODO: randomize variables like acceptibleOverHead?! 
+ postingsWriter = new Lucene41PostingsWriter(state, skipInterval); } if (random.nextBoolean()) { @@ -313,7 +314,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading Standard postings"); } - postingsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + postingsReader = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); } if (random.nextBoolean()) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java index c44f3ef5db7..31f897e39f0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java @@ -26,8 +26,8 @@ import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsReader; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter; import org.apache.lucene.codecs.pulsing.PulsingPostingsReader; import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter; import org.apache.lucene.index.SegmentReadState; @@ -35,7 +35,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.IOUtils; /** - * Pulsing(1, Pulsing(2, Lucene40)) + * Pulsing(1, Pulsing(2, Lucene41)) * @lucene.experimental */ // TODO: if we create PulsingPostingsBaseFormat then we @@ -55,7 +55,7 @@ 
public final class NestedPulsingPostingsFormat extends PostingsFormat { // Terms dict boolean success = false; try { - docsWriter = new Lucene40PostingsWriter(state); + docsWriter = new Lucene41PostingsWriter(state); pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter); pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner); @@ -77,7 +77,7 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat { PostingsReaderBase pulsingReader = null; boolean success = false; try { - docsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); + docsReader = new Lucene41PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix); pulsingReaderInner = new PulsingPostingsReader(docsReader); pulsingReader = new PulsingPostingsReader(pulsingReaderInner); FieldsProducer ret = new BlockTreeTermsReader( diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index b8676d7dafc..61de20efe1f 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -29,11 +29,10 @@ import java.util.Set; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.asserting.AssertingPostingsFormat; -import org.apache.lucene.codecs.block.BlockPostingsFormat; -import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; -import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; -import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; +import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; +import org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds; +import 
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings; import org.apache.lucene.codecs.memory.DirectPostingsFormat; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; import org.apache.lucene.codecs.mockintblock.MockFixedIntBlockPostingsFormat; @@ -41,7 +40,7 @@ import org.apache.lucene.codecs.mockintblock.MockVariableIntBlockPostingsFormat; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; import org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat; -import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; +import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -55,7 +54,7 @@ import org.apache.lucene.util._TestUtil; * documents in different orders and the test will still be deterministic * and reproducable. */ -public class RandomCodec extends Lucene40Codec { +public class RandomCodec extends Lucene41Codec { /** Shuffled list of postings formats to use for new mappings */ private List formats = new ArrayList(); @@ -94,23 +93,22 @@ public class RandomCodec extends Lucene40Codec { int lowFreqCutoff = _TestUtil.nextInt(random, 2, 100); add(avoidCodecs, - new Lucene40PostingsFormat(minItemsPerBlock, maxItemsPerBlock), - new BlockPostingsFormat(minItemsPerBlock, maxItemsPerBlock), + new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock), new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock), LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? 
Integer.MAX_VALUE : lowFreqCutoff)), - new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), + new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), // add pulsing again with (usually) different parameters - new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), - //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene40Postings to be constructed + new Pulsing41PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), + //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings to be constructed //with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing //with such "wrapper" classes? - new TestBloomFilteredLucene40Postings(), + new TestBloomFilteredLucene41Postings(), new MockSepPostingsFormat(), new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)), new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)), new MockRandomPostingsFormat(random), new NestedPulsingPostingsFormat(), - new Lucene40WithOrds(), + new Lucene41WithOrds(), new SimpleTextPostingsFormat(), new AssertingPostingsFormat(), new MemoryPostingsFormat(true, random.nextFloat()), diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java index b432416c15b..e87720d59f5 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java @@ -32,6 +32,8 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.asserting.AssertingCodec; import org.apache.lucene.codecs.compressing.CompressingCodec; import 
org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.index.RandomCodec; @@ -129,26 +131,23 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule { avoidCodecs.addAll(Arrays.asList(a.value())); } - PREFLEX_IMPERSONATION_IS_ACTIVE = false; savedCodec = Codec.getDefault(); int randomVal = random.nextInt(10); - /* note: re-enable this if we make a 4.x impersonator - if ("Lucene3x".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && + if ("Lucene40".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && "random".equals(TEST_POSTINGSFORMAT) && randomVal < 2 && - !shouldAvoidCodec("Lucene3x"))) { // preflex-only setup - codec = Codec.forName("Lucene3x"); - assert (codec instanceof PreFlexRWCodec) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; - PREFLEX_IMPERSONATION_IS_ACTIVE = true; - } else */ if (!"random".equals(TEST_POSTINGSFORMAT)) { + !shouldAvoidCodec("Lucene40"))) { + codec = Codec.forName("Lucene40"); + assert (PostingsFormat.forName("Lucene40") instanceof Lucene40RWPostingsFormat) : "fix your classpath to have tests-framework.jar before lucene-core.jar"; + } else if (!"random".equals(TEST_POSTINGSFORMAT)) { final PostingsFormat format; if ("MockRandom".equals(TEST_POSTINGSFORMAT)) { format = new MockRandomPostingsFormat(random); } else { format = PostingsFormat.forName(TEST_POSTINGSFORMAT); } - codec = new Lucene40Codec() { + codec = new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index 8e46b10f385..d2760ae6151 100644 --- 
a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -44,7 +44,7 @@ import java.util.zip.ZipFile; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40Codec; +import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.document.ByteDocValuesField; import org.apache.lucene.document.DerefBytesDocValuesField; @@ -651,7 +651,7 @@ public class _TestUtil { if (LuceneTestCase.VERBOSE) { System.out.println("forcing postings format to:" + format); } - return new Lucene40Codec() { + return new Lucene41Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return format; diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 4c82a0146bb..3b7b3836da4 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -19,7 +19,7 @@ org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat org.apache.lucene.codecs.mocksep.MockSepPostingsFormat org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat -org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds -org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings +org.apache.lucene.codecs.lucene41ords.Lucene41WithOrds +org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings org.apache.lucene.codecs.asserting.AssertingPostingsFormat - +org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 
cb4852e907f..389dc773c8e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -42,9 +42,7 @@ New Features values of a multiValued field in their original order when highlighting. (Joel Bernstein via yonik) -* SOLR-3929 -support configuring IndexWriter max thread count in solrconfig - +* SOLR-3929: Support configuring IndexWriter max thread count in solrconfig. (phunt via Mark Miller) Optimizations @@ -62,6 +60,9 @@ Optimizations * SOLR-3734: Improve Schema-Browser Handling for CopyField using dynamicField's (steffkes) + +* SOLR-3941: The "commitOnLeader" part of distributed recovery can use + openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller) Bug Fixes ---------------------- @@ -73,6 +74,15 @@ Bug Fixes * SOLR-3917: Partial State on Schema-Browser UI is not defined for Dynamic Fields & Types (steffkes) + +* SOLR-3939: Consider a sync attempt from leader to replica that fails due + to 404 a success. (Mark Miller, Joel Bernstein) + +* SOLR-3940: Rejoining the leader election incorrectly triggers the code path + for a fresh cluster start rather than fail over. (Mark Miller) + +* SOLR-3961: Fixed error using LimitTokenCountFilterFactory + (Jack Krupansky, hossman) Other Changes ---------------------- diff --git a/solr/SYSTEM_REQUIREMENTS.txt b/solr/SYSTEM_REQUIREMENTS.txt new file mode 100644 index 00000000000..9b38cbf24f6 --- /dev/null +++ b/solr/SYSTEM_REQUIREMENTS.txt @@ -0,0 +1,16 @@ +# System Requirements + +Apache Solr runs of Java 6 or greater. When using Java 7, be sure to +install at least Update 1! With all Java versions it is strongly +recommended to not use experimental `-XX` JVM options. It is also +recommended to always use the latest update version of your Java VM, +because bugs may affect Solr. An overview of known JVM bugs can be +found on http://wiki.apache.org/lucene-java/SunJavaBugs. 
+ +CPU, disk and memory requirements are based on the many choices made in +implementing Solr (document size, number of documents, and number of +hits retrieved to name a few). The benchmarks page has some information +related to performance on particular platforms. + +*To build Apache Solr from source, refer to the `BUILD.txt` file in +the distribution directory.* diff --git a/solr/build.xml b/solr/build.xml index f74b3726c35..b873957d62a 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -144,7 +144,7 @@ depends="javadocs,changes-to-html,process-webpages"/> - + @@ -163,12 +163,10 @@ - @@ -439,8 +437,8 @@ + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml index e08ab8deb99..e28cec73722 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml @@ -17,9 +17,9 @@ --> - + - + diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java index dcf3963f442..ab55358fe61 100644 --- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java @@ -49,6 +49,7 @@ import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.request.CoreAdminRequest; import org.apache.solr.client.solrj.request.CoreAdminRequest.Create; +import org.apache.solr.client.solrj.request.CoreAdminRequest.Unload; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.response.CoreAdminResponse; import org.apache.solr.client.solrj.response.QueryResponse; @@ -742,10 +743,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { 0, ((HttpSolrServer) 
client).getBaseURL().length() - DEFAULT_COLLECTION.length() - 1); - createCollection(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1"); - createCollection(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2"); - createCollection(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2"); - createCollection(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1"); + createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1"); + createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2"); + createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2"); + createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1"); while (pending != null && pending.size() > 0) { @@ -764,7 +765,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { assertAllActive(oneInstanceCollection2, solrj.getZkStateReader()); - printLayout(); + //printLayout(); // TODO: enable when we don't falsely get slice1... 
// solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice1", 30000); @@ -803,6 +804,27 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { assertNotNull(slices); String roles = slices.get("slice1").getReplicasMap().values().iterator().next().getStr(ZkStateReader.ROLES_PROP); assertEquals("none", roles); + + + ZkCoreNodeProps props = new ZkCoreNodeProps(solrj.getZkStateReader().getClusterState().getLeader(oneInstanceCollection2, "slice1")); + + // now test that unloading a core gets us a new leader + HttpSolrServer server = new HttpSolrServer(baseUrl); + Unload unloadCmd = new Unload(true); + unloadCmd.setCoreName(props.getCoreName()); + + String leader = props.getCoreUrl(); + + server.request(unloadCmd); + + int tries = 50; + while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "slice1", 10000))) { + Thread.sleep(100); + if (tries-- == 0) { + fail("Leader never changed"); + } + } + } private void testSearchByCollectionName() throws SolrServerException { @@ -875,10 +897,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { private void createCollection(String collection, List collectionClients, String baseUrl, int num) { - createCollection(collection, collectionClients, baseUrl, num, null); + createSolrCore(collection, collectionClients, baseUrl, num, null); } - private void createCollection(final String collection, + private void createSolrCore(final String collection, List collectionClients, final String baseUrl, final int num, final String shardId) { Callable call = new Callable() { diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java index 1b12c659586..a4f69fcbfce 100644 --- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java @@ -40,7 +40,6 @@ import org.apache.zookeeper.KeeperException; import 
org.apache.zookeeper.KeeperException.NoNodeException; import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; @Slow @@ -114,7 +113,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 { elector, "shard1", "collection1", Integer.toString(nodeNumber), props, zkStateReader); elector.setup(context); - seq = elector.joinElection(context); + seq = elector.joinElection(context, false); electionDone = true; seqToThread.put(seq, this); } @@ -175,7 +174,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 { ElectionContext context = new ShardLeaderElectionContextBase(elector, "shard2", "collection1", "dummynode1", props, zkStateReader); elector.setup(context); - elector.joinElection(context); + elector.joinElection(context, false); assertEquals("http://127.0.0.1/solr/", getLeaderUrl("collection1", "shard2")); } @@ -188,7 +187,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 { ElectionContext firstContext = new ShardLeaderElectionContextBase(first, "slice1", "collection2", "dummynode1", props, zkStateReader); first.setup(firstContext); - first.joinElection(firstContext); + first.joinElection(firstContext, false); Thread.sleep(1000); assertEquals("original leader was not registered", "http://127.0.0.1/solr/1/", getLeaderUrl("collection2", "slice1")); @@ -199,7 +198,7 @@ public class LeaderElectionTest extends SolrTestCaseJ4 { ElectionContext context = new ShardLeaderElectionContextBase(second, "slice1", "collection2", "dummynode1", props, zkStateReader); second.setup(context); - second.joinElection(context); + second.joinElection(context, false); Thread.sleep(1000); assertEquals("original leader should have stayed leader", "http://127.0.0.1/solr/1/", getLeaderUrl("collection2", "slice1")); firstContext.cancelElection(); diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java index 6520c6bd42b..59071c7d951 100644 --- 
a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java @@ -139,7 +139,7 @@ public class OverseerTest extends SolrTestCaseJ4 { ShardLeaderElectionContextBase ctx = new ShardLeaderElectionContextBase( elector, shardId, collection, nodeName + "_" + coreName, props, zkStateReader); - elector.joinElection(ctx); + elector.joinElection(ctx, false); return shardId; } Thread.sleep(500); @@ -876,7 +876,7 @@ public class OverseerTest extends SolrTestCaseJ4 { new HttpShardHandlerFactory().getShardHandler(), "/admin/cores", reader); ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replaceAll("/", "_")); overseerElector.setup(ec); - overseerElector.joinElection(ec); + overseerElector.joinElection(ec, false); return zkClient; } diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java index 3bf7e713eea..a49fbf98397 100644 --- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java +++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java @@ -37,14 +37,14 @@ public class TestCodecSupport extends SolrTestCaseJ4 { Map fields = h.getCore().getSchema().getFields(); SchemaField schemaField = fields.get("string_pulsing_f"); PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat(); - assertEquals("Pulsing40", format.getPostingsFormatForField(schemaField.getName()).getName()); + assertEquals("Pulsing41", format.getPostingsFormatForField(schemaField.getName()).getName()); schemaField = fields.get("string_simpletext_f"); assertEquals("SimpleText", format.getPostingsFormatForField(schemaField.getName()).getName()); schemaField = fields.get("string_standard_f"); - assertEquals("Lucene40", format.getPostingsFormatForField(schemaField.getName()).getName()); + assertEquals("Lucene41", format.getPostingsFormatForField(schemaField.getName()).getName()); schemaField = 
fields.get("string_f"); - assertEquals("Lucene40", format.getPostingsFormatForField(schemaField.getName()).getName()); + assertEquals("Lucene41", format.getPostingsFormatForField(schemaField.getName()).getName()); } public void testDynamicFields() { @@ -53,10 +53,10 @@ public class TestCodecSupport extends SolrTestCaseJ4 { assertEquals("SimpleText", format.getPostingsFormatForField("foo_simple").getName()); assertEquals("SimpleText", format.getPostingsFormatForField("bar_simple").getName()); - assertEquals("Pulsing40", format.getPostingsFormatForField("foo_pulsing").getName()); - assertEquals("Pulsing40", format.getPostingsFormatForField("bar_pulsing").getName()); - assertEquals("Lucene40", format.getPostingsFormatForField("foo_standard").getName()); - assertEquals("Lucene40", format.getPostingsFormatForField("bar_standard").getName()); + assertEquals("Pulsing41", format.getPostingsFormatForField("foo_pulsing").getName()); + assertEquals("Pulsing41", format.getPostingsFormatForField("bar_pulsing").getName()); + assertEquals("Lucene41", format.getPostingsFormatForField("foo_standard").getName()); + assertEquals("Lucene41", format.getPostingsFormatForField("bar_standard").getName()); } public void testUnknownField() { diff --git a/solr/site/xsl/index.xsl b/solr/site/xsl/index.xsl index f5babeca9da..c967aebb38f 100644 --- a/solr/site/xsl/index.xsl +++ b/solr/site/xsl/index.xsl @@ -56,6 +56,7 @@

      Reference Documents