From 2ea2adcf6b3633aa895b841eedc09ab540dc9b4d Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Sat, 13 Oct 2012 15:27:24 +0000
Subject: [PATCH] LUCENE-4446: wrap up cutover to blockpostingsformat
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4446@1397858 13f79535-47bb-0310-9956-ffa450edef68
---
.../codecs/memory/DirectPostingsFormat.java | 8 +--
.../pulsing/Pulsing41PostingsFormat.java | 6 +--
.../java/org/apache/lucene/codecs/Codec.java | 2 +-
.../org/apache/lucene/codecs/FilterCodec.java | 2 +-
.../lucene/codecs/lucene40/Lucene40Codec.java | 4 +-
.../lucene40/Lucene40PostingsBaseFormat.java | 5 +-
.../lucene40/Lucene40PostingsFormat.java | 30 ++++-------
.../lucene40/Lucene40PostingsReader.java | 22 +++++---
.../lucene40/Lucene40SkipListReader.java | 3 +-
.../lucene41/Lucene41PostingsFormat.java | 6 +++
.../lucene41/Lucene41PostingsWriter.java | 5 +-
.../lucene/codecs/lucene41/package.html | 2 +-
.../org/apache/lucene/codecs/package.html | 4 +-
.../lucene/index/LiveIndexWriterConfig.java | 2 +-
.../codecs/lucene40/TestReuseDocsEnum.java | 8 +--
.../lucene40/Lucene40PostingsWriter.java | 16 ++----
.../lucene40/Lucene40RWPostingsFormat.java | 50 +++++++++++++++++++
.../lucene40/Lucene40SkipListWriter.java | 3 +-
.../lucene/codecs/lucene40/package.html | 25 ++++++++++
.../mockrandom/MockRandomPostingsFormat.java | 10 ++--
.../org/apache/lucene/index/RandomCodec.java | 2 +-
.../util/TestRuleSetupAndRestoreClassEnv.java | 4 +-
.../org.apache.lucene.codecs.PostingsFormat | 2 +-
.../solr/collection1/conf/schema_codec.xml | 4 +-
.../apache/solr/core/TestCodecSupport.java | 14 +++---
25 files changed, 154 insertions(+), 85 deletions(-)
rename lucene/{core => test-framework}/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (95%)
create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWPostingsFormat.java
rename lucene/{core => test-framework}/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java (99%)
create mode 100644 lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/package.html
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
index 97e22f4a03d..936d4ed5d61 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
@@ -27,7 +27,7 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; // javadocs
+import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -52,7 +52,7 @@ import org.apache.lucene.util.automaton.Transition;
// - build depth-N prefix hash?
// - or: longer dense skip lists than just next byte?
-/** Wraps {@link Lucene40PostingsFormat} format for on-disk
+/** Wraps {@link Lucene41PostingsFormat} format for on-disk
* storage, but then at read time loads and stores all
* terms & postings directly in RAM as byte[], int[].
*
@@ -100,12 +100,12 @@ public final class DirectPostingsFormat extends PostingsFormat {
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- return PostingsFormat.forName("Lucene40").fieldsConsumer(state);
+ return PostingsFormat.forName("Lucene41").fieldsConsumer(state);
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- FieldsProducer postings = PostingsFormat.forName("Lucene40").fieldsProducer(state);
+ FieldsProducer postings = PostingsFormat.forName("Lucene41").fieldsProducer(state);
if (state.context.context != IOContext.Context.MERGE) {
FieldsProducer loadedPostings;
try {
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
index 7fd7fb0504a..9946062e09b 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/pulsing/Pulsing41PostingsFormat.java
@@ -28,17 +28,17 @@ import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat; // javadocs
*/
public class Pulsing41PostingsFormat extends PulsingPostingsFormat {
- /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene40" format. */
+ /** Inlines docFreq=1 terms, otherwise uses the normal "Lucene41" format. */
public Pulsing41PostingsFormat() {
this(1);
}
- /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. */
+ /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene41" format. */
public Pulsing41PostingsFormat(int freqCutoff) {
this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
- /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene40" format. */
+ /** Inlines docFreq=freqCutoff terms, otherwise uses the normal "Lucene41" format. */
public Pulsing41PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) {
super("Pulsing41", new Lucene41PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index 1892df6d300..7a473a3ed38 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -119,7 +119,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
loader.reload(classloader);
}
- private static Codec defaultCodec = Codec.forName("Lucene40");
+ private static Codec defaultCodec = Codec.forName("Lucene41");
/** expert: returns the default codec used for newly created
* {@link IndexWriterConfig}s.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
index ca8e439b6d8..12f17197d2b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/FilterCodec.java
@@ -21,7 +21,7 @@ package org.apache.lucene.codecs;
* A codec that forwards all its method calls to another codec.
*
* Extend this class when you need to reuse the functionality of an existing
- * codec. For example, if you want to build a codec that redefines Lucene40's
+ * codec. For example, if you want to build a codec that redefines Lucene41's
* {@link LiveDocsFormat}:
*
* public final class CustomCodec extends FilterCodec {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
index 076eeeaeadd..a0d66af61d0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
@@ -36,12 +36,12 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
* {@link FilterCodec}.
*
* @see org.apache.lucene.codecs.lucene40 package documentation for file format details.
- * @lucene.experimental
+ * @deprecated Only for reading old 4.0 segments
*/
// NOTE: if we make largish changes in a minor release, easier to just make Lucene42Codec or whatever
// if they are backwards compatible or smallish we can probably do the backwards in the postingsreader
// (it writes a minor version, etc).
-// nocommit: make readonly and add impersonator
+@Deprecated
public final class Lucene40Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
index df6611922e2..eaf452d6252 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
@@ -29,9 +29,10 @@ import org.apache.lucene.index.SegmentWriteState;
* Provides a {@link PostingsReaderBase} and {@link
* PostingsWriterBase}.
*
- * @lucene.experimental */
+ * @deprecated Only for reading old 4.0 segments */
// TODO: should these also be named / looked up via SPI?
+@Deprecated
public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
/** Sole constructor. */
@@ -46,6 +47,6 @@ public final class Lucene40PostingsBaseFormat extends PostingsBaseFormat {
@Override
public PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException {
- return new Lucene40PostingsWriter(state);
+ throw new UnsupportedOperationException("this codec can only be used for reading");
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
index 16d9c47ed97..1f9c28efdb4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
@@ -211,15 +211,18 @@ import org.apache.lucene.util.fst.FST; // javadocs
* previous occurrence and an OffsetLength follows. Offset data is only written for
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
*
- * @lucene.experimental */
+ * @deprecated Only for reading old 4.0 segments */
// TODO: this class could be created by wrapping
// BlockTreeTermsDict around Lucene40PostingsBaseFormat; ie
// we should not duplicate the code from that class here:
-public final class Lucene40PostingsFormat extends PostingsFormat {
+@Deprecated
+public class Lucene40PostingsFormat extends PostingsFormat {
- private final int minBlockSize;
- private final int maxBlockSize;
+ /** minimum items (terms or sub-blocks) per block for BlockTree */
+ protected final int minBlockSize;
+ /** maximum items (terms or sub-blocks) per block for BlockTree */
+ protected final int maxBlockSize;
/** Creates {@code Lucene40PostingsFormat} with default
* settings. */
@@ -231,7 +234,7 @@ public final class Lucene40PostingsFormat extends PostingsFormat {
* values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
* @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
- public Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) {
+ private Lucene40PostingsFormat(int minBlockSize, int maxBlockSize) {
super("Lucene40");
this.minBlockSize = minBlockSize;
assert minBlockSize > 1;
@@ -240,22 +243,7 @@ public final class Lucene40PostingsFormat extends PostingsFormat {
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase docs = new Lucene40PostingsWriter(state);
-
- // TODO: should we make the terms index more easily
- // pluggable? Ie so that this codec would record which
- // index impl was used, and switch on loading?
- // Or... you must make a new Codec for this?
- boolean success = false;
- try {
- FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
- success = true;
- return ret;
- } finally {
- if (!success) {
- docs.close();
- }
- }
+ throw new UnsupportedOperationException("this codec can only be used for reading");
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
index 64d2e49b1ff..a3729e2f1da 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
@@ -45,10 +45,21 @@ import org.apache.lucene.util.IOUtils;
* postings format.
*
* @see Lucene40PostingsFormat
- * @lucene.experimental */
-
+ * @deprecated Only for reading old 4.0 segments */
+@Deprecated
public class Lucene40PostingsReader extends PostingsReaderBase {
+ final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
+ final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";
+ final static String PRX_CODEC = "Lucene40PostingsWriterPrx";
+
+ //private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+ // Increment version to change it:
+ final static int VERSION_START = 0;
+ final static int VERSION_LONG_SKIP = 1;
+ final static int VERSION_CURRENT = VERSION_LONG_SKIP;
+
private final IndexInput freqIn;
private final IndexInput proxIn;
// public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
@@ -67,7 +78,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
try {
freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
ioContext);
- CodecUtil.checkHeader(freqIn, Lucene40PostingsWriter.FRQ_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_CURRENT);
+ CodecUtil.checkHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT);
// TODO: hasProx should (somehow!) become codec private,
// but it's tricky because 1) FIS.hasProx is global (it
// could be all fields that have prox are written by a
@@ -79,7 +90,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
if (fieldInfos.hasProx()) {
proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION),
ioContext);
- CodecUtil.checkHeader(proxIn, Lucene40PostingsWriter.PRX_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_CURRENT);
+ CodecUtil.checkHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT);
} else {
proxIn = null;
}
@@ -97,8 +108,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
public void init(IndexInput termsIn) throws IOException {
// Make sure we are talking to the matching past writer
- CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.TERMS_CODEC,
- Lucene40PostingsWriter.VERSION_START, Lucene40PostingsWriter.VERSION_CURRENT);
+ CodecUtil.checkHeader(termsIn, TERMS_CODEC, VERSION_START, VERSION_CURRENT);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
index 4cef37a5977..1580a390ba9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
@@ -28,8 +28,9 @@ import org.apache.lucene.store.IndexInput;
* that stores positions and payloads.
*
* @see Lucene40PostingsFormat
- * @lucene.experimental
+ * @deprecated Only for reading old 4.0 segments
*/
+@Deprecated
public class Lucene40SkipListReader extends MultiLevelSkipListReader {
private boolean currentFieldStoresPayloads;
private boolean currentFieldStoresOffsets;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
index d1c21ed2846..3cbc9653bed 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
@@ -380,10 +380,16 @@ public final class Lucene41PostingsFormat extends PostingsFormat {
// NOTE: must be multiple of 64 because of PackedInts long-aligned encoding/decoding
public final static int BLOCK_SIZE = 128;
+ /** Creates {@code Lucene41PostingsFormat} with default
+ * settings. */
public Lucene41PostingsFormat() {
this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
+ /** Creates {@code Lucene41PostingsFormat} with custom
+ * values for {@code minBlockSize} and {@code
+ * maxBlockSize} passed to block terms dictionary.
+ * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
public Lucene41PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
super("Lucene41");
this.minTermBlockSize = minTermBlockSize;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
index 51e2b02422f..4298ea6c13b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsWriter.java
@@ -63,10 +63,9 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
final static String POS_CODEC = "Lucene41PostingsWriterPos";
final static String PAY_CODEC = "Lucene41PostingsWriterPay";
- // Increment version to change it: nocommit: we can start at 0
+ // Increment version to change it
final static int VERSION_START = 0;
- final static int VERSION_NO_OFFSETS_IN_SKIPDATA = 1; // LUCENE-4443
- final static int VERSION_CURRENT = VERSION_NO_OFFSETS_IN_SKIPDATA;
+ final static int VERSION_CURRENT = VERSION_START;
final IndexOutput docOut;
final IndexOutput posOut;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html
index aff3d7a572f..14782803a1c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html
@@ -153,7 +153,7 @@ its title, url, or an identifier to access a database. The set of stored fields
returned for each hit when searching. This is keyed by document number.
-{@link org.apache.lucene.codecs.lucene41Lucene41PostingsFormat Term dictionary}.
+{@link org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat Term dictionary}.
A dictionary containing all of the terms used in all of the
indexed fields of all of the documents. The dictionary also contains the number
of documents which contain the term, and pointers to the term's frequency and
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/package.html b/lucene/core/src/java/org/apache/lucene/codecs/package.html
index e6de64d057b..91a65458ac1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/package.html
+++ b/lucene/core/src/java/org/apache/lucene/codecs/package.html
@@ -61,8 +61,8 @@ name of your codec.
If you just want to customise the {@link org.apache.lucene.codecs.PostingsFormat}, or use different postings
formats for different fields, then you can register your custom postings format in the same way (in
META-INF/services/org.apache.lucene.codecs.PostingsFormat), and then extend the default
- {@link org.apache.lucene.codecs.lucene40.Lucene40Codec} and override
- {@link org.apache.lucene.codecs.lucene40.Lucene40Codec#getPostingsFormatForField(String)} to return your custom
+ {@link org.apache.lucene.codecs.lucene41.Lucene41Codec} and override
+ {@link org.apache.lucene.codecs.lucene41.Lucene41Codec#getPostingsFormatForField(String)} to return your custom
postings format.